module Text.OPML.Conduit.Parse
(
parseOpml
, parseOpmlHead
, parseOpmlOutline
, OpmlException(..)
) where
import Conduit hiding (throwM)
import Control.Applicative hiding (many)
import Control.Exception.Safe as Exception
import Control.Monad
import Control.Monad.Fix
import Data.CaseInsensitive hiding (map)
import Data.List.NonEmpty (NonEmpty, nonEmpty)
import Data.Maybe
import Data.Monoid
import Data.Monoid.Textual hiding (map)
import Data.MonoTraversable
import Data.NonNull (NonNull, fromNullable)
import Data.Text (Text, strip, unpack)
import Data.Text.Encoding
import Data.Time.Clock
import Data.Time.LocalTime
import Data.Time.RFC822
import Data.Tree
import Data.Version
import Data.XML.Types
import Lens.Simple
import Numeric
import Prelude hiding (last)
import Text.OPML.Types
import Text.ParserCombinators.ReadP (readP_to_S)
import Text.XML.Stream.Parse
import URI.ByteString
data OpmlException = MissingText
| InvalidBool Text
| InvalidDecimal Text
| InvalidTime Text
| InvalidURI URIParseError
| InvalidVersion Text
deriving instance Eq OpmlException
deriving instance Show OpmlException
instance Exception OpmlException where
displayException MissingText = "An outline is missing the 'text' attribute."
displayException (InvalidBool t) = "Invalid boolean: " ++ unpack t
displayException (InvalidDecimal t) = "Invalid decimal: " ++ unpack t
displayException (InvalidURI e) = "Invalid URI: " ++ show e
displayException (InvalidTime t) = "Invalid time: " ++ unpack t
displayException (InvalidVersion t) = "Invalid version: " ++ unpack t
asURI :: (MonadThrow m) => Text -> m URI
asURI t = either (throwM . InvalidURI) return . parseURI laxURIParserOptions $ encodeUtf8 t
asVersion :: (MonadThrow m) => Text -> m Version
asVersion v = case filter (onull . snd) . readP_to_S parseVersion $ unpack v of
[(a, "")] -> return a
_ -> throwM $ InvalidVersion v
asDecimal :: (MonadThrow m, Integral a) => Text -> m a
asDecimal t = case filter (onull . snd) . readSigned readDec $ unpack t of
(result, _):_ -> return result
_ -> throwM $ InvalidDecimal t
asExpansionState :: (MonadThrow m, Integral a) => Text -> m [a]
asExpansionState t = mapM asDecimal . filter (not . onull) . map strip $ split (== ',') t
asTime :: (MonadThrow m) => Text -> m UTCTime
asTime t = maybe (throwM $ InvalidTime t) (return . zonedTimeToUTC) $ parseTimeRFC822 t
asBool :: (MonadThrow m) => Text -> m Bool
asBool t
| mk t == "true" = return True
| mk t == "false" = return False
| otherwise = throwM $ InvalidBool t
asNonNull :: (MonoFoldable mono, MonadThrow m) => mono -> m (NonNull mono)
asNonNull = maybe (throwM MissingText) return . fromNullable
asCategories :: Text -> [NonEmpty (NonNull Text)]
asCategories = mapMaybe (nonEmpty . mapMaybe fromNullable . split (== '/')) . split (== ',')
dateTag :: (MonadThrow m) => NameMatcher a -> ConduitM Event o m (Maybe UTCTime)
dateTag name = tagIgnoreAttrs name $ content >>= asTime
uriTag :: (MonadThrow m) => NameMatcher a -> ConduitM Event o m (Maybe URI)
uriTag name = tagIgnoreAttrs name $ content >>= asURI
expansionStateTag :: (MonadThrow m, Integral a) => ConduitM Event o m (Maybe [a])
expansionStateTag = tagIgnoreAttrs "expansionState" $ content >>= asExpansionState
textTag :: (MonadThrow m) => NameMatcher a -> ConduitM Event o m (Maybe Text)
textTag name = tagIgnoreAttrs name content
decimalTag :: (Integral i, MonadThrow m) => NameMatcher a -> ConduitM Event o m (Maybe i)
decimalTag name = tagIgnoreAttrs name $ content >>= asDecimal
projectC :: Monad m => Fold a a' b b' -> Conduit a m b
projectC prism = fix $ \recurse -> do
item <- await
case (item, item ^? (_Just . prism)) of
(_, Just a) -> yield a >> recurse
(Just _, _) -> recurse
_ -> return ()
data HeadPiece = HeadCreated UTCTime
| HeadModified UTCTime
| HeadDocs URI
| HeadExpansionState [Int]
| HeadOwnerEmail Text
| HeadOwnerId URI
| HeadOwnerName Text
| HeadTitle Text
| HeadVertScrollState Int
| HeadWindowBottom Int
| HeadWindowLeft Int
| HeadWindowRight Int
| HeadWindowTop Int
makeTraversals ''HeadPiece
parseOpmlHead :: (MonadCatch m) => ConduitM Event o m (Maybe OpmlHead)
parseOpmlHead = tagIgnoreAttrs "head" $ (manyYield' (choose piece) <* many ignoreAnyTreeContent) =$= zipConduit where
zipConduit = getZipConduit $ OpmlHead
<$> ZipConduit (projectC _HeadTitle =$= headDefC mempty)
<*> ZipConduit (projectC _HeadCreated =$= headC)
<*> ZipConduit (projectC _HeadModified =$= headC)
<*> ZipConduit (projectC _HeadOwnerName =$= headDefC mempty)
<*> ZipConduit (projectC _HeadOwnerEmail =$= headDefC mempty)
<*> ZipConduit (projectC _HeadOwnerId =$= headC)
<*> ZipConduit (projectC _HeadDocs =$= headC)
<*> ZipConduit (projectC _HeadExpansionState =$= concatC =$= sinkList)
<*> ZipConduit (projectC _HeadVertScrollState =$= headC)
<*> ZipConduit (projectC _HeadWindowBottom =$= headC)
<*> ZipConduit (projectC _HeadWindowLeft =$= headC)
<*> ZipConduit (projectC _HeadWindowRight =$= headC)
<*> ZipConduit (projectC _HeadWindowTop =$= headC)
piece = [ fmap HeadCreated <$> dateTag "dateCreated"
, fmap HeadModified <$> dateTag "dateModified"
, fmap HeadDocs <$> uriTag "docs"
, fmap HeadExpansionState <$> expansionStateTag
, fmap HeadOwnerEmail <$> textTag "ownerEmail"
, fmap HeadOwnerId <$> uriTag "ownerId"
, fmap HeadOwnerName <$> textTag "ownerName"
, fmap HeadTitle <$> textTag "title"
, fmap HeadVertScrollState <$> decimalTag "vertScrollState"
, fmap HeadWindowBottom <$> decimalTag "windowBottom"
, fmap HeadWindowLeft <$> decimalTag "windowLeft"
, fmap HeadWindowRight <$> decimalTag "windowRight"
, fmap HeadWindowTop <$> decimalTag "windowTop"
]
parseOpmlOutline :: (MonadCatch m) => ConduitM Event o m (Maybe (Tree OpmlOutline))
parseOpmlOutline = tag' "outline" attributes handler where
attributes = do
otype <- optional $ requireAttr "type"
case mk <$> otype of
Just "include" -> (,,,) otype <$> baseAttr <*> pure Nothing <*> (Just <$> linkAttr) <* ignoreAttrs
Just "link" -> (,,,) otype <$> baseAttr <*> pure Nothing <*> (Just <$> linkAttr) <* ignoreAttrs
Just "rss" -> (,,,) otype <$> baseAttr <*> (Just <$> subscriptionAttr) <*> pure Nothing <* ignoreAttrs
_ -> (,,,) otype <$> baseAttr <*> pure Nothing <*> pure Nothing <* ignoreAttrs
baseAttr = (,,,,) <$> (requireAttr "text" >>= asNonNull)
<*> optional (requireAttr "isComment" >>= asBool)
<*> optional (requireAttr "isBreakpoint" >>= asBool)
<*> optional (requireAttr "created" >>= asTime)
<*> optional (asCategories <$> requireAttr "category")
linkAttr = requireAttr "url"
subscriptionAttr = (,,,,,) <$> (requireAttr "xmlUrl" >>= asURI)
<*> optional (requireAttr "htmlUrl" >>= asURI)
<*> optional (requireAttr "description")
<*> optional (requireAttr "language")
<*> optional (requireAttr "title")
<*> optional (requireAttr "version")
handler (_, b, Just s, _) = Node <$> (OpmlOutlineSubscription <$> baseHandler b <*> pure (subscriptionHandler s)) <*> pure []
handler (_, b, _, Just l) = Node <$> (OpmlOutlineLink <$> baseHandler b <*> asURI l) <*> pure []
handler (otype, b, _, _) = Node <$> (OpmlOutlineGeneric <$> baseHandler b <*> pure (fromMaybe mempty otype))
<*> (manyYield' parseOpmlOutline =$= sinkList)
baseHandler (txt, comment, breakpoint, created, category) = return $ OutlineBase txt comment breakpoint created (fromMaybe mempty category)
subscriptionHandler (uri, html, desc, lang, title, version) = OutlineSubscription uri html (fromMaybe mempty desc) (fromMaybe mempty lang) (fromMaybe mempty title) (fromMaybe mempty version)
data OpmlDocPiece = DocHead OpmlHead | DocBody [Tree OpmlOutline]
makeTraversals ''OpmlDocPiece
parseOpml :: (MonadCatch m) => ConduitM Event o m (Maybe Opml)
parseOpml = tag' "opml" attributes handler where
attributes = (requireAttr "version" >>= asVersion) <* ignoreAttrs
handler version = (manyYield' (choose piece) <* many ignoreAnyTreeContent) =$= zipConduit version
zipConduit version = getZipConduit $ Opml version
<$> ZipConduit (projectC _DocHead =$= headDefC mkOpmlHead)
<*> ZipConduit (projectC _DocBody =$= headDefC mempty)
parseOpmlBody = tagIgnoreAttrs "body" $ manyYield' parseOpmlOutline =$= sinkList
piece = [ fmap DocHead <$> parseOpmlHead
, fmap DocBody <$> parseOpmlBody
]