{-# LANGUAGE Arrows, FlexibleContexts #-}

{- | The main parsing interface, these get* functions are intended
   to be used by consumers of this library

   This module is called Parse because it invokes the XML parsing
   machinery of this library, but consumers of the library do not
   have to interact with HXT, Arrows or XML directly.
-}
module Codec.Epub.Parse
   ( getGuide
   , getManifest
   , getMetadata
   , getPackage
   , getSpine
   )
   where

import Control.Arrow.ListArrows ( IOSLA, (>>>) )
import Control.Monad.Except ( MonadError, throwError )
import Control.Monad.IO.Class ( MonadIO, liftIO )
import Text.XML.HXT.Arrow.Namespace ( propagateNamespaces )
import Text.XML.HXT.Arrow.XmlState ( no, runX, withValidate )
import Text.XML.HXT.Arrow.XmlState.TypeDefs ( XIOState )
import Text.XML.HXT.Arrow.ReadDocument ( readString )
import Text.XML.HXT.DOM.TypeDefs ( XmlTree )

import Codec.Epub.Data.Guide
import Codec.Epub.Data.Manifest
import Codec.Epub.Data.Metadata
import Codec.Epub.Data.Package
import Codec.Epub.Data.Spine
import Codec.Epub.Parse.Guide
import Codec.Epub.Parse.Manifest
import Codec.Epub.Parse.Metadata
import Codec.Epub.Parse.Package
import Codec.Epub.Parse.Refinements
import Codec.Epub.Parse.Spine
import Codec.Epub.Util


{- Extract the epub OPF Package data contained in the supplied 
   XML string
-}
performParse :: (MonadIO m, MonadError String m, Show b) =>
   IOSLA (XIOState ()) XmlTree b -> String -> m b
performParse :: forall (m :: * -> *) b.
(MonadIO m, MonadError String m, Show b) =>
IOSLA (XIOState ()) XmlTree b -> String -> m b
performParse IOSLA (XIOState ()) XmlTree b
parser String
contents = do
   {- Improper encoding and schema declarations have been causing
      havok with this parse, cruelly strip them out. -}
   let cleanedContents :: String
cleanedContents = String -> String
removeIllegalStartChars (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
removeEncoding
         (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
removeDoctype (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ String
contents
   
   [b]
result <- IO [b] -> m [b]
forall a. IO a -> m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [b] -> m [b]) -> IO [b] -> m [b]
forall a b. (a -> b) -> a -> b
$ IOSLA (XIOState ()) XmlTree b -> IO [b]
forall c. IOSArrow XmlTree c -> IO [c]
runX (
      SysConfigList -> String -> IOStateArrow () XmlTree XmlTree
forall s b. SysConfigList -> String -> IOStateArrow s b XmlTree
readString [Bool -> SysConfig
withValidate Bool
no] String
cleanedContents
      IOStateArrow () XmlTree XmlTree
-> IOSLA (XIOState ()) XmlTree b -> IOSLA (XIOState ()) XmlTree b
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> IOStateArrow () XmlTree XmlTree
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
propagateNamespaces
      IOStateArrow () XmlTree XmlTree
-> IOSLA (XIOState ()) XmlTree b -> IOSLA (XIOState ()) XmlTree b
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> IOSLA (XIOState ()) XmlTree b
parser
      )

   case [b]
result of
      (b
r : []) -> b -> m b
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return b
r
      (b
_ : [b]
unparseable) -> String -> m b
forall a. String -> m a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (String -> m b) -> String -> m b
forall a b. (a -> b) -> a -> b
$
         String
"ERROR: Unable to parse epub metadata\n" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> ([b] -> String
forall a. Show a => a -> String
show [b]
unparseable)
      [] -> String -> m b
forall a. String -> m a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (String -> m b) -> String -> m b
forall a b. (a -> b) -> a -> b
$
         String
"ERROR: Unable to parse epub metadata"


{- | Parse epub guide items from a String representing the epub XML
   Package Document
-}
getGuide :: (MonadIO m, MonadError String m) =>
   String -> m [GuideRef]
getGuide :: forall (m :: * -> *).
(MonadIO m, MonadError String m) =>
String -> m [GuideRef]
getGuide = IOSLA (XIOState ()) XmlTree [GuideRef] -> String -> m [GuideRef]
forall (m :: * -> *) b.
(MonadIO m, MonadError String m, Show b) =>
IOSLA (XIOState ()) XmlTree b -> String -> m b
performParse IOSLA (XIOState ()) XmlTree [GuideRef]
forall (a :: * -> * -> *). ArrowXml a => a XmlTree [GuideRef]
guideP


{- | Parse epub manifest data from a String representing the epub XML
   Package Document
-}
getManifest :: (MonadIO m, MonadError String m) =>
   String -> m Manifest
getManifest :: forall (m :: * -> *).
(MonadIO m, MonadError String m) =>
String -> m Manifest
getManifest = IOSLA (XIOState ()) XmlTree Manifest -> String -> m Manifest
forall (m :: * -> *) b.
(MonadIO m, MonadError String m, Show b) =>
IOSLA (XIOState ()) XmlTree b -> String -> m b
performParse IOSLA (XIOState ()) XmlTree Manifest
forall (a :: * -> * -> *). ArrowXml a => a XmlTree Manifest
manifestP


{- | Parse epub metadata from a String representing the epub XML
   Package Document
-}
{- Parsing the metadata is a two-pass process
   First we need to parse the meta tags only, referred to in this
   code as 'refinements.'
   Second we parse the metadata tags themselves, passing in the
   refinements so their info can be merged during parse
-}
getMetadata :: (MonadIO m, MonadError String m) =>
   String -> m Metadata
getMetadata :: forall (m :: * -> *).
(MonadIO m, MonadError String m) =>
String -> m Metadata
getMetadata String
opfContents = do
   [Refinement]
refinements <- IOSLA (XIOState ()) XmlTree [Refinement]
-> String -> m [Refinement]
forall (m :: * -> *) b.
(MonadIO m, MonadError String m, Show b) =>
IOSLA (XIOState ()) XmlTree b -> String -> m b
performParse IOSLA (XIOState ()) XmlTree [Refinement]
forall (a :: * -> * -> *). ArrowXml a => a XmlTree [Refinement]
refinementsP String
opfContents
   Metadata
rawMd <- IOSLA (XIOState ()) XmlTree Metadata -> String -> m Metadata
forall (m :: * -> *) b.
(MonadIO m, MonadError String m, Show b) =>
IOSLA (XIOState ()) XmlTree b -> String -> m b
performParse ([Refinement] -> IOSLA (XIOState ()) XmlTree Metadata
forall (a :: * -> * -> *).
ArrowXml a =>
[Refinement] -> a XmlTree Metadata
metadataP [Refinement]
refinements) String
opfContents
   Metadata -> m Metadata
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return Metadata
rawMd


{- | Parse epub package info from a String representing the epub XML
   Package Document
-}
getPackage :: (MonadIO m, MonadError String m) =>
   String -> m Package
getPackage :: forall (m :: * -> *).
(MonadIO m, MonadError String m) =>
String -> m Package
getPackage = IOSLA (XIOState ()) XmlTree Package -> String -> m Package
forall (m :: * -> *) b.
(MonadIO m, MonadError String m, Show b) =>
IOSLA (XIOState ()) XmlTree b -> String -> m b
performParse IOSLA (XIOState ()) XmlTree Package
forall (a :: * -> * -> *). ArrowXml a => a XmlTree Package
packageP


{- | Parse epub spine info from a String representing the epub XML
   Package Document
-}
getSpine :: (MonadIO m, MonadError String m) =>
   String -> m Spine
getSpine :: forall (m :: * -> *).
(MonadIO m, MonadError String m) =>
String -> m Spine
getSpine = IOSLA (XIOState ()) XmlTree Spine -> String -> m Spine
forall (m :: * -> *) b.
(MonadIO m, MonadError String m, Show b) =>
IOSLA (XIOState ()) XmlTree b -> String -> m b
performParse IOSLA (XIOState ()) XmlTree Spine
forall (a :: * -> * -> *). ArrowXml a => a XmlTree Spine
spineP