{-# LANGUAGE Arrows, FlexibleContexts #-}

{- | The main parsing interface, these get* functions are intended
   to be used by consumers of this library

   This module is called Parse because it invokes the XML parsing
   machinery of this library, but consumers of the library do not
   have to interact with HXT, Arrows or XML directly.
-}
module Codec.Epub.Parse
   ( getGuide
   , getManifest
   , getMetadata
   , getPackage
   , getSpine
   )
   where

import Control.Arrow.ListArrows
import Control.Monad.Except
import Text.XML.HXT.Arrow.Namespace ( propagateNamespaces )
import Text.XML.HXT.Arrow.XmlState ( no, runX, withValidate )
import Text.XML.HXT.Arrow.XmlState.TypeDefs
import Text.XML.HXT.Arrow.ReadDocument ( readString )
import Text.XML.HXT.DOM.TypeDefs

import Codec.Epub.Data.Guide
import Codec.Epub.Data.Manifest
import Codec.Epub.Data.Metadata
import Codec.Epub.Data.Package
import Codec.Epub.Data.Spine
import Codec.Epub.Parse.Guide
import Codec.Epub.Parse.Manifest
import Codec.Epub.Parse.Metadata
import Codec.Epub.Parse.Package
import Codec.Epub.Parse.Refinements
import Codec.Epub.Parse.Spine
import Codec.Epub.Util


{- Extract the epub OPF Package data contained in the supplied 
   XML string
-}
performParse :: (MonadIO m, MonadError String m) =>
   IOSLA (XIOState ()) XmlTree b -> String -> m b
performParse :: forall (m :: * -> *) b.
(MonadIO m, MonadError String m) =>
IOSLA (XIOState ()) XmlTree b -> String -> m b
performParse IOSLA (XIOState ()) XmlTree b
parser String
contents = do
   {- Improper encoding and schema declarations have been causing
      havok with this parse, cruelly strip them out. -}
   let cleanedContents :: String
cleanedContents = String -> String
removeIllegalStartChars forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
removeEncoding
         forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
removeDoctype forall a b. (a -> b) -> a -> b
$ String
contents
   
   [b]
result <- forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ forall c. IOSArrow XmlTree c -> IO [c]
runX (
      forall s b. SysConfigList -> String -> IOStateArrow s b XmlTree
readString [Bool -> SysConfig
withValidate Bool
no] String
cleanedContents
      forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
propagateNamespaces
      forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> IOSLA (XIOState ()) XmlTree b
parser
      )

   case [b]
result of
      (b
r : []) -> forall (m :: * -> *) a. Monad m => a -> m a
return b
r
      [b]
_        -> forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError
         String
"ERROR: FIXME with a better message"


{- | Parse epub guide items from a String representing the epub XML
   Package Document
-}
getGuide :: (MonadIO m, MonadError String m) =>
   String -> m [GuideRef]
getGuide :: forall (m :: * -> *).
(MonadIO m, MonadError String m) =>
String -> m [GuideRef]
getGuide = forall (m :: * -> *) b.
(MonadIO m, MonadError String m) =>
IOSLA (XIOState ()) XmlTree b -> String -> m b
performParse forall (a :: * -> * -> *). ArrowXml a => a XmlTree [GuideRef]
guideP


{- | Parse epub manifest data from a String representing the epub XML
   Package Document
-}
getManifest :: (MonadIO m, MonadError String m) =>
   String -> m Manifest
getManifest :: forall (m :: * -> *).
(MonadIO m, MonadError String m) =>
String -> m Manifest
getManifest = forall (m :: * -> *) b.
(MonadIO m, MonadError String m) =>
IOSLA (XIOState ()) XmlTree b -> String -> m b
performParse forall (a :: * -> * -> *). ArrowXml a => a XmlTree Manifest
manifestP


{- | Parse epub metadata from a String representing the epub XML
   Package Document
-}
{- Parsing the metadata is a two-pass process
   First we need to parse the meta tags only, referred to in this
   code as 'refinements.'
   Second we parse the metadata tags themselves, passing in the
   refinements so their info can be merged during parse
-}
getMetadata :: (MonadIO m, MonadError String m) =>
   String -> m Metadata
getMetadata :: forall (m :: * -> *).
(MonadIO m, MonadError String m) =>
String -> m Metadata
getMetadata String
opfContents = do
   [Refinement]
refinements <- forall (m :: * -> *) b.
(MonadIO m, MonadError String m) =>
IOSLA (XIOState ()) XmlTree b -> String -> m b
performParse forall (a :: * -> * -> *). ArrowXml a => a XmlTree [Refinement]
refinementsP String
opfContents
   Metadata
rawMd <- forall (m :: * -> *) b.
(MonadIO m, MonadError String m) =>
IOSLA (XIOState ()) XmlTree b -> String -> m b
performParse (forall (a :: * -> * -> *).
ArrowXml a =>
[Refinement] -> a XmlTree Metadata
metadataP [Refinement]
refinements) String
opfContents
   forall (m :: * -> *) a. Monad m => a -> m a
return Metadata
rawMd


{- | Parse epub package info from a String representing the epub XML
   Package Document
-}
getPackage :: (MonadIO m, MonadError String m) =>
   String -> m Package
getPackage :: forall (m :: * -> *).
(MonadIO m, MonadError String m) =>
String -> m Package
getPackage = forall (m :: * -> *) b.
(MonadIO m, MonadError String m) =>
IOSLA (XIOState ()) XmlTree b -> String -> m b
performParse forall (a :: * -> * -> *). ArrowXml a => a XmlTree Package
packageP


{- | Parse epub spine info from a String representing the epub XML
   Package Document
-}
getSpine :: (MonadIO m, MonadError String m) =>
   String -> m Spine
getSpine :: forall (m :: * -> *).
(MonadIO m, MonadError String m) =>
String -> m Spine
getSpine = forall (m :: * -> *) b.
(MonadIO m, MonadError String m) =>
IOSLA (XIOState ()) XmlTree b -> String -> m b
performParse forall (a :: * -> * -> *). ArrowXml a => a XmlTree Spine
spineP