module Codec.Epub.IO
( extractFileFromZip, opfPath )
where
import Codec.Archive.LibZip
import Control.Arrow.ListArrows ( (>>>), deep )
import Control.Monad.Error
import Text.Regex
import Text.XML.HXT.Arrow.XmlArrow ( getAttrValue, hasName, isElem )
import Text.XML.HXT.Arrow.XmlState ( no, runX, withValidate )
import Text.XML.HXT.Arrow.ReadDocument ( readString )
removeEncoding :: String -> String
removeEncoding = flip (subRegex
(mkRegexWithOpts " +encoding=\"UTF-8\"" False True)) ""
removeDoctype :: String -> String
removeDoctype = flip (subRegex
(mkRegexWithOpts "<!DOCTYPE [^>]*>" False True)) ""
extractFileFromZip :: (MonadIO m, MonadError String m)
=> FilePath
-> FilePath
-> m String
extractFileFromZip zipPath filePath = do
result <- liftIO $ catchZipError
(fmap Right $ withArchive [] zipPath $ fileContents [] filePath)
(return . Left)
output <- either (throwError . show) return result
return . removeEncoding . removeDoctype $ output
opfPath :: (MonadError String m, MonadIO m)
=> FilePath
-> m String
opfPath zipPath = do
containerContents <- extractFileFromZip zipPath
"META-INF/container.xml"
result <- liftIO $ runX (
readString [withValidate no] containerContents
>>> deep (isElem >>> hasName "rootfile")
>>> getAttrValue "full-path"
)
case result of
(p : []) -> return p
_ -> throwError
"ERROR: rootfile full-path missing from META-INF/container.xml"