module Codec.Epub.IO
( extractFileFromZip, opfPath )
where
import Control.Arrow.ListArrows ( (>>>), deep )
import Control.Monad.Error
import System.Exit
import System.Process
import Text.Printf
import Text.Regex
import Text.XML.HXT.Arrow.XmlArrow ( getAttrValue, hasName, isElem )
import Text.XML.HXT.Arrow.XmlState ( no, runX, withValidate )
import Text.XML.HXT.Arrow.ReadDocument ( readString )
removeEncoding :: String -> String
removeEncoding = flip (subRegex
(mkRegexWithOpts " +encoding=\"UTF-8\"" False True)) ""
removeDoctype :: String -> String
removeDoctype = flip (subRegex
(mkRegexWithOpts "<!DOCTYPE [^>]*>" False True)) ""
handleEC :: (MonadIO m, MonadError String m)
=> String -> ExitCode -> m ()
handleEC msg (ExitFailure c)
| c > 2 = throwError $ printf "%s status: %s]\n" msg (show c)
| otherwise = return ()
handleEC _ ExitSuccess = return ()
extractFileFromZip :: (MonadIO m, MonadError String m)
=> FilePath
-> FilePath
-> m String
extractFileFromZip zipPath filePath = do
let dearchiver = "unzip"
(ec, output, _) <- liftIO $ readProcessWithExitCode
dearchiver ["-p", zipPath, filePath] ""
handleEC (printf "[ERROR %s zip file: %s path in zip: %s"
dearchiver zipPath filePath) ec
return . removeEncoding . removeDoctype $ output
opfPath :: (MonadError String m, MonadIO m)
=> FilePath
-> m String
opfPath zipPath = do
containerContents <- extractFileFromZip zipPath
"META-INF/container.xml"
result <- liftIO $ runX (
readString [withValidate no] containerContents
>>> deep (isElem >>> hasName "rootfile")
>>> getAttrValue "full-path"
)
case result of
(p : []) -> return p
_ -> throwError
"ERROR: rootfile full-path missing from META-INF/container.xml"