module Codec.Epub.IO
( opfContentsFromZip
, opfContentsFromBS
, opfContentsFromDir
, removeEncoding
, removeDoctype
)
where
import Codec.Archive.Zip
import Control.Arrow.ListArrows ( (>>>), deep )
import Control.Exception
import Control.Monad.Error
import qualified Data.ByteString.Char8 as BS
import Data.ByteString.Lazy ( fromChunks )
import qualified Data.ByteString.Lazy.Char8 as BL
import System.Directory
import System.FilePath
import Text.Regex
import Text.XML.HXT.Arrow.XmlArrow ( getAttrValue, hasName, isElem )
import Text.XML.HXT.Arrow.XmlState ( no, runX, withValidate )
import Text.XML.HXT.Arrow.ReadDocument ( readString )
removeEncoding :: String -> String
removeEncoding = flip (subRegex
(mkRegexWithOpts " +encoding=\"UTF-8\"" False True)) ""
removeDoctype :: String -> String
removeDoctype = flip (subRegex
(mkRegexWithOpts "<!DOCTYPE [^>]*>" False True)) ""
locateRootFile :: (MonadIO m, MonadError String m) =>
FilePath -> String -> m FilePath
locateRootFile containerPath containerDoc = do
result <- liftIO $ runX (
readString [withValidate no] containerDoc
>>> deep (isElem >>> hasName "rootfile")
>>> getAttrValue "full-path"
)
case result of
(p : []) -> return p
_ -> throwError $
"ERROR: rootfile full-path missing from " ++ containerPath
fileFromArchive :: MonadError String m =>
FilePath -> Archive -> m String
fileFromArchive filePath archive = do
let mbEntry = findEntryByPath filePath archive
maybe
(throwError $ "Unable to locate file " ++ filePath)
(return . BL.unpack . fromEntry) mbEntry
opfContentsFromBS :: (MonadError String m, MonadIO m)
=> BS.ByteString
-> m (FilePath, String)
opfContentsFromBS strictBytes = do
let lazyBytes = fromChunks [strictBytes]
result <- liftIO $ ( try $ evaluate
(toArchive lazyBytes) :: IO (Either SomeException Archive) )
archive <- either (throwError . show) return result
let containerPath = "META-INF/container.xml"
containerDoc <- fileFromArchive containerPath archive
rootPath <- locateRootFile containerPath containerDoc
rootContents <- fileFromArchive rootPath archive
return (rootPath, rootContents)
opfContentsFromZip :: (MonadError String m, MonadIO m)
=> FilePath
-> m (FilePath, String)
opfContentsFromZip zipPath = do
zipFileBytes <- liftIO $ BS.readFile zipPath
opfContentsFromBS zipFileBytes
opfContentsFromDir :: (MonadError String m, MonadIO m)
=> FilePath
-> m (FilePath, String)
opfContentsFromDir dir = do
liftIO $ setCurrentDirectory dir
let containerPath = "META-INF/container.xml"
containerDoc <- liftIO $ readFile containerPath
rootPath <- locateRootFile (dir </> containerPath) containerDoc
rootContents <- liftIO $ readFile rootPath
return (rootPath, rootContents)