{-# LANGUAGE FlexibleContexts #-}

-- | Functions for performing some IO operations on epub files

module Codec.Epub.IO
   ( getPkgXmlFromZip
   , getPkgPathXmlFromZip
   , getPkgPathXmlFromBS
   , getPkgPathXmlFromDir
   , mkEpubArchive
   , readArchive
   , writeArchive
   )
   where

import Codec.Archive.Zip ( Archive, ZipOption (OptRecursive),
  addFilesToArchive, emptyArchive, findEntryByPath, fromArchive, fromEntry, toArchive )
import Control.Arrow.ListArrows ( (>>>), deep )
import Control.Exception ( SomeException, evaluate, try )
import Control.Monad ( (>=>), forM, liftM )
import Control.Monad.Except ( MonadError, throwError )
import Control.Monad.IO.Class ( MonadIO, liftIO )
import qualified Data.ByteString.Char8 as BS
import Data.ByteString.Lazy ( fromChunks )
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.UTF8 as UTF8
import Data.List ( (\\), isPrefixOf )
import System.Directory ( doesDirectoryExist, getDirectoryContents, setCurrentDirectory )
import System.FilePath ( (</>) )
import Text.XML.HXT.Arrow.ReadDocument ( readString )
import Text.XML.HXT.Arrow.XmlArrow ( getAttrValue, hasName, isElem )
import Text.XML.HXT.Arrow.XmlState ( no, runX, withValidate )

import Codec.Epub.Util


locateRootFile :: (MonadIO m, MonadError String m) =>
   FilePath -> String -> m FilePath
locateRootFile :: forall (m :: * -> *).
(MonadIO m, MonadError String m) =>
String -> String -> m String
locateRootFile String
containerPath' String
containerDoc = do
   [String]
result <- IO [String] -> m [String]
forall a. IO a -> m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [String] -> m [String]) -> IO [String] -> m [String]
forall a b. (a -> b) -> a -> b
$ IOSArrow XmlTree String -> IO [String]
forall c. IOSArrow XmlTree c -> IO [c]
runX (
      SysConfigList -> String -> IOStateArrow () XmlTree XmlTree
forall s b. SysConfigList -> String -> IOStateArrow s b XmlTree
readString [Bool -> SysConfig
withValidate Bool
no] String
containerDoc
      IOStateArrow () XmlTree XmlTree
-> IOSArrow XmlTree String -> IOSArrow XmlTree String
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> IOStateArrow () XmlTree XmlTree -> IOStateArrow () XmlTree XmlTree
forall (t :: * -> *) b c.
Tree t =>
IOSLA (XIOState ()) (t b) c -> IOSLA (XIOState ()) (t b) c
forall (a :: * -> * -> *) (t :: * -> *) b c.
(ArrowTree a, Tree t) =>
a (t b) c -> a (t b) c
deep (IOStateArrow () XmlTree XmlTree
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
isElem IOStateArrow () XmlTree XmlTree
-> IOStateArrow () XmlTree XmlTree
-> IOStateArrow () XmlTree XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> String -> IOStateArrow () XmlTree XmlTree
forall (a :: * -> * -> *).
ArrowXml a =>
String -> a XmlTree XmlTree
hasName String
"rootfile")
      IOStateArrow () XmlTree XmlTree
-> IOSArrow XmlTree String -> IOSArrow XmlTree String
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> String -> IOSArrow XmlTree String
forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
"full-path"
      )

   case [String]
result of
      (String
p : []) -> String -> m String
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return String
p
      [String]
_        -> String -> m String
forall a. String -> m a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (String -> m String) -> String -> m String
forall a b. (a -> b) -> a -> b
$
         String
"ERROR: rootfile full-path missing from " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
containerPath'


-- | Extract a file from a zip archive throwing an error on failure
fileFromArchive :: MonadError String m =>
   FilePath -> Archive -> m String
fileFromArchive :: forall (m :: * -> *).
MonadError String m =>
String -> Archive -> m String
fileFromArchive String
filePath Archive
archive = do
   let mbEntry :: Maybe Entry
mbEntry = String -> Archive -> Maybe Entry
findEntryByPath String
filePath Archive
archive
   m String -> (Entry -> m String) -> Maybe Entry -> m String
forall b a. b -> (a -> b) -> Maybe a -> b
maybe
      (String -> m String
forall a. String -> m a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (String -> m String) -> String -> m String
forall a b. (a -> b) -> a -> b
$ String
"Unable to locate file " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
filePath)
      (String -> m String
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> m String) -> (Entry -> String) -> Entry -> m String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> String
UTF8.toString (ByteString -> String) -> (Entry -> ByteString) -> Entry -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Entry -> ByteString
fromEntry) Maybe Entry
mbEntry


{- | The static location of the container.xml, as specified by the
     epub docs
-}
containerPath :: FilePath
containerPath :: String
containerPath = String
"META-INF/container.xml"


{- | Get the path and contents of the epub Package Document from
   a ByteString representing an epub (zip) file
-}
getPkgPathXmlFromBS :: (MonadError String m, MonadIO m)
   => BS.ByteString           -- ^ contents of the zip file
   -> m (FilePath, String)    -- ^ path (within the epub archive) and contents of the epub Package Document
getPkgPathXmlFromBS :: forall (m :: * -> *).
(MonadError String m, MonadIO m) =>
ByteString -> m (String, String)
getPkgPathXmlFromBS ByteString
strictBytes = do
   -- Need to turn this strict byte string into a lazy one
   let lazyBytes :: ByteString
lazyBytes = [ByteString] -> ByteString
fromChunks [ByteString
strictBytes]
   Either SomeException Archive
result <- IO (Either SomeException Archive)
-> m (Either SomeException Archive)
forall a. IO a -> m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Either SomeException Archive)
 -> m (Either SomeException Archive))
-> IO (Either SomeException Archive)
-> m (Either SomeException Archive)
forall a b. (a -> b) -> a -> b
$ ( IO Archive -> IO (Either SomeException Archive)
forall e a. Exception e => IO a -> IO (Either e a)
try (IO Archive -> IO (Either SomeException Archive))
-> IO Archive -> IO (Either SomeException Archive)
forall a b. (a -> b) -> a -> b
$ Archive -> IO Archive
forall a. a -> IO a
evaluate
      (ByteString -> Archive
toArchive ByteString
lazyBytes) :: IO (Either SomeException Archive) )
   Archive
archive <- (SomeException -> m Archive)
-> (Archive -> m Archive)
-> Either SomeException Archive
-> m Archive
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (String -> m Archive
forall a. String -> m a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (String -> m Archive)
-> (SomeException -> String) -> SomeException -> m Archive
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SomeException -> String
forall a. Show a => a -> String
show) Archive -> m Archive
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return Either SomeException Archive
result

   {- We need to first extract the container.xml file
      It's required to have a certain path and name in the epub
      and contains the path to what we really want, the .opf file.
   -}
   String
containerDoc <- String -> Archive -> m String
forall (m :: * -> *).
MonadError String m =>
String -> Archive -> m String
fileFromArchive String
containerPath Archive
archive

   let cleanedContents :: String
cleanedContents = String -> String
removeIllegalStartChars (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
removeEncoding
         (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
removeDoctype (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ String
containerDoc

   String
rootPath <- String -> String -> m String
forall (m :: * -> *).
(MonadIO m, MonadError String m) =>
String -> String -> m String
locateRootFile String
containerPath String
cleanedContents

   -- Now that we have the path to the .opf file, extract it
   String
rootContents <- String -> Archive -> m String
forall (m :: * -> *).
MonadError String m =>
String -> Archive -> m String
fileFromArchive String
rootPath Archive
archive

   (String, String) -> m (String, String)
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (String
rootPath, String
rootContents)


{- | Get the path and contents of the epub Package Document from
   an epub (zip) file
-}
getPkgPathXmlFromZip :: (MonadError String m, MonadIO m)
   => FilePath                -- ^ path to epub zip file
   -> m (FilePath, String)    -- ^ path (within the epub archive) and contents of the epub Package Document
getPkgPathXmlFromZip :: forall (m :: * -> *).
(MonadError String m, MonadIO m) =>
String -> m (String, String)
getPkgPathXmlFromZip String
zipPath = do
   {- Strictly read this file into a ByteString, send to 
      getPkgPathXmlFromBS
   -}
   ByteString
zipFileBytes <- IO ByteString -> m ByteString
forall a. IO a -> m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO ByteString -> m ByteString) -> IO ByteString -> m ByteString
forall a b. (a -> b) -> a -> b
$ String -> IO ByteString
BS.readFile String
zipPath
   ByteString -> m (String, String)
forall (m :: * -> *).
(MonadError String m, MonadIO m) =>
ByteString -> m (String, String)
getPkgPathXmlFromBS ByteString
zipFileBytes


-- | Get the contents of the epub Package Document from an epub (zip) file
getPkgXmlFromZip :: (MonadError String m, MonadIO m)
   => FilePath  -- ^ path to epub zip file
   -> m String  -- ^ contents of the epub Package Document
getPkgXmlFromZip :: forall (m :: * -> *).
(MonadError String m, MonadIO m) =>
String -> m String
getPkgXmlFromZip String
zipPath = (String, String) -> String
forall a b. (a, b) -> b
snd ((String, String) -> String) -> m (String, String) -> m String
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` String -> m (String, String)
forall (m :: * -> *).
(MonadError String m, MonadIO m) =>
String -> m (String, String)
getPkgPathXmlFromZip String
zipPath


{- | Get the path and contents of the epub Package Document from
   a directory containing the files from an epub file (as in:
   it's been unzipped into a dir)
-}
getPkgPathXmlFromDir :: (MonadError String m, MonadIO m)
   => FilePath                -- ^ directory path
   -> m (FilePath, String)    -- ^ path (within the epub archive) and contents of the epub Package Document
getPkgPathXmlFromDir :: forall (m :: * -> *).
(MonadError String m, MonadIO m) =>
String -> m (String, String)
getPkgPathXmlFromDir String
dir = do
   {- We need to first extract the container.xml file
      It's required to have a certain path and name in the epub
      and contains the path to what we really want, the .opf file.
   -}
   IO () -> m ()
forall a. IO a -> m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> m ()) -> IO () -> m ()
forall a b. (a -> b) -> a -> b
$ String -> IO ()
setCurrentDirectory String
dir

   String
containerDoc <- IO String -> m String
forall a. IO a -> m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO String -> m String) -> IO String -> m String
forall a b. (a -> b) -> a -> b
$ String -> IO String
readFile String
containerPath

   String
rootPath <- String -> String -> m String
forall (m :: * -> *).
(MonadIO m, MonadError String m) =>
String -> String -> m String
locateRootFile (String
dir String -> String -> String
</> String
containerPath) String
containerDoc

   -- Now that we have the path to the .opf file, load it
   String
rootContents <- IO String -> m String
forall a. IO a -> m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO String -> m String) -> IO String -> m String
forall a b. (a -> b) -> a -> b
$ String -> IO String
readFile String
rootPath

   (String, String) -> m (String, String)
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (String
rootPath, String
rootContents)


{- Recursively get a list of all files starting with the supplied
   parent directory. Excluding the directories themselves and ANY 
   dotfiles.
-}
getRecursiveContents :: FilePath -> IO [FilePath]
getRecursiveContents :: String -> IO [String]
getRecursiveContents String
parent = do
   [String]
fullContents <- String -> IO [String]
getDirectoryContents String
parent
   let contents :: [String]
contents = (String -> Bool) -> [String] -> [String]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (String -> Bool) -> String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
isPrefixOf String
".") [String]
fullContents
   [[String]]
paths <- [String] -> (String -> IO [String]) -> IO [[String]]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [String]
contents ((String -> IO [String]) -> IO [[String]])
-> (String -> IO [String]) -> IO [[String]]
forall a b. (a -> b) -> a -> b
$ \String
name -> do
      let path :: String
path = String
parent String -> String -> String
</> String
name
      Bool
isDirectory <- String -> IO Bool
doesDirectoryExist String
path
      if Bool
isDirectory
         then String -> IO [String]
getRecursiveContents String
path
         else [String] -> IO [String]
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return [String
path]
   [String] -> IO [String]
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ([String] -> IO [String]) -> [String] -> IO [String]
forall a b. (a -> b) -> a -> b
$ [[String]] -> [String]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[String]]
paths


{- | Construct a zip Archive containing epub book data from the 
     specified directory
-}
mkEpubArchive :: FilePath -> IO Archive
mkEpubArchive :: String -> IO Archive
mkEpubArchive String
rootDir = do
   String -> IO ()
setCurrentDirectory String
rootDir

   let mimetype :: [String]
mimetype = [String
"mimetype"]
   [String]
allFiles <- String -> IO [String]
getRecursiveContents String
"."
   let restFiles :: [String]
restFiles = [String]
allFiles [String] -> [String] -> [String]
forall a. Eq a => [a] -> [a] -> [a]
\\ [String]
mimetype

   (Archive -> [String] -> IO Archive)
-> [String] -> Archive -> IO Archive
forall a b c. (a -> b -> c) -> b -> a -> c
flip ([ZipOption] -> Archive -> [String] -> IO Archive
addFilesToArchive [ZipOption
OptRecursive]) [String]
restFiles (Archive -> IO Archive)
-> (Archive -> IO Archive) -> Archive -> IO Archive
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=>
      (Archive -> [String] -> IO Archive)
-> [String] -> Archive -> IO Archive
forall a b c. (a -> b -> c) -> b -> a -> c
flip ([ZipOption] -> Archive -> [String] -> IO Archive
addFilesToArchive []) [String
"mimetype"]
      (Archive -> IO Archive) -> Archive -> IO Archive
forall a b. (a -> b) -> a -> b
$ Archive
emptyArchive


-- | Read a zip Archive from disk
readArchive :: FilePath -> IO Archive
readArchive :: String -> IO Archive
readArchive = (ByteString -> Archive) -> IO ByteString -> IO Archive
forall a b. (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ByteString -> Archive
toArchive (IO ByteString -> IO Archive)
-> (String -> IO ByteString) -> String -> IO Archive
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> IO ByteString
B.readFile


-- | Write a zip Archive to disk using the specified filename
writeArchive :: FilePath -> Archive -> IO ()
writeArchive :: String -> Archive -> IO ()
writeArchive String
zipPath = (String -> ByteString -> IO ()
B.writeFile String
zipPath) (ByteString -> IO ())
-> (Archive -> ByteString) -> Archive -> IO ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Archive -> ByteString
fromArchive