module Data.Text.WordCount.FileRead where
import Text.Pandoc
import Filesystem.Path.CurrentOS as F
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import qualified Data.ByteString.Lazy as BSL
import System.FilePath.Glob
eitherError = either (error . show) id
globFile :: [String] -> IO T.Text
globFile files = fmap T.concat . sequence $ fmap processFile files
processFile :: String -> IO T.Text
processFile filepath = T.filter goodChar <$> case (extension . decodeString $ filepath) of
(Just "md") -> T.pack . writePlain def . filterCode . eitherError . readMarkdown def . T.unpack <$> TIO.readFile filepath
(Just "dbk") -> T.pack . writePlain def . filterCode . eitherError . readDocBook def . T.unpack <$> TIO.readFile filepath
(Just "docx") -> T.pack . writePlain def . filterCode . fst . eitherError . readDocx def <$> BSL.readFile filepath
(Just "epub") -> T.pack . writePlain def . filterCode . fst . eitherError . readEPUB def <$> BSL.readFile filepath
(Just "html") -> T.pack . writePlain def . filterCode . eitherError . readHtml def . T.unpack <$> TIO.readFile filepath
(Just "tex") -> T.pack . writePlain def . filterCode . eitherError . readLaTeX def . T.unpack <$> TIO.readFile filepath
(Just "xml") -> T.pack . writePlain def . filterCode . eitherError . readOPML def . T.unpack <$> TIO.readFile filepath
(Just "odt") -> T.pack . writePlain def . filterCode . fst . eitherError . readOdt def <$> BSL.readFile filepath
(Just "rst") -> T.pack . writePlain def . filterCode . eitherError . readRST def . T.unpack <$> TIO.readFile filepath
(Just "textile") -> T.pack . writePlain def . filterCode . eitherError . readTextile def . T.unpack <$> TIO.readFile filepath
_ -> TIO.readFile filepath
goodChar :: Char -> Bool
goodChar = not . flip any (".,?_()![]{}*&$#" :: String) . (==)
filterCode :: Pandoc -> Pandoc
filterCode (Pandoc meta content) = Pandoc meta $ filter rightBlock content
where rightBlock CodeBlock { } = False
rightBlock Table { } = False
rightBlock _ = True