module Main where

import Shell.Utility.ParseArgument (parseNumber)
import Shell.Utility.Exit (exitFailureMsg)

import qualified Options.Applicative as OP

import qualified Text.HTML.Tagchup.Parser as TagParser
import qualified Text.HTML.Tagchup.Process as TagProc
import qualified Text.HTML.Tagchup.Tag.Match as TagMatch
import qualified Text.HTML.Tagchup.Tag as Tag
import qualified Text.XML.Basic.Attribute as Attr
import qualified Text.XML.Basic.Name.MixedCase as Name -- Qualified would also be appropriate
import qualified Text.XML.Basic.Name as NameC

import qualified Codec.Archive.Zip as Zip
import qualified Data.ByteString.Lazy.UTF8 as B_UTF8
import qualified Data.ByteString.Lazy.Char8 as BC

import qualified Data.Spreadsheet as Spreadsheet
import qualified Data.NonEmpty.Mixed as NonEmptyM
import qualified Data.NonEmpty as NonEmpty
import qualified Data.List.HT as ListHT
import qualified Data.List as List
import qualified Data.Char as Char
import Data.Function.HT (Id)
import Data.Tuple.HT (mapSnd)
import Data.Maybe (mapMaybe)
import Data.Monoid ((<>))

import Control.Monad (join, guard)
import Control.Applicative (pure, (<*>), (<|>))

import Text.Printf (printf)


readByteContent :: FilePath -> IO BC.ByteString
readByteContent path =
   if List.isSuffixOf ".ods" path
      then do
         archive <- Zip.toArchive <$> BC.readFile path
         case filter (("content.xml"==) . Zip.eRelativePath) $
               Zip.zEntries archive of
            [] -> exitFailureMsg "content.xml missing in ODS file"
            -- FixMe: Zip.fromEntry is a partial function
            contentEntry : _ -> return $ Zip.fromEntry contentEntry
      else BC.readFile path


fixNameType :: Id [Tag.T Name.T String]
fixNameType = id

{-
In principle it would be fine to ignore encoding, that is,
read and write bytestrings and thus transfer encoding from XML to CSV.
However, we need to match table names.
-}
decode :: BC.ByteString -> String
decode bytes =
   case fmap (map Char.toLower) $ TagProc.getXMLEncoding $
         fixNameType $ TagParser.runSoup $ BC.take 10000 bytes of
      Just "utf-8" -> B_UTF8.toString bytes
      _ -> BC.unpack bytes


maybeTableName :: Tag.T Name.T String -> Maybe String
maybeTableName tag = do
   (foundName, attrs) <- Tag.maybeOpen tag
   guard $ NameC.match "table:table" foundName
   Attr.lookupLit "table:name" attrs

maybeCell :: Tag.T Name.T String -> Maybe Int
maybeCell tag = do
   (foundName, attrs) <- Tag.maybeOpen tag
   guard $ NameC.match "table:table-cell" foundName
   case fmap reads $ Attr.lookupLit "table:number-columns-repeated" attrs of
      Just [(n,_)] -> return n
      _ -> return 1

{-
<config:config-item-map-entry config:name="BlaBlaBla">
<table:table table:name="BlaBlaBla" table:style-name="ta1">
-}
extractTablesNames :: [Tag.T Name.T String] -> [String]
extractTablesNames = mapMaybe maybeTableName

listTables :: IO BC.ByteString -> IO ()
listTables readInput =
   mapM_ putStrLn . extractTablesNames . TagParser.runSoup . decode
      =<< readInput


extractTablesContents :: [Tag.T Name.T String] -> [(String, [[String]])]
extractTablesContents =
   map (mapSnd (
         map
            (concatMap
               (uncurry $ \n ->
                  (\texts ->
                     replicate n $
                     case texts of
                        text:_ -> text
                        [] -> "")
                   .
                   mapMaybe Tag.maybeText
                   .
                   dropWhile (not . TagMatch.openNameLit "text:p"))
             .
             snd
             .
             ListHT.segmentBeforeJust maybeCell
             .
             NonEmpty.tail)
         .
         snd
         .
         NonEmptyM.segmentBefore
            (TagMatch.openNameLit "table:table-row"))) .
   snd . ListHT.segmentBeforeJust maybeTableName

contentFromTables :: Char -> IO BC.ByteString -> IO ()
contentFromTables separator readInput =
   mapM_
      (\(tableName, content) -> do
         putStrLn ""
         putStrLn tableName
         putStrLn (Spreadsheet.toString '"' separator content)) .
   extractTablesContents .
   TagParser.runSoup . decode
      =<< readInput

contentFromTable :: Char -> Either String Int -> IO BC.ByteString -> IO ()
contentFromTable separator tableSelector readInput = do
   tables <- extractTablesContents . TagParser.runSoup . decode <$> readInput
   putStr . Spreadsheet.toString '"' separator =<<
      case filter (\(tableId, (tableName, _content)) ->
                     either (tableName==) (tableId==) tableSelector) $
           zip [1..] tables of
         (_,(_,found)):_ -> return found
         _ ->
            exitFailureMsg $
               "table with " ++
               either (printf "name %s") (printf "number %d") tableSelector ++
               " not found"


parser :: OP.Parser (IO ())
parser =
   ((OP.flag' listTables $
         OP.long "list-tables" <>
         OP.help "List all tables in an ODS document")
      <|>
      (pure contentFromTable
         <*>
         (OP.option
            (OP.eitherReader (\str ->
               case str of
                  "TAB" -> Right '\t'
                  [c] -> Right c
                  _ -> Left "separator must be one character")) $
            OP.long "separator" <>
            OP.metavar "CHAR" <>
            OP.value ',' <>
            OP.help "CSV separator, TAB for tabulator")
         <*>
         (
            (fmap Left $
             OP.strOption $
               OP.long "sheetname" <>
               OP.metavar "NAME" <>
               OP.help "Select table by name")
            <|>
            (fmap Right $
             OP.option (OP.eitherReader $ parseNumber "page" (0<) "positive") $
               OP.long "sheetnumber" <>
               OP.metavar "ONEBASED" <>
               OP.help "Select table by number")
         )))
   <*>
      OP.argument
         (OP.maybeReader $ Just . readByteContent)
         (OP.metavar "INPUT" <>
          OP.value BC.getContents <>
          OP.help "Input Document")

info :: OP.Parser a -> OP.ParserInfo a
info p =
   OP.info
      (OP.helper <*> p)
      (OP.fullDesc <>
       OP.progDesc "Convert Open Document Spreadsheet ODS to CSV.")

main :: IO ()
main = join $ OP.execParser $ info parser