module Main where import Shell.Utility.ParseArgument (parseNumber) import Shell.Utility.Exit (exitFailureMsg) import qualified Options.Applicative as OP import qualified Text.HTML.Tagchup.Parser as TagParser import qualified Text.HTML.Tagchup.Tag.Match as TagMatch import qualified Text.HTML.Tagchup.Tag as Tag import qualified Text.XML.Basic.Attribute as Attr import qualified Text.XML.Basic.Name.MixedCase as Name import qualified Text.XML.Basic.Name as NameC import qualified Data.Spreadsheet as Spreadsheet import qualified Data.NonEmpty.Mixed as NonEmptyM import qualified Data.NonEmpty as NonEmpty import qualified Data.List.HT as ListHT import Data.Tuple.HT (mapSnd) import Data.Maybe (mapMaybe) import Data.Monoid ((<>)) import Control.Monad (join, guard) import Control.Applicative (pure, (<*>), (<|>)) import Text.Printf (printf) maybeTableName :: Tag.T Name.T String -> Maybe String maybeTableName tag = do (foundName, attrs) <- Tag.maybeOpen tag guard $ NameC.match "table:table" foundName Attr.lookupLit "table:name" attrs maybeCell :: Tag.T Name.T String -> Maybe Int maybeCell tag = do (foundName, attrs) <- Tag.maybeOpen tag guard $ NameC.match "table:table-cell" foundName case fmap reads $ Attr.lookupLit "table:number-columns-repeated" attrs of Just [(n,_)] -> return n _ -> return 1 {- -} extractTablesNames :: [Tag.T Name.T String] -> [String] extractTablesNames = mapMaybe maybeTableName listTables :: FilePath -> IO () listTables input = mapM_ putStrLn . extractTablesNames . TagParser.runSoup =<< readFile input extractTablesContents :: [Tag.T Name.T String] -> [(String, [[String]])] extractTablesContents = map (mapSnd ( map (concatMap (uncurry $ \n -> (\texts -> replicate n $ case texts of text:_ -> text [] -> "") . mapMaybe Tag.maybeText . dropWhile (not . TagMatch.openNameLit "text:p")) . snd . ListHT.segmentBeforeJust maybeCell . NonEmpty.tail) . snd . NonEmptyM.segmentBefore (TagMatch.openNameLit "table:table-row"))) . snd . ListHT.segmentBeforeJust maybeTableName contentFromTables :: Char -> FilePath -> IO () contentFromTables separator input = mapM_ (\(tableName, content) -> do putStrLn "" putStrLn tableName putStrLn (Spreadsheet.toString '"' separator content)) . extractTablesContents . TagParser.runSoup =<< readFile input contentFromTable :: Char -> Either String Int -> FilePath -> IO () contentFromTable separator tableSelector input = do tables <- extractTablesContents . TagParser.runSoup <$> readFile input putStr . Spreadsheet.toString '"' separator =<< case filter (\(tableId, (tableName, _content)) -> either (tableName==) (tableId==) tableSelector) $ zip [1..] tables of (_,(_,found)):_ -> return found _ -> exitFailureMsg $ "table with " ++ either (printf "number %d") (printf "name %s") tableSelector ++ " not found" parser :: OP.Parser (IO ()) parser = ((OP.flag' listTables $ OP.long "list-tables" <> OP.help "List all tables in an ODS document") <|> (pure contentFromTable <*> (OP.option (OP.eitherReader (\str -> case str of "TAB" -> Right '\t' [c] -> Right c _ -> Left "separator must be one character")) $ OP.long "separator" <> OP.metavar "CHAR" <> OP.value ',' <> OP.help "CSV separator, TAB for tabulator") <*> ( (fmap Left $ OP.strOption $ OP.long "sheetname" <> OP.metavar "NAME" <> OP.help "Select table by name") <|> (fmap Right $ OP.option (OP.eitherReader $ parseNumber "page" (0<) "positive") $ OP.long "sheetnumber" <> OP.metavar "ONEBASED" <> OP.help "Select table by number") ))) <*> OP.strArgument (OP.metavar "INPUT" <> OP.help "Input Document") info :: OP.Parser a -> OP.ParserInfo a info p = OP.info (OP.helper <*> p) (OP.fullDesc <> OP.progDesc "Convert Open Document Spreadsheet ODS to CSV.") main :: IO () main = join $ OP.execParser $ info parser