{-# LANGUAGE FlexibleContexts  #-}
{-# LANGUAGE LambdaCase        #-}
{-# LANGUAGE OverloadedStrings #-}

{- | Convenience utility to read Xlsx tabular cells.

The majority of the @toTableRows*@ functions assume that the table
of interest consiste of contiguous rows styled with borders lines
surrounding all cells, with possible text above and below the table
that is not of interest. Like so:

@
Some documentation here....
---------------------------
| Header1 | Header2 | ... |
---------------------------
| Value1  | Value2  | ... |
---------------------------
| Value1  | Value2  | ... |
---------------------------
Maybe some annoying text here, I don't care about.
@

The heauristic used for table row selection in these functions is
that any table rows will have a bottom border line.

If the above heuristic is not valid for your table you can instead
provide your own row selection predicate to the `toTableRowsCustom`
function. For example, the predicate @\\_ _ -> True@ (or @(const
. const) True@) will select all contiguous rows.

-}
module Codec.Xlsx.Util.Tabular
       (
         -- * Types
         Tabular
       , TabularHead
       , TabularRow
         -- * Lenses
         -- ** Tabular
       , tabularHeads
       , tabularRows
         -- ** TabularHead
       , tabularHeadIx
       , tabularHeadLabel
         -- ** TabularRow
       , tabularRowIx
       , tabularRowCells
         -- * Methods
       , def
         -- * Functions
       , toTableRowsFromFile
       , toTableRows
       , toTableRows'
         -- * Custom row predicates
       , toTableRowsCustom
       ) where

import Codec.Xlsx.Util.Tabular.Imports
import qualified Data.ByteString.Lazy as ByteString

type Row =
  (Int, Cols)

type Rows =
  [(Int, Cols)] -- [Row]

type Cols =
  [(Int, Cell)]

type RowValues =
  [(Int, [(Int, Maybe CellValue)])]

-- | A @RowPredicate@ is given the Xlsx "StyleSheet" as well as the
-- row itself (consisting of the row's index and the row's cells) and
-- should return @True@ if the row is part of the table and false
-- otherwise.
type RowPredicate =
  StyleSheet -> Row -> Bool

-- |Read tabular rows from the first sheel of an Xlsx file.
-- The table is assumed to consist of all contiguous rows
-- that have bottom border lines, starting with the header.
toTableRowsFromFile :: Int -- ^ Starting row index (header row)
                    -> String -- ^ File name
                    -> IO (Maybe Tabular)
toTableRowsFromFile offset fname = do
  s <- ByteString.readFile fname
  let xlsx = toXlsx s
      rows = toTableRows' xlsx offset
  pure rows

-- |Decode cells as tabular rows.
-- The table is assumed to consist of all contiguous rows
-- that have bottom border lines, starting with the header.
toTableRows :: Xlsx -- ^ Xlsx Workbook
            -> Text -- ^ Worksheet name to decode
            -> Int -- ^ Starting row index (header row)
            -> Maybe Tabular
toTableRows = toTableRowsCustom borderBottomPredicate

-- |Decode cells from first sheet as tabular rows.
-- The table is assumed to consist of all contiguous rows
-- that have bottom border lines, starting with the header.
toTableRows' :: Xlsx -- ^ Xlsx Workbook
             -> Int -- ^ Starting row index (header row)
             -> Maybe Tabular
toTableRows' xlsx offset =
  toTableRows xlsx firstSheetName offset
  where
    firstSheetName = fst $ head $ xlsx ^. xlSheets
    -- ^ TODO: Is this still true with xlsx-0.3 or are sheets now
    -- in alphabetical order??

-- | Decode cells as tabular rows.
--   The table is assumed to consist of all contiguous rows
--   that fulfill the given predicate, starting with the header.
--
--   The predicate function is given the Xlsx @StyleSheet@ as well
--   as a row (consisting of the row's index and the row's cells)
--   and should return @True@ if the row is part of the table.
toTableRowsCustom :: (StyleSheet -> (Int, [(Int, Cell)]) -> Bool)
                           -- ^ Predicate for row selection
                  -> Xlsx  -- ^ Xlsx Workbook
                  -> Text  -- ^ Worksheet name to decode
                  -> Int   -- ^ Starting row index (header row)
                  -> Maybe Tabular
toTableRowsCustom predicate xlsx sheetName offset = do
  styles <- parseStyleSheet (xlsx ^. xlStyles) ^? _Right
  rows   <- xlsx ^? ixSheet sheetName . wsCells . to toRows
  decodeRows (predicate styles) offset rows

decodeRows p offset rs = if null rs' then Nothing else Just $
  def
  & tabularHeads .~ header'
  & tabularRows .~ rows
  where
    rs' = getCells p offset rs
    header = head rs' ^. _2
    header' =
      header
      & fmap toText
      & join
    toText (i, Just (CellText t)) = [def
                                     & tabularHeadIx .~ i
                                     & tabularHeadLabel .~ t]
    toText _ = []
    cix = fmap (view tabularHeadIx) header'
      & fromList
    rows =
      fmap rowValue (tail rs')
    rowValue rvs =
      def
      & tabularRowIx .~ (rvs ^. _1)
      & tabularRowCells .~ (rvs ^. _2 & fmap f & join)
      where
        f (i, cell) =
          [cell | cix ^. contains i]

-- |Pickup cells that has value from line
getCells :: (Row -> Bool) -- ^ Predicate
         -> Int -- ^ Start line number
         -> Rows -- ^ cell rows
         -> RowValues
getCells p i rs =
  startAt i rs
  & takeContiguous i
  & takeWhile p
  & fmap rvs
  & filter vs
  where
    rvs (i, cs) =
      (i, rowValues cs)
    filter =
      Prelude.filter
    vs (i, cs) =
      any (\(_, v) -> isJust v) cs

startAt :: Int -> Rows -> Rows
startAt i rs =
  dropWhile f rs
  where
    f (x, _) =
      x < i

-- |Take contiguous rows that start from i
takeContiguous :: Int -> Rows -> Rows
takeContiguous i rs =
  [r | (x, r@(y, _)) <- zip [i..] rs, x == y]

-- |Take rows while all valued cell has bottom border line.
-- |  * no bottom border line means out of table.
borderBottomPredicate :: RowPredicate -- StyleSheet -> Row -> Bool
borderBottomPredicate ss = or . rowBordersHas borderBottom ss . snd

rowBordersHas v ss cs =
  x
  where
    x =
      fmap f cs
    f (i, cell) =
      cellHasBorder ss cell v

rowValues cs =
  x
  where
    x =
      fmap f cs
    f (i, cell) =
      (i, cell ^. cellValue)

cellHasBorder ss cell v =
  fromMaybe False mb
  where
    b = cellBorder ss cell
    mb = borderStyleHasLine v <$> b

cellBorder :: StyleSheet -> Cell -> Maybe Border
cellBorder ss cell =
  view cellStyle cell
  >>= pure . xf
  >>= view cellXfBorderId
  >>= pure . bd
  where
    xf n = (ss ^. styleSheetCellXfs) !! n
    bd n = (ss ^. styleSheetBorders) !! n

borderStyleHasLine v b =
  fromMaybe False value
  where
  value =
    view v b
    >>= view borderStyleLine
    >>= pure . (/= LineStyleNone)