{-# LANGUAGE OverloadedStrings  #-}
{-# LANGUAGE RecordWildCards    #-}
{-# LANGUAGE TemplateHaskell    #-}
{-# OPTIONS_GHC -Wall #-}
module Codec.Xlsx.Types.SharedStringTable (
    -- * Main types
    SharedStringTable(..)
  , StringItem(..)
  , sstConstruct
  , sstLookupText
  , sstLookupRich
  , sstItem
    -- * Lenses
    -- ** SharedStringTable
  , sharedStringTable
  ) where

import           Control.Lens hiding (element)
import           Control.Monad

import           Data.Maybe (mapMaybe)
import           Data.Text (Text)
import           Data.Vector (Vector)
import           Numeric.Search.Range (searchFromTo)
import           Text.XML
import           Text.XML.Cursor
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Vector as V

import           Codec.Xlsx.Parser.Internal
import           Codec.Xlsx.Types
import           Codec.Xlsx.Writer.Internal

-- | Shared string table
--
-- A workbook can contain thousands of cells containing string (non-numeric)
-- data. Furthermore this data is very likely to be repeated across many rows or
-- columns. The goal of implementing a single string table that is shared across
-- the workbook is to improve performance in opening and saving the file by only
-- reading and writing the repetitive information once.
--
-- Relevant parts of the EMCA standard (2nd edition, part 1,
-- <http://www.ecma-international.org/publications/standards/Ecma-376.htm>),
-- page numbers refer to the page in the PDF rather than the page number as
-- printed on the page):
--
-- * Section 18.4, "Shared String Table" (p. 1712)
--   in particular subsection 18.4.9, "sst (Shared String Table)" (p. 1726)
--
-- TODO: The @extLst@ child element is currently unsupported.
data SharedStringTable = SharedStringTable {
    _sharedStringTable :: Vector StringItem
  }
  deriving (Show, Eq, Ord)

-- | String Item
--
-- This element is the representation of an individual string in the Shared
-- String table.
--
-- The spec says: "If the string is just a simple string with formatting applied
-- at the cell level, then the String Item (si) should contain a single text
-- element used to express the string. However, if the string in the cell is
-- more complex - i.e., has formatting applied at the character level - then the
-- string item shall consist of multiple rich text runs which collectively are
-- used to express the string.". So we have either a single "Text" field, or
-- else a list of "RichTextRun"s, each of which is some "Text" with layout
-- properties.
--
-- TODO: Currently we do not support @phoneticPr@ (Phonetic Properties, 18.4.3,
-- p. 1723) or @rPh@ (Phonetic Run, 18.4.6, p. 1725).
--
-- Section 18.4.8, "si (String Item)" (p. 1725)
data StringItem =
    StringItemText Text
  | StringItemRich [RichTextRun]
  deriving (Show, Eq, Ord)

{-------------------------------------------------------------------------------
  Lenses
-------------------------------------------------------------------------------}

makeLenses ''SharedStringTable

{-------------------------------------------------------------------------------
  Rendering
-------------------------------------------------------------------------------}

instance ToDocument SharedStringTable where
  toDocument = documentFromElement "Shared string table generated by xlsx"
             . toElement "sst"

-- | See @CT_Sst@, p. 3902.
--
-- TODO: The @count@ and @uniqCount@ attributes are currently unsupported.
instance ToElement SharedStringTable where
  toElement nm SharedStringTable{..} = Element {
      elementName       = nm
    , elementAttributes = Map.empty
    , elementNodes      = map (NodeElement . toElement "si")
                        $ V.toList _sharedStringTable
    }

-- | See @CT_Rst@, p. 3903
instance ToElement StringItem where
  toElement nm si = Element {
      elementName       = nm
    , elementAttributes = Map.empty
    , elementNodes      = map NodeElement $
        case si of
          StringItemText text -> [elementContent "t" text]
          StringItemRich rich -> map (toElement "r") rich
    }

{-------------------------------------------------------------------------------
  Parsing
-------------------------------------------------------------------------------}

-- | See @CT_Sst@, p. 3902
--
-- The optional attributes @count@ and @uniqCount@ are being ignored at least currently
instance FromCursor SharedStringTable where
  fromCursor cur = do
    let
      items = cur $/ element (n"si") >=> fromCursor
    return (SharedStringTable (V.fromList items))

-- | See @CT_Rst@, p. 3903
instance FromCursor StringItem where
  fromCursor cur = do
    let
      ts = cur $/ element (n"t") &/ content
      rs = cur $/ element (n"r") >=> fromCursor
    case (ts,rs) of
      ([t], []) ->
        return $ StringItemText t
      ([], _:_) ->
        return $ StringItemRich rs
      _ ->
        fail "invalid item"

{-------------------------------------------------------------------------------
  Extract shared strings
-------------------------------------------------------------------------------}

-- | Construct the 'SharedStringsTable' from an existing document
sstConstruct :: [Worksheet] -> SharedStringTable
sstConstruct =
    SharedStringTable . V.fromList . uniq . concatMap goSheet
  where
    goSheet :: Worksheet -> [StringItem]
    goSheet = mapMaybe (_cellValue >=> sstEntry) . Map.elems . _wsCells

    sstEntry :: CellValue -> Maybe StringItem
    sstEntry (CellText text) = Just $ StringItemText text
    sstEntry (CellRich rich) = Just $ StringItemRich rich
    sstEntry _               = Nothing

    uniq :: Ord a => [a] -> [a]
    uniq = Set.elems . Set.fromList

sstLookupText :: SharedStringTable -> Text -> Int
sstLookupText sst = sstLookup sst . StringItemText

sstLookupRich :: SharedStringTable -> [RichTextRun] -> Int
sstLookupRich sst = sstLookup sst . StringItemRich

-- | Internal generalization used by 'sstLookupText' and 'sstLookupRich'
sstLookup :: SharedStringTable -> StringItem -> Int
sstLookup SharedStringTable{_sharedStringTable = shared} si =
    case searchFromTo (\p -> shared V.! p >= si) 0 (V.length shared - 1) of
      Just i  -> i
      Nothing -> error $ "SST entry for " ++ show si ++ " not found"

sstItem :: SharedStringTable -> Int -> StringItem
sstItem SharedStringTable{_sharedStringTable = shared} = (V.!) shared