{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards   #-}
{-# LANGUAGE DeriveGeneric #-}
module Codec.Xlsx.Types.Internal.SharedStringTable (
    -- * Main types
    SharedStringTable(..)
  , sstConstruct
  , sstLookupText
  , sstLookupRich
  , sstItem
  , sstEmpty
  ) where

import Control.Monad
import qualified Data.Map as Map
import Data.Maybe (mapMaybe)
import qualified Data.Set as Set
import Data.Text (Text)
import Data.Vector (Vector)
import qualified Data.Vector as V
import GHC.Generics (Generic)
import Numeric.Search.Range (searchFromTo)
import Safe (fromJustNote)
import Text.XML
import Text.XML.Cursor

import Codec.Xlsx.Parser.Internal
import Codec.Xlsx.Types
import Codec.Xlsx.Writer.Internal

-- | Shared string table
--
-- A workbook can contain thousands of cells containing string (non-numeric)
-- data. Furthermore this data is very likely to be repeated across many rows or
-- columns. The goal of implementing a single string table that is shared across
-- the workbook is to improve performance in opening and saving the file by only
-- reading and writing the repetitive information once.
--
-- Relevant parts of the ECMA standard (2nd edition, part 1,
-- <http://www.ecma-international.org/publications/standards/Ecma-376.htm>;
-- page numbers refer to the page in the PDF rather than the page number as
-- printed on the page):
--
-- * Section 18.4, "Shared String Table" (p. 1712)
--   in particular subsection 18.4.9, "sst (Shared String Table)" (p. 1726)
--
-- TODO: The @extLst@ child element is currently unsupported.
newtype SharedStringTable = SharedStringTable {
    sstTable :: Vector XlsxText
    -- ^ The table entries; an entry's position in the vector is the index
    -- used by 'sstItem' and returned by the lookup functions.
  }
  deriving (Eq, Ord, Show, Generic)

-- | A shared string table that contains no entries.
sstEmpty :: SharedStringTable
sstEmpty = SharedStringTable V.empty

{-------------------------------------------------------------------------------
  Rendering
-------------------------------------------------------------------------------}

-- | Renders the table as a complete XML document whose root element is
-- @sst@, using the 'ToElement' instance for the element itself.
instance ToDocument SharedStringTable where
  toDocument = documentFromElement "Shared string table generated by xlsx"
             . toElement "sst"

-- | See @CT_Sst@, p. 3902.
--
-- Each table entry is rendered as one @si@ child element, in table order.
-- The element carries no attributes.
--
-- TODO: The @count@ and @uniqCount@ attributes are currently unsupported.
instance ToElement SharedStringTable where
  toElement nm SharedStringTable{..} = Element {
      elementName       = nm
    , elementAttributes = Map.empty
    , elementNodes      = map (NodeElement . toElement "si")
                        $ V.toList sstTable
    }

{-------------------------------------------------------------------------------
  Parsing
-------------------------------------------------------------------------------}

-- | See @CT_Sst@, p. 3902
--
-- Collects every @si@ child of the cursor, in document order, into a table.
-- Exactly one table is always produced, even when there are no @si@
-- children (the table is then empty).
--
-- The optional attributes @count@ and @uniqCount@ are currently ignored.
instance FromCursor SharedStringTable where
  fromCursor cur = do
    let
      items = cur $/ element (n_ "si") >=> fromCursor
    return (SharedStringTable (V.fromList items))

{-------------------------------------------------------------------------------
  Extract shared strings
-------------------------------------------------------------------------------}

-- | Construct the 'SharedStringTable' from an existing document
--
-- Every plain-text and rich-text cell value found in the given worksheets
-- becomes a table entry; all other cell values are skipped. The entries are
-- deduplicated and stored in ascending 'Ord' order, which is what the binary
-- search in 'sstLookup' relies on.
sstConstruct :: [Worksheet] -> SharedStringTable
sstConstruct =
    SharedStringTable . V.fromList . uniq . concatMap goSheet
  where
    -- All shareable strings of a single worksheet (duplicates included).
    goSheet :: Worksheet -> [XlsxText]
    goSheet = mapMaybe (_cellValue >=> sstEntry) . Map.elems . _wsCells

    -- Only text and rich-text values are stored in the shared string table.
    sstEntry :: CellValue -> Maybe XlsxText
    sstEntry (CellText text) = Just $ XlsxText text
    sstEntry (CellRich rich) = Just $ XlsxRichText rich
    sstEntry _               = Nothing

    -- Going through a 'Set' both removes duplicates and sorts ascending.
    uniq :: Ord a => [a] -> [a]
    uniq = Set.elems . Set.fromList

-- | Index of a plain-text string in the table.
--
-- Wraps the text in 'XlsxText' and delegates to 'sstLookup', so it is
-- partial in the same way: the lookup goes through 'fromJustNote' and fails
-- with an error when the search does not yield an index.
sstLookupText :: SharedStringTable -> Text -> Int
sstLookupText sst = sstLookup sst . XlsxText

-- | Index of a rich-text string (a list of runs) in the table.
--
-- Wraps the runs in 'XlsxRichText' and delegates to 'sstLookup', so it is
-- partial in the same way: the lookup goes through 'fromJustNote' and fails
-- with an error when the search does not yield an index.
sstLookupRich :: SharedStringTable -> [RichTextRun] -> Int
sstLookupRich sst = sstLookup sst . XlsxRichText

-- | Internal generalization used by 'sstLookupText' and 'sstLookupRich'
--
-- Performs a binary search for the entry, so the table must be sorted in
-- ascending order (as produced by 'sstConstruct').
--
-- This function is partial: it fails with an error (via 'fromJustNote') when
-- the entry is not present in the table, including when the table is empty.
sstLookup :: SharedStringTable -> XlsxText -> Int
sstLookup SharedStringTable{sstTable = shared} si =
    fromJustNote ("SST entry for " ++ show si ++ " not found") $ do
      -- Least index whose entry is >= the one we are looking for.
      ix <- searchFromTo (\p -> shared V.! p >= si) 0 (V.length shared - 1)
      -- The search only establishes a lower bound; without this check a
      -- missing entry would silently resolve to the index of the next larger
      -- string instead of reporting "not found".
      guard (shared V.! ix == si)
      return ix

-- | The entry stored at the given index, or 'Nothing' when the index is
-- outside the bounds of the table.
sstItem :: SharedStringTable -> Int -> Maybe XlsxText
sstItem (SharedStringTable shared) = (V.!?) shared