{-# LANGUAGE CPP                        #-}
{-# LANGUAGE ConstraintKinds            #-}
{-# LANGUAGE DeriveAnyClass             #-}
{-# LANGUAGE DeriveGeneric              #-}
{-# LANGUAGE DerivingStrategies         #-}
{-# LANGUAGE FlexibleContexts           #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase                 #-}
{-# LANGUAGE OverloadedStrings          #-}
{-# LANGUAGE PackageImports             #-}
{-# LANGUAGE RankNTypes                 #-}
{-# LANGUAGE RecordWildCards            #-}
{-# LANGUAGE ScopedTypeVariables        #-}
{-# LANGUAGE StrictData                 #-}
{-# LANGUAGE TemplateHaskell            #-}
{-# LANGUAGE TypeApplications           #-}
{-# LANGUAGE UndecidableInstances       #-}

-- |
-- Module      : Codex.Xlsx.Parser.Stream
-- Description : Stream parser for xlsx files
-- Copyright   :
--   (c) Adam, 2021
--   (c) Supercede, 2021
-- License     : MIT
-- Stability   : experimental
-- Portability : POSIX
--
-- Parse @.xlsx@ sheets in constant memory.
--
-- All actions on an xlsx file run inside the 'XlsxM' monad, and must
-- be run with 'runXlsxM'. XlsxM is not a monad transformer, a design
-- inherited from the "zip" package's ZipArchive monad.
--
-- Inside the XlsxM monad, you can stream 'SheetItem's (a row) from a
-- particular sheet, using 'readSheetByIndex', which is callback-based and tied to IO.
--
module Codec.Xlsx.Parser.Stream
  ( XlsxM
  , runXlsxM
  , WorkbookInfo(..)
  , SheetInfo(..)
  , wiSheets
  , getWorkbookInfo
  , CellRow
  , readSheet
  , countRowsInSheet
  , collectItems
  -- ** Index
  , SheetIndex
  , makeIndex
  , makeIndexFromName
  -- ** SheetItem
  , SheetItem(..)
  , si_sheet_index
  , si_row
  -- ** Row
  , Row(..)
  , ri_row_index
  , ri_cell_row
  -- * Errors
  , SheetErrors(..)
  , AddCellErrors(..)
  , CoordinateErrors(..)
  , TypeError(..)
  , WorkbookError(..)
  ) where

import qualified "zip" Codec.Archive.Zip as Zip
import Codec.Xlsx.Types.Cell
import Codec.Xlsx.Types.Common
import Codec.Xlsx.Types.Internal (RefId (..))
import Codec.Xlsx.Types.Internal.Relationships (Relationship (..),
                                                Relationships (..))
import Conduit (PrimMonad, (.|))
import qualified Conduit as C
import qualified Data.Vector as V
#ifdef USE_MICROLENS
import Lens.Micro
import Lens.Micro.GHC ()
import Lens.Micro.Mtl
import Lens.Micro.Platform
import Lens.Micro.TH
#else
import Control.Lens
#endif
import Codec.Xlsx.Parser.Internal
import Control.Monad.Catch
import Control.Monad.Except
import Control.Monad.Reader
import Control.Monad.State.Strict
import Data.Bifunctor
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import Data.Conduit (ConduitT)
import qualified Data.DList as DL
import Data.Foldable
import Data.IORef
import Data.IntMap.Strict (IntMap)
import qualified Data.IntMap.Strict as IntMap
import qualified Data.Map.Strict as M
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as LT
import qualified Data.Text.Lazy.Builder as TB
import qualified Data.Text.Read as Read
import Data.Traversable (for)
import Data.XML.Types
import GHC.Generics
import Control.DeepSeq
import Codec.Xlsx.Parser.Internal.Memoize

import qualified Codec.Xlsx.Parser.Stream.HexpatInternal as HexpatInternal
import Control.Monad.Base
import Control.Monad.Trans.Control
import Text.XML.Expat.Internal.IO as Hexpat
import Text.XML.Expat.SAX as Hexpat

#ifdef USE_MICROLENS
(<>=) :: (MonadState s m, Monoid a) => ASetter' s a -> a -> m ()
l <>= a = modify (l <>~ a)
#else
#endif

type CellRow = IntMap Cell

-- | Sheet item
--
-- The current sheet at a time, every sheet is constructed of these items.
data SheetItem = MkSheetItem
  { SheetItem -> Int
_si_sheet_index :: Int       -- ^ The sheet number
  , SheetItem -> Row
_si_row         :: ~Row
  } deriving stock (forall x. Rep SheetItem x -> SheetItem
forall x. SheetItem -> Rep SheetItem x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep SheetItem x -> SheetItem
$cfrom :: forall x. SheetItem -> Rep SheetItem x
Generic, Int -> SheetItem -> ShowS
[SheetItem] -> ShowS
SheetItem -> FilePath
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [SheetItem] -> ShowS
$cshowList :: [SheetItem] -> ShowS
show :: SheetItem -> FilePath
$cshow :: SheetItem -> FilePath
showsPrec :: Int -> SheetItem -> ShowS
$cshowsPrec :: Int -> SheetItem -> ShowS
Show)
    deriving anyclass SheetItem -> ()
forall a. (a -> ()) -> NFData a
rnf :: SheetItem -> ()
$crnf :: SheetItem -> ()
NFData

data Row = MkRow
  { Row -> RowIndex
_ri_row_index   :: RowIndex  -- ^ Row number
  , Row -> CellRow
_ri_cell_row    :: ~CellRow  -- ^ Row itself
  } deriving stock (forall x. Rep Row x -> Row
forall x. Row -> Rep Row x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Row x -> Row
$cfrom :: forall x. Row -> Rep Row x
Generic, Int -> Row -> ShowS
[Row] -> ShowS
Row -> FilePath
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [Row] -> ShowS
$cshowList :: [Row] -> ShowS
show :: Row -> FilePath
$cshow :: Row -> FilePath
showsPrec :: Int -> Row -> ShowS
$cshowsPrec :: Int -> Row -> ShowS
Show)
    deriving anyclass Row -> ()
forall a. (a -> ()) -> NFData a
rnf :: Row -> ()
$crnf :: Row -> ()
NFData

makeLenses 'MkSheetItem
makeLenses 'MkRow

type SharedStringsMap = V.Vector Text

-- | Type of the excel value
--
-- Note: Some values are untyped and rules of their type resolution are not known.
-- They may be treated simply as strings as well as they may be context-dependent.
-- By far we do not bother with it.
data ExcelValueType
  = TS      -- ^ shared string
  | TStr    -- ^ either an inline string ("inlineStr") or a formula string ("str")
  | TN      -- ^ number
  | TB      -- ^ boolean
  | TE      -- ^ excell error, the sheet can contain error values, for example if =1/0, causes division by zero
  | Untyped -- ^ Not all values have types
  deriving stock (forall x. Rep ExcelValueType x -> ExcelValueType
forall x. ExcelValueType -> Rep ExcelValueType x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep ExcelValueType x -> ExcelValueType
$cfrom :: forall x. ExcelValueType -> Rep ExcelValueType x
Generic, Int -> ExcelValueType -> ShowS
[ExcelValueType] -> ShowS
ExcelValueType -> FilePath
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [ExcelValueType] -> ShowS
$cshowList :: [ExcelValueType] -> ShowS
show :: ExcelValueType -> FilePath
$cshow :: ExcelValueType -> FilePath
showsPrec :: Int -> ExcelValueType -> ShowS
$cshowsPrec :: Int -> ExcelValueType -> ShowS
Show)

-- | State for parsing sheets
data SheetState = MkSheetState
  { SheetState -> CellRow
_ps_row             :: ~CellRow        -- ^ Current row
  , SheetState -> Int
_ps_sheet_index     :: Int             -- ^ Current sheet ID (AKA 'sheetInfoSheetId')
  , SheetState -> RowIndex
_ps_cell_row_index  :: RowIndex        -- ^ Current row number
  , SheetState -> ColumnIndex
_ps_cell_col_index  :: ColumnIndex     -- ^ Current column number
  , SheetState -> Maybe Int
_ps_cell_style      :: Maybe Int
  , SheetState -> Bool
_ps_is_in_val       :: Bool            -- ^ Flag for indexing wheter the parser is in value or not
  , SheetState -> SharedStringsMap
_ps_shared_strings  :: SharedStringsMap -- ^ Shared string map
  , SheetState -> ExcelValueType
_ps_type            :: ExcelValueType  -- ^ The last detected value type

  , SheetState -> Text
_ps_text_buf        :: Text
  -- ^ for hexpat only, which can break up char data into multiple events
  , SheetState -> Bool
_ps_worksheet_ended :: Bool
  -- ^ For hexpat only, which can throw errors right at the end of the sheet
  -- rather than ending gracefully.
  } deriving stock (forall x. Rep SheetState x -> SheetState
forall x. SheetState -> Rep SheetState x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep SheetState x -> SheetState
$cfrom :: forall x. SheetState -> Rep SheetState x
Generic, Int -> SheetState -> ShowS
[SheetState] -> ShowS
SheetState -> FilePath
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [SheetState] -> ShowS
$cshowList :: [SheetState] -> ShowS
show :: SheetState -> FilePath
$cshow :: SheetState -> FilePath
showsPrec :: Int -> SheetState -> ShowS
$cshowsPrec :: Int -> SheetState -> ShowS
Show)
makeLenses 'MkSheetState

-- | State for parsing shared strings
data SharedStringsState = MkSharedStringsState
  { SharedStringsState -> Builder
_ss_string :: TB.Builder -- ^ String we are parsing
  , SharedStringsState -> DList Text
_ss_list   :: DL.DList Text -- ^ list of shared strings
  } deriving stock (forall x. Rep SharedStringsState x -> SharedStringsState
forall x. SharedStringsState -> Rep SharedStringsState x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep SharedStringsState x -> SharedStringsState
$cfrom :: forall x. SharedStringsState -> Rep SharedStringsState x
Generic, Int -> SharedStringsState -> ShowS
[SharedStringsState] -> ShowS
SharedStringsState -> FilePath
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [SharedStringsState] -> ShowS
$cshowList :: [SharedStringsState] -> ShowS
show :: SharedStringsState -> FilePath
$cshow :: SharedStringsState -> FilePath
showsPrec :: Int -> SharedStringsState -> ShowS
$cshowsPrec :: Int -> SharedStringsState -> ShowS
Show)
makeLenses 'MkSharedStringsState

type HasSheetState = MonadState SheetState
type HasSharedStringsState = MonadState SharedStringsState

-- | Represents sheets from the workbook.xml file. E.g.
-- <sheet name="Data" sheetId="1" state="hidden" r:id="rId2" /
data SheetInfo = SheetInfo
  { SheetInfo -> Text
sheetInfoName    :: Text,
    -- | The r:id attribute value.
    SheetInfo -> RefId
sheetInfoRelId   :: RefId,
    -- | The sheetId attribute value
    SheetInfo -> Int
sheetInfoSheetId :: Int
  } deriving (Int -> SheetInfo -> ShowS
[SheetInfo] -> ShowS
SheetInfo -> FilePath
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [SheetInfo] -> ShowS
$cshowList :: [SheetInfo] -> ShowS
show :: SheetInfo -> FilePath
$cshow :: SheetInfo -> FilePath
showsPrec :: Int -> SheetInfo -> ShowS
$cshowsPrec :: Int -> SheetInfo -> ShowS
Show, SheetInfo -> SheetInfo -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: SheetInfo -> SheetInfo -> Bool
$c/= :: SheetInfo -> SheetInfo -> Bool
== :: SheetInfo -> SheetInfo -> Bool
$c== :: SheetInfo -> SheetInfo -> Bool
Eq)

-- | Information about the workbook contained in xl/workbook.xml
-- (currently a subset)
data WorkbookInfo = WorkbookInfo
  { WorkbookInfo -> [SheetInfo]
_wiSheets :: [SheetInfo]
  } deriving Int -> WorkbookInfo -> ShowS
[WorkbookInfo] -> ShowS
WorkbookInfo -> FilePath
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [WorkbookInfo] -> ShowS
$cshowList :: [WorkbookInfo] -> ShowS
show :: WorkbookInfo -> FilePath
$cshow :: WorkbookInfo -> FilePath
showsPrec :: Int -> WorkbookInfo -> ShowS
$cshowsPrec :: Int -> WorkbookInfo -> ShowS
Show
makeLenses 'WorkbookInfo

data XlsxMState = MkXlsxMState
  { XlsxMState -> Memoized SharedStringsMap
_xs_shared_strings :: Memoized (V.Vector Text)
  , XlsxMState -> Memoized WorkbookInfo
_xs_workbook_info  :: Memoized WorkbookInfo
  , XlsxMState -> Memoized Relationships
_xs_relationships  :: Memoized Relationships
  }

newtype XlsxM a = XlsxM {forall a. XlsxM a -> ReaderT XlsxMState ZipArchive a
_unXlsxM :: ReaderT XlsxMState Zip.ZipArchive a}
  deriving newtype
    ( forall a b. a -> XlsxM b -> XlsxM a
forall a b. (a -> b) -> XlsxM a -> XlsxM b
forall (f :: * -> *).
(forall a b. (a -> b) -> f a -> f b)
-> (forall a b. a -> f b -> f a) -> Functor f
<$ :: forall a b. a -> XlsxM b -> XlsxM a
$c<$ :: forall a b. a -> XlsxM b -> XlsxM a
fmap :: forall a b. (a -> b) -> XlsxM a -> XlsxM b
$cfmap :: forall a b. (a -> b) -> XlsxM a -> XlsxM b
Functor,
      Functor XlsxM
forall a. a -> XlsxM a
forall a b. XlsxM a -> XlsxM b -> XlsxM a
forall a b. XlsxM a -> XlsxM b -> XlsxM b
forall a b. XlsxM (a -> b) -> XlsxM a -> XlsxM b
forall a b c. (a -> b -> c) -> XlsxM a -> XlsxM b -> XlsxM c
forall (f :: * -> *).
Functor f
-> (forall a. a -> f a)
-> (forall a b. f (a -> b) -> f a -> f b)
-> (forall a b c. (a -> b -> c) -> f a -> f b -> f c)
-> (forall a b. f a -> f b -> f b)
-> (forall a b. f a -> f b -> f a)
-> Applicative f
<* :: forall a b. XlsxM a -> XlsxM b -> XlsxM a
$c<* :: forall a b. XlsxM a -> XlsxM b -> XlsxM a
*> :: forall a b. XlsxM a -> XlsxM b -> XlsxM b
$c*> :: forall a b. XlsxM a -> XlsxM b -> XlsxM b
liftA2 :: forall a b c. (a -> b -> c) -> XlsxM a -> XlsxM b -> XlsxM c
$cliftA2 :: forall a b c. (a -> b -> c) -> XlsxM a -> XlsxM b -> XlsxM c
<*> :: forall a b. XlsxM (a -> b) -> XlsxM a -> XlsxM b
$c<*> :: forall a b. XlsxM (a -> b) -> XlsxM a -> XlsxM b
pure :: forall a. a -> XlsxM a
$cpure :: forall a. a -> XlsxM a
Applicative,
      Applicative XlsxM
forall a. a -> XlsxM a
forall a b. XlsxM a -> XlsxM b -> XlsxM b
forall a b. XlsxM a -> (a -> XlsxM b) -> XlsxM b
forall (m :: * -> *).
Applicative m
-> (forall a b. m a -> (a -> m b) -> m b)
-> (forall a b. m a -> m b -> m b)
-> (forall a. a -> m a)
-> Monad m
return :: forall a. a -> XlsxM a
$creturn :: forall a. a -> XlsxM a
>> :: forall a b. XlsxM a -> XlsxM b -> XlsxM b
$c>> :: forall a b. XlsxM a -> XlsxM b -> XlsxM b
>>= :: forall a b. XlsxM a -> (a -> XlsxM b) -> XlsxM b
$c>>= :: forall a b. XlsxM a -> (a -> XlsxM b) -> XlsxM b
Monad,
      Monad XlsxM
forall a. IO a -> XlsxM a
forall (m :: * -> *).
Monad m -> (forall a. IO a -> m a) -> MonadIO m
liftIO :: forall a. IO a -> XlsxM a
$cliftIO :: forall a. IO a -> XlsxM a
MonadIO,
      MonadThrow XlsxM
forall e a. Exception e => XlsxM a -> (e -> XlsxM a) -> XlsxM a
forall (m :: * -> *).
MonadThrow m
-> (forall e a. Exception e => m a -> (e -> m a) -> m a)
-> MonadCatch m
catch :: forall e a. Exception e => XlsxM a -> (e -> XlsxM a) -> XlsxM a
$ccatch :: forall e a. Exception e => XlsxM a -> (e -> XlsxM a) -> XlsxM a
MonadCatch,
      MonadCatch XlsxM
forall b. ((forall a. XlsxM a -> XlsxM a) -> XlsxM b) -> XlsxM b
forall a b c.
XlsxM a
-> (a -> ExitCase b -> XlsxM c) -> (a -> XlsxM b) -> XlsxM (b, c)
forall (m :: * -> *).
MonadCatch m
-> (forall b. ((forall a. m a -> m a) -> m b) -> m b)
-> (forall b. ((forall a. m a -> m a) -> m b) -> m b)
-> (forall a b c.
    m a -> (a -> ExitCase b -> m c) -> (a -> m b) -> m (b, c))
-> MonadMask m
generalBracket :: forall a b c.
XlsxM a
-> (a -> ExitCase b -> XlsxM c) -> (a -> XlsxM b) -> XlsxM (b, c)
$cgeneralBracket :: forall a b c.
XlsxM a
-> (a -> ExitCase b -> XlsxM c) -> (a -> XlsxM b) -> XlsxM (b, c)
uninterruptibleMask :: forall b. ((forall a. XlsxM a -> XlsxM a) -> XlsxM b) -> XlsxM b
$cuninterruptibleMask :: forall b. ((forall a. XlsxM a -> XlsxM a) -> XlsxM b) -> XlsxM b
mask :: forall b. ((forall a. XlsxM a -> XlsxM a) -> XlsxM b) -> XlsxM b
$cmask :: forall b. ((forall a. XlsxM a -> XlsxM a) -> XlsxM b) -> XlsxM b
MonadMask,
      Monad XlsxM
forall e a. Exception e => e -> XlsxM a
forall (m :: * -> *).
Monad m -> (forall e a. Exception e => e -> m a) -> MonadThrow m
throwM :: forall e a. Exception e => e -> XlsxM a
$cthrowM :: forall e a. Exception e => e -> XlsxM a
MonadThrow,
      MonadReader XlsxMState,
      MonadBase IO,
      MonadBaseControl IO
    )

-- | Initial parsing state
initialSheetState :: SheetState
initialSheetState :: SheetState
initialSheetState = MkSheetState
  { _ps_row :: CellRow
_ps_row             = forall a. Monoid a => a
mempty
  , _ps_sheet_index :: Int
_ps_sheet_index     = Int
0
  , _ps_cell_row_index :: RowIndex
_ps_cell_row_index  = RowIndex
0
  , _ps_cell_col_index :: ColumnIndex
_ps_cell_col_index  = ColumnIndex
0
  , _ps_is_in_val :: Bool
_ps_is_in_val       = Bool
False
  , _ps_shared_strings :: SharedStringsMap
_ps_shared_strings  = forall a. Monoid a => a
mempty
  , _ps_type :: ExcelValueType
_ps_type            = ExcelValueType
Untyped
  , _ps_text_buf :: Text
_ps_text_buf        = forall a. Monoid a => a
mempty
  , _ps_worksheet_ended :: Bool
_ps_worksheet_ended = Bool
False
  , _ps_cell_style :: Maybe Int
_ps_cell_style      = forall a. Maybe a
Nothing
  }

-- | Initial parsing state
initialSharedStrings :: SharedStringsState
initialSharedStrings :: SharedStringsState
initialSharedStrings = MkSharedStringsState
  { _ss_string :: Builder
_ss_string = forall a. Monoid a => a
mempty
  , _ss_list :: DList Text
_ss_list = forall a. Monoid a => a
mempty
  }

-- | Parse shared string entry from xml event and return it once
-- we've reached the end of given element
{-# SCC parseSharedStrings #-}
parseSharedStrings
  :: ( MonadThrow m
     , HasSharedStringsState m
     )
  => HexpatEvent -> m (Maybe Text)
parseSharedStrings :: forall (m :: * -> *).
(MonadThrow m, HasSharedStringsState m) =>
HexpatEvent -> m (Maybe Text)
parseSharedStrings = \case
  StartElement ByteString
"t" [(ByteString, Text)]
_ -> forall a. Maybe a
Nothing forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Lens' SharedStringsState Builder
ss_string forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= forall a. Monoid a => a
mempty)
  EndElement ByteString
"t"     -> forall a. a -> Maybe a
Just forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
LT.toStrict forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> Text
TB.toLazyText forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets SharedStringsState -> Builder
_ss_string
  CharacterData Text
txt  -> forall a. Maybe a
Nothing forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Lens' SharedStringsState Builder
ss_string forall s (m :: * -> *) a.
(MonadState s m, Semigroup a) =>
ASetter' s a -> a -> m ()
<>= Text -> Builder
TB.fromText Text
txt)
  HexpatEvent
_                  -> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing

-- | Run a series of actions on an Xlsx file
runXlsxM :: MonadIO m => FilePath -> XlsxM a -> m a
runXlsxM :: forall (m :: * -> *) a. MonadIO m => FilePath -> XlsxM a -> m a
runXlsxM FilePath
xlsxFile (XlsxM ReaderT XlsxMState ZipArchive a
act) = forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ do
  -- TODO: don't run the withArchive multiple times but use liftWith or runInIO instead
  Memoized WorkbookInfo
_xs_workbook_info  <- forall a. IO a -> IO (Memoized a)
memoizeRef (forall (m :: * -> *) a.
MonadIO m =>
FilePath -> ZipArchive a -> m a
Zip.withArchive FilePath
xlsxFile ZipArchive WorkbookInfo
readWorkbookInfo)
  Memoized Relationships
_xs_relationships  <- forall a. IO a -> IO (Memoized a)
memoizeRef (forall (m :: * -> *) a.
MonadIO m =>
FilePath -> ZipArchive a -> m a
Zip.withArchive FilePath
xlsxFile ZipArchive Relationships
readWorkbookRelationships)
  Memoized SharedStringsMap
_xs_shared_strings <- forall a. IO a -> IO (Memoized a)
memoizeRef (forall (m :: * -> *) a.
MonadIO m =>
FilePath -> ZipArchive a -> m a
Zip.withArchive FilePath
xlsxFile ZipArchive SharedStringsMap
parseSharedStringss)
  forall (m :: * -> *) a.
MonadIO m =>
FilePath -> ZipArchive a -> m a
Zip.withArchive FilePath
xlsxFile forall a b. (a -> b) -> a -> b
$ forall r (m :: * -> *) a. ReaderT r m a -> r -> m a
runReaderT ReaderT XlsxMState ZipArchive a
act forall a b. (a -> b) -> a -> b
$ MkXlsxMState{Memoized SharedStringsMap
Memoized Relationships
Memoized WorkbookInfo
_xs_shared_strings :: Memoized SharedStringsMap
_xs_relationships :: Memoized Relationships
_xs_workbook_info :: Memoized WorkbookInfo
_xs_relationships :: Memoized Relationships
_xs_workbook_info :: Memoized WorkbookInfo
_xs_shared_strings :: Memoized SharedStringsMap
..}

liftZip :: Zip.ZipArchive a -> XlsxM a
liftZip :: forall a. ZipArchive a -> XlsxM a
liftZip = forall a. ReaderT XlsxMState ZipArchive a -> XlsxM a
XlsxM forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall r (m :: * -> *) a. (r -> m a) -> ReaderT r m a
ReaderT forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. a -> b -> a
const

parseSharedStringss :: Zip.ZipArchive (V.Vector Text)
parseSharedStringss :: ZipArchive SharedStringsMap
parseSharedStringss = do
      EntrySelector
sharedStrsSel <- forall (m :: * -> *). MonadThrow m => FilePath -> m EntrySelector
Zip.mkEntrySelector FilePath
"xl/sharedStrings.xml"
      Bool
hasSharedStrs <- EntrySelector -> ZipArchive Bool
Zip.doesEntryExist EntrySelector
sharedStrsSel
      if Bool -> Bool
not Bool
hasSharedStrs
        then forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Monoid a => a
mempty
        else do
          let state0 :: SharedStringsState
state0 = SharedStringsState
initialSharedStrings
          ConduitT () ByteString (ResourceT IO) ()
byteSrc <- forall (m :: * -> *).
(PrimMonad m, MonadThrow m, MonadResource m) =>
EntrySelector -> ZipArchive (ConduitT () ByteString m ())
Zip.getEntrySource EntrySelector
sharedStrsSel
          SharedStringsState
st <- forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ forall state tag text.
(GenericXMLString tag, GenericXMLString text) =>
state
-> ConduitT () ByteString (ResourceT IO) ()
-> ([SAXEvent tag text] -> StateT state IO ())
-> IO state
runExpat SharedStringsState
state0 ConduitT () ByteString (ResourceT IO) ()
byteSrc forall a b. (a -> b) -> a -> b
$ \[HexpatEvent]
evs -> forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [HexpatEvent]
evs forall a b. (a -> b) -> a -> b
$ \HexpatEvent
ev -> do
            Maybe Text
mTxt <- forall (m :: * -> *).
(MonadThrow m, HasSharedStringsState m) =>
HexpatEvent -> m (Maybe Text)
parseSharedStrings HexpatEvent
ev
            forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
t a -> (a -> f b) -> f ()
for_ Maybe Text
mTxt forall a b. (a -> b) -> a -> b
$ \Text
txt ->
              Lens' SharedStringsState (DList Text)
ss_list forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> (a -> b) -> m ()
%= (forall a. DList a -> a -> DList a
`DL.snoc` Text
txt)
          forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ forall a. [a] -> Vector a
V.fromList forall a b. (a -> b) -> a -> b
$ forall a. DList a -> [a]
DL.toList forall a b. (a -> b) -> a -> b
$ SharedStringsState -> DList Text
_ss_list SharedStringsState
st

{-# SCC getOrParseSharedStringss #-}
getOrParseSharedStringss :: XlsxM (V.Vector Text)
getOrParseSharedStringss :: XlsxM SharedStringsMap
getOrParseSharedStringss = forall (m :: * -> *) a. MonadIO m => Memoized a -> m a
runMemoized forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks XlsxMState -> Memoized SharedStringsMap
_xs_shared_strings

readWorkbookInfo :: Zip.ZipArchive WorkbookInfo
readWorkbookInfo :: ZipArchive WorkbookInfo
readWorkbookInfo = do
   EntrySelector
sel <- forall (m :: * -> *). MonadThrow m => FilePath -> m EntrySelector
Zip.mkEntrySelector FilePath
"xl/workbook.xml"
   ConduitT () ByteString (ResourceT IO) ()
src <- forall (m :: * -> *).
(PrimMonad m, MonadThrow m, MonadResource m) =>
EntrySelector -> ZipArchive (ConduitT () ByteString m ())
Zip.getEntrySource EntrySelector
sel
   [SheetInfo]
sheets <- forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ forall state tag text.
(GenericXMLString tag, GenericXMLString text) =>
state
-> ConduitT () ByteString (ResourceT IO) ()
-> ([SAXEvent tag text] -> StateT state IO ())
-> IO state
runExpat [] ConduitT () ByteString (ResourceT IO) ()
src forall a b. (a -> b) -> a -> b
$ \[HexpatEvent]
evs -> forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [HexpatEvent]
evs forall a b. (a -> b) -> a -> b
$ \case
     StartElement (ByteString
"sheet" :: ByteString) [(ByteString, Text)]
attrs -> do
       Text
nm <- forall (m :: * -> *).
MonadThrow m =>
ByteString -> [(ByteString, Text)] -> m Text
lookupBy ByteString
"name" [(ByteString, Text)]
attrs
       Text
sheetId <- forall (m :: * -> *).
MonadThrow m =>
ByteString -> [(ByteString, Text)] -> m Text
lookupBy ByteString
"sheetId" [(ByteString, Text)]
attrs
       Text
rId <- forall (m :: * -> *).
MonadThrow m =>
ByteString -> [(ByteString, Text)] -> m Text
lookupBy ByteString
"r:id" [(ByteString, Text)]
attrs
       Int
sheetNum <- forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> FilePath -> WorkbookError
ParseDecimalError Text
sheetId) forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ forall a. Integral a => Text -> Either FilePath a
eitherDecimal Text
sheetId
       forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (Text -> RefId -> Int -> SheetInfo
SheetInfo Text
nm (Text -> RefId
RefId Text
rId) Int
sheetNum forall a. a -> [a] -> [a]
:)
     HexpatEvent
_ -> forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
   forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ [SheetInfo] -> WorkbookInfo
WorkbookInfo [SheetInfo]
sheets

lookupBy :: MonadThrow m => ByteString -> [(ByteString, Text)] -> m Text
lookupBy :: forall (m :: * -> *).
MonadThrow m =>
ByteString -> [(ByteString, Text)] -> m Text
lookupBy ByteString
fields [(ByteString, Text)]
attrs = forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM forall a b. (a -> b) -> a -> b
$ [(ByteString, Text)] -> ByteString -> WorkbookError
LookupError [(ByteString, Text)]
attrs ByteString
fields) forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup ByteString
fields [(ByteString, Text)]
attrs

-- | Returns information about the workbook, found in
-- xl/workbook.xml. The result is cached so the XML will only be
-- decompressed and parsed once inside a larger XlsxM action.
getWorkbookInfo :: XlsxM WorkbookInfo
getWorkbookInfo :: XlsxM WorkbookInfo
getWorkbookInfo = forall (m :: * -> *) a. MonadIO m => Memoized a -> m a
runMemoized forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks XlsxMState -> Memoized WorkbookInfo
_xs_workbook_info

readWorkbookRelationships :: Zip.ZipArchive Relationships
readWorkbookRelationships :: ZipArchive Relationships
readWorkbookRelationships = do
   EntrySelector
sel <- forall (m :: * -> *). MonadThrow m => FilePath -> m EntrySelector
Zip.mkEntrySelector FilePath
"xl/_rels/workbook.xml.rels"
   ConduitT () ByteString (ResourceT IO) ()
src <- forall (m :: * -> *).
(PrimMonad m, MonadThrow m, MonadResource m) =>
EntrySelector -> ZipArchive (ConduitT () ByteString m ())
Zip.getEntrySource EntrySelector
sel
   forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Map RefId Relationship -> Relationships
Relationships forall a b. (a -> b) -> a -> b
$ forall state tag text.
(GenericXMLString tag, GenericXMLString text) =>
state
-> ConduitT () ByteString (ResourceT IO) ()
-> ([SAXEvent tag text] -> StateT state IO ())
-> IO state
runExpat forall a. Monoid a => a
mempty ConduitT () ByteString (ResourceT IO) ()
src forall a b. (a -> b) -> a -> b
$ \[HexpatEvent]
evs -> forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [HexpatEvent]
evs forall a b. (a -> b) -> a -> b
$ \case
     StartElement (ByteString
"Relationship" :: ByteString) [(ByteString, Text)]
attrs -> do
       Text
rId <- forall (m :: * -> *).
MonadThrow m =>
ByteString -> [(ByteString, Text)] -> m Text
lookupBy ByteString
"Id" [(ByteString, Text)]
attrs
       Text
rTarget <- forall (m :: * -> *).
MonadThrow m =>
ByteString -> [(ByteString, Text)] -> m Text
lookupBy ByteString
"Target" [(ByteString, Text)]
attrs
       Text
rType <- forall (m :: * -> *).
MonadThrow m =>
ByteString -> [(ByteString, Text)] -> m Text
lookupBy ByteString
"Type" [(ByteString, Text)]
attrs
       forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' forall a b. (a -> b) -> a -> b
$ forall k a. Ord k => k -> a -> Map k a -> Map k a
M.insert (Text -> RefId
RefId Text
rId) forall a b. (a -> b) -> a -> b
$
         Relationship { relType :: Text
relType = Text
rType,
                        relTarget :: FilePath
relTarget = Text -> FilePath
T.unpack Text
rTarget
                       }
     HexpatEvent
_ -> forall (f :: * -> *) a. Applicative f => a -> f a
pure ()

-- | Gets relationships for the workbook (this means the filenames in
-- the relationships map are relative to "xl/" base path within the
-- zip file.
--
-- The relationships xml file will only be parsed once when called
-- multiple times within a larger XlsxM action.
getWorkbookRelationships :: XlsxM Relationships
getWorkbookRelationships :: XlsxM Relationships
getWorkbookRelationships = forall (m :: * -> *) a. MonadIO m => Memoized a -> m a
runMemoized forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks XlsxMState -> Memoized Relationships
_xs_relationships

type HexpatEvent = SAXEvent ByteString Text

relIdToEntrySelector :: RefId -> XlsxM (Maybe Zip.EntrySelector)
relIdToEntrySelector :: RefId -> XlsxM (Maybe EntrySelector)
relIdToEntrySelector RefId
rid = do
  Relationships Map RefId Relationship
rels <- XlsxM Relationships
getWorkbookRelationships
  forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
t a -> (a -> f b) -> f (t b)
for (forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup RefId
rid Map RefId Relationship
rels) forall a b. (a -> b) -> a -> b
$ \Relationship
rel -> do
    forall (m :: * -> *). MonadThrow m => FilePath -> m EntrySelector
Zip.mkEntrySelector forall a b. (a -> b) -> a -> b
$ FilePath
"xl/" forall a. Semigroup a => a -> a -> a
<> Relationship -> FilePath
relTarget Relationship
rel

sheetIdToRelId :: Int -> XlsxM (Maybe RefId)
sheetIdToRelId :: Int -> XlsxM (Maybe RefId)
sheetIdToRelId Int
sheetId = do
  WorkbookInfo [SheetInfo]
sheets <- XlsxM WorkbookInfo
getWorkbookInfo
  forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ SheetInfo -> RefId
sheetInfoRelId forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find ((forall a. Eq a => a -> a -> Bool
== Int
sheetId) forall b c a. (b -> c) -> (a -> b) -> a -> c
. SheetInfo -> Int
sheetInfoSheetId) [SheetInfo]
sheets

sheetIdToEntrySelector :: Int -> XlsxM (Maybe Zip.EntrySelector)
sheetIdToEntrySelector :: Int -> XlsxM (Maybe EntrySelector)
sheetIdToEntrySelector Int
sheetId = do
  Int -> XlsxM (Maybe RefId)
sheetIdToRelId Int
sheetId forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
    Maybe RefId
Nothing  -> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing
    Just RefId
rid -> RefId -> XlsxM (Maybe EntrySelector)
relIdToEntrySelector RefId
rid

-- If the given sheet number exists, returns Just a conduit source of the stream
-- of XML events in a particular sheet. Returns Nothing when the sheet doesn't
-- exist.
{-# SCC getSheetXmlSource #-}
getSheetXmlSource ::
  (PrimMonad m, MonadThrow m, C.MonadResource m) =>
  Int ->
  XlsxM (Maybe (ConduitT () ByteString m ()))
getSheetXmlSource :: forall (m :: * -> *).
(PrimMonad m, MonadThrow m, MonadResource m) =>
Int -> XlsxM (Maybe (ConduitT () ByteString m ()))
getSheetXmlSource Int
sheetId = do
  -- TODO: The Zip library may throw exceptions that aren't exposed from this
  -- module, so downstream library users would need to add the 'zip' package to
  -- handle them. Consider re-wrapping zip library exceptions, or just
  -- re-export them?
  Maybe EntrySelector
mSheetSel <- Int -> XlsxM (Maybe EntrySelector)
sheetIdToEntrySelector Int
sheetId
  Bool
sheetExists <- forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False) (forall a. ZipArchive a -> XlsxM a
liftZip forall b c a. (b -> c) -> (a -> b) -> a -> c
. EntrySelector -> ZipArchive Bool
Zip.doesEntryExist) Maybe EntrySelector
mSheetSel
  case Maybe EntrySelector
mSheetSel of
    Just EntrySelector
sheetSel
      | Bool
sheetExists ->
          forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. ZipArchive a -> XlsxM a
liftZip (forall (m :: * -> *).
(PrimMonad m, MonadThrow m, MonadResource m) =>
EntrySelector -> ZipArchive (ConduitT () ByteString m ())
Zip.getEntrySource EntrySelector
sheetSel)
    Maybe EntrySelector
_ -> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing

{-# SCC runExpat #-}
runExpat :: forall state tag text.
  (GenericXMLString tag, GenericXMLString text) =>
  state ->
  ConduitT () ByteString (C.ResourceT IO) () ->
  ([SAXEvent tag text] -> StateT state IO ()) ->
  IO state
runExpat :: forall state tag text.
(GenericXMLString tag, GenericXMLString text) =>
state
-> ConduitT () ByteString (ResourceT IO) ()
-> ([SAXEvent tag text] -> StateT state IO ())
-> IO state
runExpat state
initialState ConduitT () ByteString (ResourceT IO) ()
byteSource [SAXEvent tag text] -> StateT state IO ()
handler = do
  -- Set up state
  IORef state
ref <- forall a. a -> IO (IORef a)
newIORef state
initialState
  -- Set up parser and callbacks
  (HParser
parseChunk, IO XMLParseLocation
_getLoc) <- Maybe Encoding
-> Maybe (ByteString -> Maybe ByteString)
-> Bool
-> IO (HParser, IO XMLParseLocation)
Hexpat.hexpatNewParser forall a. Maybe a
Nothing forall a. Maybe a
Nothing Bool
False
  let noExtra :: p -> b -> f ((), b)
noExtra p
_ b
offset = forall (f :: * -> *) a. Applicative f => a -> f a
pure ((), b
offset)
      {-# SCC processChunk #-}
      {-# INLINE processChunk #-}
      processChunk :: Bool -> ByteString -> IO ()
processChunk Bool
isFinalChunk ByteString
chunk = do
        (ForeignPtr Word8
buf, CInt
len, Maybe XMLParseError
mError) <- HParser
parseChunk ByteString
chunk Bool
isFinalChunk
        [(SAXEvent tag text, ())]
saxen <- forall tag text a.
(GenericXMLString tag, GenericXMLString text) =>
ForeignPtr Word8
-> CInt
-> (Ptr Word8 -> Int -> IO (a, Int))
-> IO [(SAXEvent tag text, a)]
HexpatInternal.parseBuf ForeignPtr Word8
buf CInt
len forall {f :: * -> *} {p} {b}. Applicative f => p -> b -> f ((), b)
noExtra
        case Maybe XMLParseError
mError of
          Just XMLParseError
err -> forall a. HasCallStack => FilePath -> a
error forall a b. (a -> b) -> a -> b
$ FilePath
"expat error: " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> FilePath
show XMLParseError
err
          Maybe XMLParseError
Nothing -> do
            state
state0 <- forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ forall a. IORef a -> IO a
readIORef IORef state
ref
            state
state1 <-
              {-# SCC "runExpat_runStateT_call" #-}
              forall (m :: * -> *) s a. Monad m => StateT s m a -> s -> m s
execStateT ([SAXEvent tag text] -> StateT state IO ()
handler forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> a
fst [(SAXEvent tag text, ())]
saxen) state
state0
            forall a. IORef a -> a -> IO ()
writeIORef IORef state
ref state
state1
  forall (m :: * -> *) r.
MonadUnliftIO m =>
ConduitT () Void (ResourceT m) r -> m r
C.runConduitRes forall a b. (a -> b) -> a -> b
$
    ConduitT () ByteString (ResourceT IO) ()
byteSource forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.|
    forall (m :: * -> *) i o r.
Monad m =>
(i -> ConduitT i o m r) -> ConduitT i o m ()
C.awaitForever (forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bool -> ByteString -> IO ()
processChunk Bool
False)
  Bool -> ByteString -> IO ()
processChunk Bool
True ByteString
BS.empty
  forall a. IORef a -> IO a
readIORef IORef state
ref

runExpatForSheet ::
  SheetState ->
  ConduitT () ByteString (C.ResourceT IO) () ->
  (SheetItem -> IO ()) ->
  XlsxM ()
runExpatForSheet :: SheetState
-> ConduitT () ByteString (ResourceT IO) ()
-> (SheetItem -> IO ())
-> XlsxM ()
runExpatForSheet SheetState
initState ConduitT () ByteString (ResourceT IO) ()
byteSource SheetItem -> IO ()
inner =
  forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ forall state tag text.
(GenericXMLString tag, GenericXMLString text) =>
state
-> ConduitT () ByteString (ResourceT IO) ()
-> ([SAXEvent tag text] -> StateT state IO ())
-> IO state
runExpat SheetState
initState ConduitT () ByteString (ResourceT IO) ()
byteSource forall {m :: * -> *} {t :: * -> *}.
(Foldable t, MonadState SheetState m, MonadThrow m, MonadIO m) =>
t HexpatEvent -> m ()
handler
  where
    sheetName :: Int
sheetName = SheetState -> Int
_ps_sheet_index SheetState
initState
    handler :: t HexpatEvent -> m ()
handler t HexpatEvent
evs = forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ t HexpatEvent
evs forall a b. (a -> b) -> a -> b
$ \HexpatEvent
ev -> do
      Either SheetErrors (Maybe CellRow)
parseRes <- forall e (m :: * -> *) a. ExceptT e m a -> m (Either e a)
runExceptT forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
HexpatEvent -> m (Maybe CellRow)
matchHexpatEvent HexpatEvent
ev
      case Either SheetErrors (Maybe CellRow)
parseRes of
        Left SheetErrors
err -> forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM SheetErrors
err
        Right (Just CellRow
cellRow)
          | Bool -> Bool
not (forall a. IntMap a -> Bool
IntMap.null CellRow
cellRow) -> do
              RowIndex
rowNum <- forall s (m :: * -> *) a. MonadState s m => Getting a s a -> m a
use Lens' SheetState RowIndex
ps_cell_row_index
              forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ SheetItem -> IO ()
inner forall a b. (a -> b) -> a -> b
$ Int -> Row -> SheetItem
MkSheetItem Int
sheetName forall a b. (a -> b) -> a -> b
$ RowIndex -> CellRow -> Row
MkRow RowIndex
rowNum CellRow
cellRow
        Either SheetErrors (Maybe CellRow)
_ -> forall (f :: * -> *) a. Applicative f => a -> f a
pure ()

-- | this will collect the sheetitems in a list.
--   useful for cases were memory is of no concern but a sheetitem
--   type in a list is needed.
collectItems ::
  SheetIndex ->
  XlsxM [SheetItem]
collectItems :: SheetIndex -> XlsxM [SheetItem]
collectItems SheetIndex
sheetId = do
 IORef [SheetItem]
res <- forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ forall a. a -> IO (IORef a)
newIORef []
 forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ SheetIndex -> (SheetItem -> IO ()) -> XlsxM Bool
readSheet SheetIndex
sheetId forall a b. (a -> b) -> a -> b
$ \SheetItem
item ->
   forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (forall a. IORef a -> (a -> a) -> IO ()
modifyIORef' IORef [SheetItem]
res (SheetItem
item forall a. a -> [a] -> [a]
:))
 forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a. [a] -> [a]
reverse forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ forall a. IORef a -> IO a
readIORef IORef [SheetItem]
res

-- | datatype representing a sheet index, looking it up by name
--   can be done with 'makeIndexFromName', which is the preferred approach.
--   although 'makeIndex' is available in case it's already known.
newtype SheetIndex = MkSheetIndex Int
 deriving newtype SheetIndex -> ()
forall a. (a -> ()) -> NFData a
rnf :: SheetIndex -> ()
$crnf :: SheetIndex -> ()
NFData

-- | This does *no* checking if the index exists or not.
--   you could have index out of bounds issues because of this.
makeIndex :: Int -> SheetIndex
makeIndex :: Int -> SheetIndex
makeIndex = Int -> SheetIndex
MkSheetIndex

-- | Look up the index of a case insensitive sheet name
makeIndexFromName :: Text -> XlsxM (Maybe SheetIndex)
makeIndexFromName :: Text -> XlsxM (Maybe SheetIndex)
makeIndexFromName Text
sheetName = do
  WorkbookInfo
wi <- XlsxM WorkbookInfo
getWorkbookInfo
  -- The Excel UI does not allow a user to create two sheets whose
  -- names differ only in alphabetic case (at least for ascii...)
  let sheetNameCI :: Text
sheetNameCI = Text -> Text
T.toLower Text
sheetName
      findRes :: Maybe SheetInfo
      findRes :: Maybe SheetInfo
findRes = forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find ((forall a. Eq a => a -> a -> Bool
== Text
sheetNameCI) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
T.toLower forall b c a. (b -> c) -> (a -> b) -> a -> c
. SheetInfo -> Text
sheetInfoName) forall a b. (a -> b) -> a -> b
$ WorkbookInfo -> [SheetInfo]
_wiSheets WorkbookInfo
wi
  forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ Int -> SheetIndex
makeIndex forall b c a. (b -> c) -> (a -> b) -> a -> c
. SheetInfo -> Int
sheetInfoSheetId forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe SheetInfo
findRes

readSheet ::
  SheetIndex ->
  -- | Function to consume the sheet's rows
  (SheetItem -> IO ()) ->
  -- | Returns False if sheet doesn't exist, or True otherwise
  XlsxM Bool
readSheet :: SheetIndex -> (SheetItem -> IO ()) -> XlsxM Bool
readSheet (MkSheetIndex Int
sheetId) SheetItem -> IO ()
inner = do
  Maybe (ConduitT () ByteString (ResourceT IO) ())
mSrc :: Maybe (ConduitT () ByteString (C.ResourceT IO) ()) <-
    forall (m :: * -> *).
(PrimMonad m, MonadThrow m, MonadResource m) =>
Int -> XlsxM (Maybe (ConduitT () ByteString m ()))
getSheetXmlSource Int
sheetId
  let
  case Maybe (ConduitT () ByteString (ResourceT IO) ())
mSrc of
    Maybe (ConduitT () ByteString (ResourceT IO) ())
Nothing -> forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
    Just ConduitT () ByteString (ResourceT IO) ()
sourceSheetXml -> do
      SharedStringsMap
sharedStrs <- XlsxM SharedStringsMap
getOrParseSharedStringss
      let sheetState0 :: SheetState
sheetState0 = SheetState
initialSheetState
            forall a b. a -> (a -> b) -> b
& Lens' SheetState SharedStringsMap
ps_shared_strings forall s t a b. ASetter s t a b -> b -> s -> t
.~ SharedStringsMap
sharedStrs
            forall a b. a -> (a -> b) -> b
& Lens' SheetState Int
ps_sheet_index forall s t a b. ASetter s t a b -> b -> s -> t
.~ Int
sheetId
      SheetState
-> ConduitT () ByteString (ResourceT IO) ()
-> (SheetItem -> IO ())
-> XlsxM ()
runExpatForSheet SheetState
sheetState0 ConduitT () ByteString (ResourceT IO) ()
sourceSheetXml SheetItem -> IO ()
inner
      forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True

-- | Returns number of rows in the given sheet (identified by the
-- sheet's ID, AKA the sheetId attribute, AKA 'sheetInfoSheetId'), or Nothing
-- if the sheet does not exist. Does not perform a full parse of the
-- XML into 'SheetItem's, so it should be more efficient than counting
-- via 'readSheetByIndex'.
countRowsInSheet :: SheetIndex -> XlsxM (Maybe Int)
countRowsInSheet :: SheetIndex -> XlsxM (Maybe Int)
countRowsInSheet (MkSheetIndex Int
sheetId) = do
  Maybe (ConduitT () ByteString (ResourceT IO) ())
mSrc :: Maybe (ConduitT () ByteString (C.ResourceT IO) ()) <-
    forall (m :: * -> *).
(PrimMonad m, MonadThrow m, MonadResource m) =>
Int -> XlsxM (Maybe (ConduitT () ByteString m ()))
getSheetXmlSource Int
sheetId
  forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
t a -> (a -> f b) -> f (t b)
for Maybe (ConduitT () ByteString (ResourceT IO) ())
mSrc forall a b. (a -> b) -> a -> b
$ \ConduitT () ByteString (ResourceT IO) ()
sourceSheetXml -> do
    forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ forall state tag text.
(GenericXMLString tag, GenericXMLString text) =>
state
-> ConduitT () ByteString (ResourceT IO) ()
-> ([SAXEvent tag text] -> StateT state IO ())
-> IO state
runExpat @Int @ByteString @ByteString Int
0 ConduitT () ByteString (ResourceT IO) ()
sourceSheetXml forall a b. (a -> b) -> a -> b
$ \[SAXEvent ByteString ByteString]
evs ->
      forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [SAXEvent ByteString ByteString]
evs forall a b. (a -> b) -> a -> b
$ \case
        StartElement ByteString
"row" [(ByteString, ByteString)]
_ -> forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' (forall a. Num a => a -> a -> a
+Int
1)
        SAXEvent ByteString ByteString
_                    -> forall (f :: * -> *) a. Applicative f => a -> f a
pure ()

-- | Return row from the state and empty it
popRow :: HasSheetState m => m CellRow
popRow :: forall (m :: * -> *). HasSheetState m => m CellRow
popRow = do
  CellRow
row <- forall s (m :: * -> *) a. MonadState s m => Getting a s a -> m a
use Lens' SheetState CellRow
ps_row
  Lens' SheetState CellRow
ps_row forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= forall a. Monoid a => a
mempty
  forall (f :: * -> *) a. Applicative f => a -> f a
pure CellRow
row

data AddCellErrors
  = ReadError -- ^ Could not read current cell value
      Text    -- ^ Original value
      String  -- ^ Error message
  | SharedStringsNotFound -- ^ Could not find string by index in shared string table
      Int                -- ^ Given index
      (V.Vector Text)      -- ^ Given shared strings to lookup in
  deriving Int -> AddCellErrors -> ShowS
[AddCellErrors] -> ShowS
AddCellErrors -> FilePath
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [AddCellErrors] -> ShowS
$cshowList :: [AddCellErrors] -> ShowS
show :: AddCellErrors -> FilePath
$cshow :: AddCellErrors -> FilePath
showsPrec :: Int -> AddCellErrors -> ShowS
$cshowsPrec :: Int -> AddCellErrors -> ShowS
Show

-- | Parse the given value
--
-- If it's a string, we try to get it our of a shared string table
{-# SCC parseValue #-}
parseValue :: SharedStringsMap -> Text -> ExcelValueType -> Either AddCellErrors CellValue
parseValue :: SharedStringsMap
-> Text -> ExcelValueType -> Either AddCellErrors CellValue
parseValue SharedStringsMap
sstrings Text
txt = \case
  ExcelValueType
TS -> do
    (Int
idx, Text
_) <- Text -> FilePath -> AddCellErrors
ReadError Text
txt forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
`first` forall a. Integral a => Reader a
Read.decimal @Int Text
txt
    Text
string <- forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ Int -> SharedStringsMap -> AddCellErrors
SharedStringsNotFound Int
idx SharedStringsMap
sstrings) forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ {-# SCC "sstrings_lookup_scc" #-}  (SharedStringsMap
sstrings forall s a. s -> Getting (First a) s a -> Maybe a
^? forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix Int
idx)
    forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ Text -> CellValue
CellText Text
string
  ExcelValueType
TStr -> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ Text -> CellValue
CellText Text
txt
  ExcelValueType
TN -> forall (p :: * -> * -> *) a b c d.
Bifunctor p =>
(a -> b) -> (c -> d) -> p a c -> p b d
bimap (Text -> FilePath -> AddCellErrors
ReadError Text
txt) (Double -> CellValue
CellDouble forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst) forall a b. (a -> b) -> a -> b
$ Reader Double
Read.double Text
txt
  ExcelValueType
TE -> forall (p :: * -> * -> *) a b c d.
Bifunctor p =>
(a -> b) -> (c -> d) -> p a c -> p b d
bimap (Text -> FilePath -> AddCellErrors
ReadError Text
txt) (ErrorType -> CellValue
CellError forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst) forall a b. (a -> b) -> a -> b
$ forall a. FromAttrVal a => Reader a
fromAttrVal Text
txt
  ExcelValueType
TB | Text
txt forall a. Eq a => a -> a -> Bool
== Text
"1" -> forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ Bool -> CellValue
CellBool Bool
True
     | Text
txt forall a. Eq a => a -> a -> Bool
== Text
"0" -> forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ Bool -> CellValue
CellBool Bool
False
     | Bool
otherwise -> forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ Text -> FilePath -> AddCellErrors
ReadError Text
txt FilePath
"Could not read Excel boolean value (expected 0 or 1)"
  ExcelValueType
Untyped -> forall a b. b -> Either a b
Right (Text -> CellValue
parseUntypedValue Text
txt)

-- TODO: some of the cells are untyped and we need to test whether
-- they all are strings or something more complicated
parseUntypedValue :: Text -> CellValue
parseUntypedValue :: Text -> CellValue
parseUntypedValue = Text -> CellValue
CellText

-- | Adds a cell to row in state monad
{-# SCC addCellToRow #-}
addCellToRow
  :: ( MonadError SheetErrors m
     , HasSheetState m
     )
  => Text -> m ()
addCellToRow :: forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
Text -> m ()
addCellToRow Text
txt = do
  SheetState
st <- forall s (m :: * -> *). MonadState s m => m s
get
  Maybe Int
style <- forall s (m :: * -> *) a. MonadState s m => Getting a s a -> m a
use Lens' SheetState (Maybe Int)
ps_cell_style
  forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (SheetState -> Bool
_ps_is_in_val SheetState
st) forall a b. (a -> b) -> a -> b
$ do
    CellValue
val <- forall e (m :: * -> *) a. MonadError e m => Either e a -> m a
liftEither forall a b. (a -> b) -> a -> b
$ forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first AddCellErrors -> SheetErrors
ParseCellError forall a b. (a -> b) -> a -> b
$ SharedStringsMap
-> Text -> ExcelValueType -> Either AddCellErrors CellValue
parseValue (SheetState -> SharedStringsMap
_ps_shared_strings SheetState
st) Text
txt (SheetState -> ExcelValueType
_ps_type SheetState
st)
    forall s (m :: * -> *). MonadState s m => s -> m ()
put forall a b. (a -> b) -> a -> b
$ SheetState
st { _ps_row :: CellRow
_ps_row = forall a. Int -> a -> IntMap a -> IntMap a
IntMap.insert (ColumnIndex -> Int
unColumnIndex forall a b. (a -> b) -> a -> b
$ SheetState -> ColumnIndex
_ps_cell_col_index SheetState
st)
                         (Cell { _cellStyle :: Maybe Int
_cellStyle   = Maybe Int
style
                               , _cellValue :: Maybe CellValue
_cellValue   = forall a. a -> Maybe a
Just CellValue
val
                               , _cellComment :: Maybe Comment
_cellComment = forall a. Maybe a
Nothing
                               , _cellFormula :: Maybe CellFormula
_cellFormula = forall a. Maybe a
Nothing
                               }) forall a b. (a -> b) -> a -> b
$ SheetState -> CellRow
_ps_row SheetState
st}

data SheetErrors
  = ParseCoordinateError CoordinateErrors -- ^ Error while parsing coordinates
  | ParseTypeError TypeError              -- ^ Error while parsing types
  | ParseCellError AddCellErrors          -- ^ Error while parsing cells
  | ParseStyleErrors StyleError
  | HexpatParseError Hexpat.XMLParseError
  deriving stock Int -> SheetErrors -> ShowS
[SheetErrors] -> ShowS
SheetErrors -> FilePath
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [SheetErrors] -> ShowS
$cshowList :: [SheetErrors] -> ShowS
show :: SheetErrors -> FilePath
$cshow :: SheetErrors -> FilePath
showsPrec :: Int -> SheetErrors -> ShowS
$cshowsPrec :: Int -> SheetErrors -> ShowS
Show
  deriving anyclass Show SheetErrors
Typeable SheetErrors
SomeException -> Maybe SheetErrors
SheetErrors -> FilePath
SheetErrors -> SomeException
forall e.
Typeable e
-> Show e
-> (e -> SomeException)
-> (SomeException -> Maybe e)
-> (e -> FilePath)
-> Exception e
displayException :: SheetErrors -> FilePath
$cdisplayException :: SheetErrors -> FilePath
fromException :: SomeException -> Maybe SheetErrors
$cfromException :: SomeException -> Maybe SheetErrors
toException :: SheetErrors -> SomeException
$ctoException :: SheetErrors -> SomeException
Exception

type SheetValue = (ByteString, Text)
type SheetValues = [SheetValue]

data CoordinateErrors
  = CoordinateNotFound SheetValues         -- ^ If the coordinate was not specified in "r" attribute
  | NoListElement SheetValue SheetValues   -- ^ If the value is empty for some reason
  | NoTextContent Content SheetValues      -- ^ If the value has something besides @ContentText@ inside
  | DecodeFailure Text SheetValues         -- ^ If malformed coordinate text was passed
  deriving stock Int -> CoordinateErrors -> ShowS
[CoordinateErrors] -> ShowS
CoordinateErrors -> FilePath
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [CoordinateErrors] -> ShowS
$cshowList :: [CoordinateErrors] -> ShowS
show :: CoordinateErrors -> FilePath
$cshow :: CoordinateErrors -> FilePath
showsPrec :: Int -> CoordinateErrors -> ShowS
$cshowsPrec :: Int -> CoordinateErrors -> ShowS
Show
  deriving anyclass Show CoordinateErrors
Typeable CoordinateErrors
SomeException -> Maybe CoordinateErrors
CoordinateErrors -> FilePath
CoordinateErrors -> SomeException
forall e.
Typeable e
-> Show e
-> (e -> SomeException)
-> (SomeException -> Maybe e)
-> (e -> FilePath)
-> Exception e
displayException :: CoordinateErrors -> FilePath
$cdisplayException :: CoordinateErrors -> FilePath
fromException :: SomeException -> Maybe CoordinateErrors
$cfromException :: SomeException -> Maybe CoordinateErrors
toException :: CoordinateErrors -> SomeException
$ctoException :: CoordinateErrors -> SomeException
Exception

data TypeError
  = TypeNotFound SheetValues
  | TypeNoListElement SheetValue SheetValues
  | UnkownType Text SheetValues
  | TypeNoTextContent Content SheetValues
  deriving Int -> TypeError -> ShowS
[TypeError] -> ShowS
TypeError -> FilePath
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [TypeError] -> ShowS
$cshowList :: [TypeError] -> ShowS
show :: TypeError -> FilePath
$cshow :: TypeError -> FilePath
showsPrec :: Int -> TypeError -> ShowS
$cshowsPrec :: Int -> TypeError -> ShowS
Show
  deriving anyclass Show TypeError
Typeable TypeError
SomeException -> Maybe TypeError
TypeError -> FilePath
TypeError -> SomeException
forall e.
Typeable e
-> Show e
-> (e -> SomeException)
-> (SomeException -> Maybe e)
-> (e -> FilePath)
-> Exception e
displayException :: TypeError -> FilePath
$cdisplayException :: TypeError -> FilePath
fromException :: SomeException -> Maybe TypeError
$cfromException :: SomeException -> Maybe TypeError
toException :: TypeError -> SomeException
$ctoException :: TypeError -> SomeException
Exception

data WorkbookError = LookupError { WorkbookError -> [(ByteString, Text)]
lookup_attrs :: [(ByteString, Text)], WorkbookError -> ByteString
lookup_field :: ByteString }
                   | ParseDecimalError Text String
  deriving Int -> WorkbookError -> ShowS
[WorkbookError] -> ShowS
WorkbookError -> FilePath
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [WorkbookError] -> ShowS
$cshowList :: [WorkbookError] -> ShowS
show :: WorkbookError -> FilePath
$cshow :: WorkbookError -> FilePath
showsPrec :: Int -> WorkbookError -> ShowS
$cshowsPrec :: Int -> WorkbookError -> ShowS
Show
  deriving anyclass Show WorkbookError
Typeable WorkbookError
SomeException -> Maybe WorkbookError
WorkbookError -> FilePath
WorkbookError -> SomeException
forall e.
Typeable e
-> Show e
-> (e -> SomeException)
-> (SomeException -> Maybe e)
-> (e -> FilePath)
-> Exception e
displayException :: WorkbookError -> FilePath
$cdisplayException :: WorkbookError -> FilePath
fromException :: SomeException -> Maybe WorkbookError
$cfromException :: SomeException -> Maybe WorkbookError
toException :: WorkbookError -> SomeException
$ctoException :: WorkbookError -> SomeException
Exception

{-# SCC matchHexpatEvent #-}
matchHexpatEvent ::
  ( MonadError SheetErrors m,
    HasSheetState m
  ) =>
  HexpatEvent ->
  m (Maybe CellRow)
matchHexpatEvent :: forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
HexpatEvent -> m (Maybe CellRow)
matchHexpatEvent HexpatEvent
ev = case HexpatEvent
ev of
  CharacterData Text
txt -> {-# SCC "handle_CharData" #-} do
    Bool
inVal <- forall s (m :: * -> *) a. MonadState s m => Getting a s a -> m a
use Lens' SheetState Bool
ps_is_in_val
    forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
inVal forall a b. (a -> b) -> a -> b
$
      {-# SCC "append_text_buf" #-} (Lens' SheetState Text
ps_text_buf forall s (m :: * -> *) a.
(MonadState s m, Semigroup a) =>
ASetter' s a -> a -> m ()
<>= Text
txt)
    forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing
  StartElement ByteString
"c" [(ByteString, Text)]
attrs -> forall a. Maybe a
Nothing forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
[(ByteString, Text)] -> m ()
setCoord [(ByteString, Text)]
attrs forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
[(ByteString, Text)] -> m ()
setType [(ByteString, Text)]
attrs forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
[(ByteString, Text)] -> m ()
setStyle [(ByteString, Text)]
attrs)
  StartElement ByteString
"is" [(ByteString, Text)]
_ -> forall a. Maybe a
Nothing forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Lens' SheetState Bool
ps_is_in_val forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= Bool
True)
  EndElement ByteString
"is" -> forall a. Maybe a
Nothing forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
m ()
finaliseCellValue
  StartElement ByteString
"v" [(ByteString, Text)]
_ -> forall a. Maybe a
Nothing forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Lens' SheetState Bool
ps_is_in_val forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= Bool
True)
  EndElement ByteString
"v" -> forall a. Maybe a
Nothing forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
m ()
finaliseCellValue
  -- If beginning of row, empty the state and return nothing.
  -- We don't know if there is anything in the state, the user may have
  -- decided to <row> <row> (not closing). In any case it's the beginning of a new row
  -- so we clear the state.
  StartElement ByteString
"row" [(ByteString, Text)]
_ -> forall a. Maybe a
Nothing forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall (m :: * -> *). HasSheetState m => m CellRow
popRow
  -- If at the end of the row, we have collected the whole row into
  -- the current state. Empty the state and return the row.
  EndElement ByteString
"row" -> forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *). HasSheetState m => m CellRow
popRow
  StartElement ByteString
"worksheet" [(ByteString, Text)]
_ -> Lens' SheetState Bool
ps_worksheet_ended forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= Bool
False forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing
  EndElement ByteString
"worksheet" -> Lens' SheetState Bool
ps_worksheet_ended forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= Bool
True forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing
  -- Skip everything else, e.g. the formula elements <f>
  FailDocument XMLParseError
err -> do
    -- this event is emitted at the end the xml stream (possibly
    -- because the xml files in xlsx archives don't end in a
    -- newline, but that's a guess), so we use state to determine if
    -- it's expected.
    Bool
finished <- forall s (m :: * -> *) a. MonadState s m => Getting a s a -> m a
use Lens' SheetState Bool
ps_worksheet_ended
    forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless Bool
finished forall a b. (a -> b) -> a -> b
$
      forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError forall a b. (a -> b) -> a -> b
$ XMLParseError -> SheetErrors
HexpatParseError XMLParseError
err
    forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing
  HexpatEvent
_ -> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing

{-# INLINE finaliseCellValue #-}
finaliseCellValue ::
  ( MonadError SheetErrors m, HasSheetState m ) => m ()
finaliseCellValue :: forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
m ()
finaliseCellValue = do
  Text
txt <- forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets SheetState -> Text
_ps_text_buf
  forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
Text -> m ()
addCellToRow Text
txt
  forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify' forall a b. (a -> b) -> a -> b
$ \SheetState
st ->
    SheetState
st { _ps_is_in_val :: Bool
_ps_is_in_val = Bool
False
       , _ps_text_buf :: Text
_ps_text_buf = forall a. Monoid a => a
mempty
       }

-- | Update state coordinates accordingly to @parseCoordinates@
{-# SCC setCoord #-}
setCoord
  :: ( MonadError SheetErrors m
     , HasSheetState m
     )
  => SheetValues -> m ()
setCoord :: forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
[(ByteString, Text)] -> m ()
setCoord [(ByteString, Text)]
list = do
  (RowIndex, ColumnIndex)
coordinates <- forall e (m :: * -> *) a. MonadError e m => Either e a -> m a
liftEither forall a b. (a -> b) -> a -> b
$ forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first CoordinateErrors -> SheetErrors
ParseCoordinateError forall a b. (a -> b) -> a -> b
$ [(ByteString, Text)]
-> Either CoordinateErrors (RowIndex, ColumnIndex)
parseCoordinates [(ByteString, Text)]
list
  Lens' SheetState ColumnIndex
ps_cell_col_index forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= ((RowIndex, ColumnIndex)
coordinates forall s a. s -> Getting a s a -> a
^. forall s t a b. Field2 s t a b => Lens s t a b
_2)
  Lens' SheetState RowIndex
ps_cell_row_index forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= ((RowIndex, ColumnIndex)
coordinates forall s a. s -> Getting a s a -> a
^. forall s t a b. Field1 s t a b => Lens s t a b
_1)

-- | Parse type from values and update state accordingly
setType
  :: ( MonadError SheetErrors m
     , HasSheetState m
 )
  => SheetValues -> m ()
setType :: forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
[(ByteString, Text)] -> m ()
setType [(ByteString, Text)]
list = do
  ExcelValueType
type' <- forall e (m :: * -> *) a. MonadError e m => Either e a -> m a
liftEither forall a b. (a -> b) -> a -> b
$ forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first TypeError -> SheetErrors
ParseTypeError forall a b. (a -> b) -> a -> b
$ [(ByteString, Text)] -> Either TypeError ExcelValueType
parseType [(ByteString, Text)]
list
  Lens' SheetState ExcelValueType
ps_type forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= ExcelValueType
type'

-- | Find sheet value by its name
findName :: ByteString -> SheetValues -> Maybe SheetValue
findName :: ByteString -> [(ByteString, Text)] -> Maybe (ByteString, Text)
findName ByteString
name = forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find ((ByteString
name forall a. Eq a => a -> a -> Bool
==) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst)
{-# INLINE findName #-}

setStyle :: (MonadError SheetErrors m, HasSheetState m) => SheetValues -> m ()
setStyle :: forall (m :: * -> *).
(MonadError SheetErrors m, HasSheetState m) =>
[(ByteString, Text)] -> m ()
setStyle [(ByteString, Text)]
list = do
  Maybe Int
style <- forall e (m :: * -> *) a. MonadError e m => Either e a -> m a
liftEither forall a b. (a -> b) -> a -> b
$ forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first StyleError -> SheetErrors
ParseStyleErrors forall a b. (a -> b) -> a -> b
$ [(ByteString, Text)] -> Either StyleError (Maybe Int)
parseStyle [(ByteString, Text)]
list
  Lens' SheetState (Maybe Int)
ps_cell_style forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= Maybe Int
style

data StyleError = InvalidStyleRef { StyleError -> Text
seInput:: Text,  StyleError -> FilePath
seErrorMsg :: String}
  deriving Int -> StyleError -> ShowS
[StyleError] -> ShowS
StyleError -> FilePath
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [StyleError] -> ShowS
$cshowList :: [StyleError] -> ShowS
show :: StyleError -> FilePath
$cshow :: StyleError -> FilePath
showsPrec :: Int -> StyleError -> ShowS
$cshowsPrec :: Int -> StyleError -> ShowS
Show

parseStyle :: SheetValues -> Either StyleError (Maybe Int)
parseStyle :: [(ByteString, Text)] -> Either StyleError (Maybe Int)
parseStyle [(ByteString, Text)]
list =
  case ByteString -> [(ByteString, Text)] -> Maybe (ByteString, Text)
findName ByteString
"s" [(ByteString, Text)]
list of
    Maybe (ByteString, Text)
Nothing -> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing
    Just (ByteString
_nm, Text
valTex) -> case forall a. Integral a => Reader a
Read.decimal Text
valTex of
      Left FilePath
err        -> forall a b. a -> Either a b
Left (Text -> FilePath -> StyleError
InvalidStyleRef Text
valTex FilePath
err)
      Right (Int
i, Text
_rem) -> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ forall a. a -> Maybe a
Just Int
i

-- | Parse value type
{-# SCC parseType #-}
parseType :: SheetValues -> Either TypeError ExcelValueType
parseType :: [(ByteString, Text)] -> Either TypeError ExcelValueType
parseType [(ByteString, Text)]
list =
  case ByteString -> [(ByteString, Text)] -> Maybe (ByteString, Text)
findName ByteString
"t" [(ByteString, Text)]
list of
    -- NB: According to format specification default value for cells without
    -- `t` attribute is a `n` - number.
    --
    -- <xsd:complexType name="CT_Cell" from spec (see the `CellValue` spec reference)>
    --  ..
    --  <xsd:attribute name="t" type="ST_CellType" use="optional" default="n"/>
    -- </xsd:complexType>
    Maybe (ByteString, Text)
Nothing -> forall a b. b -> Either a b
Right ExcelValueType
TN
    Just (ByteString
_nm, Text
valText)->
      case Text
valText of
        Text
"n"         -> forall a b. b -> Either a b
Right ExcelValueType
TN
        Text
"s"         -> forall a b. b -> Either a b
Right ExcelValueType
TS
         -- "Cell containing a formula string". Probably shouldn't be TStr..
        Text
"str"       -> forall a b. b -> Either a b
Right ExcelValueType
TStr
        Text
"inlineStr" -> forall a b. b -> Either a b
Right ExcelValueType
TStr
        Text
"b"         -> forall a b. b -> Either a b
Right ExcelValueType
TB
        Text
"e"         -> forall a b. b -> Either a b
Right ExcelValueType
TE
        Text
other       -> forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ Text -> [(ByteString, Text)] -> TypeError
UnkownType Text
other [(ByteString, Text)]
list

-- | Parse coordinates from a list of xml elements if such were found on "r" key
{-# SCC parseCoordinates #-}
parseCoordinates :: SheetValues -> Either CoordinateErrors (RowIndex, ColumnIndex)
parseCoordinates :: [(ByteString, Text)]
-> Either CoordinateErrors (RowIndex, ColumnIndex)
parseCoordinates [(ByteString, Text)]
list = do
  (ByteString
_nm, Text
valText) <- forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ [(ByteString, Text)] -> CoordinateErrors
CoordinateNotFound [(ByteString, Text)]
list) forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ ByteString -> [(ByteString, Text)] -> Maybe (ByteString, Text)
findName ByteString
"r" [(ByteString, Text)]
list
  forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ Text -> [(ByteString, Text)] -> CoordinateErrors
DecodeFailure Text
valText [(ByteString, Text)]
list) forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ CellRef -> Maybe (RowIndex, ColumnIndex)
fromSingleCellRef forall a b. (a -> b) -> a -> b
$ Text -> CellRef
CellRef Text
valText