{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE NoMonomorphismRestriction #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
module Codec.Xlsx.Parser
( toXlsx
, toXlsxEither
, toXlsxFast
, toXlsxEitherFast
, ParseError(..)
, Parser
) where
import qualified Codec.Archive.Zip as Zip
import Control.Applicative
import Control.Arrow (left)
import Control.Error.Safe (headErr)
import Control.Error.Util (note)
import Control.Exception (Exception)
#ifdef USE_MICROLENS
import Lens.Micro
#else
import Control.Lens hiding ((<.>), element, views)
#endif
import Control.Monad (forM, join, void)
import Control.Monad.Except (catchError, throwError)
import Data.Bool (bool)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy as LB
import Data.ByteString.Lazy.Char8 ()
import Data.List
import Data.Map (Map)
import qualified Data.Map as M
import Data.Maybe
import Data.Monoid
import Data.Text (Text)
import qualified Data.Text as T
import Data.Traversable
import GHC.Generics (Generic)
import Prelude hiding (sequence)
import Safe
import System.FilePath.Posix
import Text.XML as X
import Text.XML.Cursor hiding (bool)
import qualified Xeno.DOM as Xeno
import Codec.Xlsx.Parser.Internal
import Codec.Xlsx.Parser.Internal.PivotTable
import Codec.Xlsx.Types
import Codec.Xlsx.Types.Cell (formulaDataFromCursor)
import Codec.Xlsx.Types.Common (xlsxTextToCellValue)
import Codec.Xlsx.Types.Internal
import Codec.Xlsx.Types.Internal.CfPair
import Codec.Xlsx.Types.Internal.CommentTable as CommentTable
import Codec.Xlsx.Types.Internal.ContentTypes as ContentTypes
import Codec.Xlsx.Types.Internal.CustomProperties
as CustomProperties
import Codec.Xlsx.Types.Internal.DvPair
import Codec.Xlsx.Types.Internal.FormulaData
import Codec.Xlsx.Types.Internal.Relationships as Relationships
import Codec.Xlsx.Types.Internal.SharedStringTable
import Codec.Xlsx.Types.PivotTable.Internal
toXlsx :: L.ByteString -> Xlsx
toXlsx = either (error . show) id . toXlsxEither
data ParseError = InvalidZipArchive
| MissingFile FilePath
| InvalidFile FilePath Text
| InvalidRef FilePath RefId
| InconsistentXlsx Text
deriving (Eq, Show, Generic)
instance Exception ParseError
type Parser = Either ParseError
toXlsxFast :: L.ByteString -> Xlsx
toXlsxFast = either (error . show) id . toXlsxEitherFast
toXlsxEither :: L.ByteString -> Parser Xlsx
toXlsxEither = toXlsxEitherBase extractSheet
toXlsxEitherFast :: L.ByteString -> Parser Xlsx
toXlsxEitherFast = toXlsxEitherBase extractSheetFast
toXlsxEitherBase ::
(Zip.Archive -> SharedStringTable -> ContentTypes -> Caches -> WorksheetFile -> Parser Worksheet)
-> L.ByteString
-> Parser Xlsx
toXlsxEitherBase parseSheet bs = do
ar <- left (const InvalidZipArchive) $ Zip.toArchiveOrFail bs
sst <- getSharedStrings ar
contentTypes <- getContentTypes ar
(wfs, names, cacheSources, dateBase) <- readWorkbook ar
sheets <- forM wfs $ \wf -> do
sheet <- parseSheet ar sst contentTypes cacheSources wf
return (wfName wf, sheet)
CustomProperties customPropMap <- getCustomProperties ar
return $ Xlsx sheets (getStyles ar) names customPropMap dateBase
data WorksheetFile = WorksheetFile { wfName :: Text
, wfPath :: FilePath
}
deriving (Show, Generic)
type Caches = [(CacheId, (Text, CellRef, [CacheField]))]
extractSheetFast :: Zip.Archive
-> SharedStringTable
-> ContentTypes
-> Caches
-> WorksheetFile
-> Parser Worksheet
extractSheetFast ar sst contentTypes caches wf = do
file <-
note (MissingFile filePath) $
Zip.fromEntry <$> Zip.findEntryByPath filePath ar
sheetRels <- getRels ar filePath
root <-
left (\ex -> InvalidFile filePath $ T.pack (show ex)) $
Xeno.parse (LB.toStrict file)
parseWorksheet root sheetRels
where
filePath = wfPath wf
parseWorksheet :: Xeno.Node -> Relationships -> Parser Worksheet
parseWorksheet root sheetRels = do
let prefixes = nsPrefixes root
odrNs =
"http://schemas.openxmlformats.org/officeDocument/2006/relationships"
odrX = addPrefix prefixes odrNs
skip = void . maybeChild
(ws, tableIds, drawingRId, legacyDrRId) <-
liftEither . collectChildren root $ do
skip "sheetPr"
skip "dimension"
_wsSheetViews <- fmap justNonEmpty . maybeParse "sheetViews" $ \n ->
collectChildren n $ fromChildList "sheetView"
skip "sheetFormatPr"
_wsColumnsProperties <-
fmap (fromMaybe []) . maybeParse "cols" $ \n ->
collectChildren n (fromChildList "col")
(_wsRowPropertiesMap, _wsCells, _wsSharedFormulas) <-
requireAndParse "sheetData" $ \n -> do
rows <- collectChildren n $ childList "row"
collectRows <$> forM rows parseRow
skip "sheetCalcPr"
_wsProtection <- maybeFromChild "sheetProtection"
skip "protectedRanges"
skip "scenarios"
_wsAutoFilter <- maybeFromChild "autoFilter"
skip "sortState"
skip "dataConsolidate"
skip "customSheetViews"
_wsMerges <- fmap (fromMaybe []) . maybeParse "mergeCells" $ \n -> do
mCells <- collectChildren n $ childList "mergeCell"
forM mCells $ \mCell -> parseAttributes mCell $ fromAttr "ref"
_wsConditionalFormattings <-
M.fromList . map unCfPair <$> fromChildList "conditionalFormatting"
_wsDataValidations <-
fmap (fromMaybe mempty) . maybeParse "dataValidations" $ \n -> do
M.fromList . map unDvPair <$>
collectChildren n (fromChildList "dataValidation")
skip "hyperlinks"
skip "printOptions"
skip "pageMargins"
_wsPageSetup <- maybeFromChild "pageSetup"
skip "headerFooter"
skip "rowBreaks"
skip "colBreaks"
skip "customProperties"
skip "cellWatches"
skip "ignoredErrors"
skip "smartTags"
drawingRId <- maybeParse "drawing" $ \n ->
parseAttributes n $ fromAttr (odrX "id")
legacyDrRId <- maybeParse "legacyDrawing" $ \n ->
parseAttributes n $ fromAttr (odrX "id")
tableIds <- fmap (fromMaybe []) . maybeParse "tableParts" $ \n -> do
tParts <- collectChildren n $ childList "tablePart"
forM tParts $ \part ->
parseAttributes part $ fromAttr (odrX "id")
return (
Worksheet
{ _wsDrawing = Nothing
, _wsPivotTables = []
, _wsTables = []
, ..
}
, tableIds
, drawingRId
, legacyDrRId)
let commentsType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments"
commentTarget :: Maybe FilePath
commentTarget = relTarget <$> findRelByType commentsType sheetRels
legacyDrPath = fmap relTarget . flip Relationships.lookup sheetRels =<< legacyDrRId
commentsMap <-
fmap join . forM commentTarget $ getComments ar legacyDrPath
let commentCells =
M.fromList
[ (fromSingleCellRefNoting r, def { _cellComment = Just cmnt})
| (r, cmnt) <- maybe [] CommentTable.toList commentsMap
]
assignComment withCmnt noCmnt =
noCmnt & cellComment .~ (withCmnt ^. cellComment)
mergeComments = M.unionWith assignComment commentCells
tables <- forM tableIds $ \rId -> do
fp <- lookupRelPath filePath sheetRels rId
getTable ar fp
drawing <- forM drawingRId $ \dId -> do
rel <- note (InvalidRef filePath dId) $ Relationships.lookup dId sheetRels
getDrawing ar contentTypes (relTarget rel)
let ptType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/pivotTable"
pivotTables <- forM (allByType ptType sheetRels) $ \rel -> do
let ptPath = relTarget rel
bs <- note (MissingFile ptPath) $ Zip.fromEntry <$> Zip.findEntryByPath ptPath ar
note (InconsistentXlsx $ "Bad pivot table in " <> T.pack ptPath) $
parsePivotTable (flip Prelude.lookup caches) bs
return $ ws & wsTables .~ tables
& wsCells %~ mergeComments
& wsDrawing .~ drawing
& wsPivotTables .~ pivotTables
liftEither :: Either Text a -> Parser a
liftEither = left (\t -> InvalidFile filePath t)
justNonEmpty v@(Just (_:_)) = v
justNonEmpty _ = Nothing
collectRows = foldr collectRow (M.empty, M.empty, M.empty)
collectRow ::
( Int
, Maybe RowProperties
, [(Int, Int, Cell, Maybe (SharedFormulaIndex, SharedFormulaOptions))])
-> ( Map Int RowProperties
, CellMap
, Map SharedFormulaIndex SharedFormulaOptions)
-> ( Map Int RowProperties
, CellMap
, Map SharedFormulaIndex SharedFormulaOptions)
collectRow (r, mRP, rowCells) (rowMap, cellMap, sharedF) =
let (newCells0, newSharedF0) =
unzip [(((x, y), cd), shared) | (x, y, cd, shared) <- rowCells]
newCells = M.fromAscList newCells0
newSharedF = M.fromAscList $ catMaybes newSharedF0
newRowMap =
case mRP of
Just rp -> M.insert r rp rowMap
Nothing -> rowMap
in (newRowMap, cellMap <> newCells, sharedF <> newSharedF)
parseRow ::
Xeno.Node
-> Either Text ( Int
, Maybe RowProperties
, [( Int
, Int
, Cell
, Maybe (SharedFormulaIndex, SharedFormulaOptions))])
parseRow row = do
(r, s, ht, cstHt, hidden) <-
parseAttributes row $
((,,,,) <$> fromAttr "r" <*> maybeAttr "s" <*> maybeAttr "ht" <*>
fromAttrDef "customHeight" False <*>
fromAttrDef "hidden" False)
let props =
RowProps
{ rowHeight =
if cstHt
then CustomHeight <$> ht
else AutomaticHeight <$> ht
, rowStyle = s
, rowHidden = hidden
}
cellNodes <- collectChildren row $ childList "c"
cells <- forM cellNodes parseCell
return
( r
, if props == def
then Nothing
else Just props
, cells)
parseCell ::
Xeno.Node
-> Either Text ( Int
, Int
, Cell
, Maybe (SharedFormulaIndex, SharedFormulaOptions))
parseCell cell = do
(ref, s, t) <-
parseAttributes cell $
(,,) <$> fromAttr "r" <*> maybeAttr "s" <*> fromAttrDef "t" "n"
(fNode, vNode, isNode) <-
collectChildren cell $
(,,) <$> maybeChild "f" <*> maybeChild "v" <*> maybeChild "is"
let vConverted :: (FromAttrBs a) => Either Text (Maybe a)
vConverted =
case contentBs <$> vNode of
Nothing -> return Nothing
Just c -> Just <$> fromAttrBs c
mFormulaData <- mapM fromXenoNode fNode
d <-
case t of
("s" :: ByteString) -> do
si <- vConverted
case sstItem sst =<< si of
Just xlTxt -> return $ Just (xlsxTextToCellValue xlTxt)
Nothing -> throwError "bad shared string index"
"inlineStr" -> mapM (fmap xlsxTextToCellValue . fromXenoNode) isNode
"str" -> fmap CellText <$> vConverted
"n" -> fmap CellDouble <$> vConverted
"b" -> fmap CellBool <$> vConverted
"e" -> fmap CellError <$> vConverted
unexpected ->
throwError $ "unexpected cell type " <> T.pack (show unexpected)
let (r, c) = fromSingleCellRefNoting ref
f = frmdFormula <$> mFormulaData
shared = frmdShared =<< mFormulaData
return (r, c, Cell s d Nothing f, shared)
extractSheet ::
Zip.Archive
-> SharedStringTable
-> ContentTypes
-> Caches
-> WorksheetFile
-> Parser Worksheet
extractSheet ar sst contentTypes caches wf = do
let filePath = wfPath wf
file <- note (MissingFile filePath) $ Zip.fromEntry <$> Zip.findEntryByPath filePath ar
cur <- fmap fromDocument . left (\ex -> InvalidFile filePath (T.pack $ show ex)) $
parseLBS def file
sheetRels <- getRels ar filePath
let sheetViewList = cur $/ element (n_ "sheetViews") &/ element (n_ "sheetView") >=> fromCursor
sheetViews = case sheetViewList of
[] -> Nothing
views -> Just views
let commentsType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments"
commentTarget :: Maybe FilePath
commentTarget = relTarget <$> findRelByType commentsType sheetRels
legacyDrRId = cur $/ element (n_ "legacyDrawing") >=> fromAttribute (odr"id")
legacyDrPath = fmap relTarget . flip Relationships.lookup sheetRels =<< listToMaybe legacyDrRId
commentsMap :: Maybe CommentTable <- maybe (Right Nothing) (getComments ar legacyDrPath) commentTarget
let pageSetup = listToMaybe $ cur $/ element (n_ "pageSetup") >=> fromCursor
cws = cur $/ element (n_ "cols") &/ element (n_ "col") >=> fromCursor
(rowProps, cells0, sharedFormulas) =
collect $ cur $/ element (n_ "sheetData") &/ element (n_ "row") >=> parseRow
parseRow ::
Cursor
-> [( Int
, Maybe RowProperties
, [(Int, Int, Cell, Maybe (SharedFormulaIndex, SharedFormulaOptions))])]
parseRow c = do
r <- fromAttribute "r" c
let prop = RowProps
{ rowHeight = do h <- listToMaybe $ fromAttribute "ht" c
case fromAttribute "customHeight" c of
[True] -> return $ CustomHeight h
_ -> return $ AutomaticHeight h
, rowStyle = listToMaybe $ fromAttribute "s" c
, rowHidden =
case fromAttribute "hidden" c of
[] -> False
f:_ -> f
}
return ( r
, if prop == def then Nothing else Just prop
, c $/ element (n_ "c") >=> parseCell
)
parseCell ::
Cursor
-> [(Int, Int, Cell, Maybe (SharedFormulaIndex, SharedFormulaOptions))]
parseCell cell = do
ref <- fromAttribute "r" cell
let s = listToMaybe $ cell $| attribute "s" >=> decimal
t = fromMaybe "n" $ listToMaybe $ cell $| attribute "t"
d = listToMaybe $ extractCellValue sst t cell
mFormulaData = listToMaybe $ cell $/ element (n_ "f") >=> formulaDataFromCursor
f = fst <$> mFormulaData
shared = snd =<< mFormulaData
(r, c) = fromSingleCellRefNoting ref
comment = commentsMap >>= lookupComment ref
return (r, c, Cell s d comment f, shared)
collect = foldr collectRow (M.empty, M.empty, M.empty)
collectRow ::
( Int
, Maybe RowProperties
, [(Int, Int, Cell, Maybe (SharedFormulaIndex, SharedFormulaOptions))])
-> (Map Int RowProperties, CellMap, Map SharedFormulaIndex SharedFormulaOptions)
-> (Map Int RowProperties, CellMap, Map SharedFormulaIndex SharedFormulaOptions)
collectRow (r, mRP, rowCells) (rowMap, cellMap, sharedF) =
let (newCells0, newSharedF0) =
unzip [(((x,y),cd), shared) | (x, y, cd, shared) <- rowCells]
newCells = M.fromList newCells0
newSharedF = M.fromList $ catMaybes newSharedF0
newRowMap = case mRP of
Just rp -> M.insert r rp rowMap
Nothing -> rowMap
in (newRowMap, cellMap <> newCells, sharedF <> newSharedF)
commentCells =
M.fromList
[ (fromSingleCellRefNoting r, def {_cellComment = Just cmnt})
| (r, cmnt) <- maybe [] CommentTable.toList commentsMap
]
cells = cells0 `M.union` commentCells
mProtection = listToMaybe $ cur $/ element (n_ "sheetProtection") >=> fromCursor
mDrawingId = listToMaybe $ cur $/ element (n_ "drawing") >=> fromAttribute (odr"id")
merges = cur $/ parseMerges
parseMerges :: Cursor -> [Range]
parseMerges = element (n_ "mergeCells") &/ element (n_ "mergeCell") >=> fromAttribute "ref"
condFormtattings = M.fromList . map unCfPair $ cur $/ element (n_ "conditionalFormatting") >=> fromCursor
validations = M.fromList . map unDvPair $
cur $/ element (n_ "dataValidations") &/ element (n_ "dataValidation") >=> fromCursor
tableIds =
cur $/ element (n_ "tableParts") &/ element (n_ "tablePart") >=>
fromAttribute (odr "id")
let mAutoFilter = listToMaybe $ cur $/ element (n_ "autoFilter") >=> fromCursor
mDrawing <- case mDrawingId of
Just dId -> do
rel <- note (InvalidRef filePath dId) $ Relationships.lookup dId sheetRels
Just <$> getDrawing ar contentTypes (relTarget rel)
Nothing ->
return Nothing
let ptType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/pivotTable"
pTables <- forM (allByType ptType sheetRels) $ \rel -> do
let ptPath = relTarget rel
bs <- note (MissingFile ptPath) $ Zip.fromEntry <$> Zip.findEntryByPath ptPath ar
note (InconsistentXlsx $ "Bad pivot table in " <> T.pack ptPath) $
parsePivotTable (flip Prelude.lookup caches) bs
tables <- forM tableIds $ \rId -> do
fp <- lookupRelPath filePath sheetRels rId
getTable ar fp
return $
Worksheet
cws
rowProps
cells
mDrawing
merges
sheetViews
pageSetup
condFormtattings
validations
pTables
mAutoFilter
tables
mProtection
sharedFormulas
extractCellValue :: SharedStringTable -> Text -> Cursor -> [CellValue]
extractCellValue sst t cur
| t == "s" = do
si <- vConverted "shared string"
case sstItem sst si of
Just xlTxt -> return $ xlsxTextToCellValue xlTxt
Nothing -> fail "bad shared string index"
| t == "inlineStr" =
cur $/ element (n_ "is") >=> fmap xlsxTextToCellValue . fromCursor
| t == "str" = CellText <$> vConverted "string"
| t == "n" = CellDouble <$> vConverted "double"
| t == "b" = CellBool <$> vConverted "boolean"
| t == "e" = CellError <$> vConverted "error"
| otherwise = fail "bad cell value"
where
vConverted typeStr = do
vContent <- cur $/ element (n_ "v") >=> \c ->
return (T.concat $ c $/ content)
case fromAttrVal vContent of
Right (val, _) -> return $ val
_ -> fail $ "bad " ++ typeStr ++ " cell value"
xmlCursorOptional :: Zip.Archive -> FilePath -> Parser (Maybe Cursor)
xmlCursorOptional ar fname =
(Just <$> xmlCursorRequired ar fname) `catchError` missingToNothing
where
missingToNothing :: ParseError -> Either ParseError (Maybe a)
missingToNothing (MissingFile _) = return Nothing
missingToNothing other = throwError other
xmlCursorRequired :: Zip.Archive -> FilePath -> Parser Cursor
xmlCursorRequired ar fname = do
entry <- note (MissingFile fname) $ Zip.findEntryByPath fname ar
cur <- left (\ex -> InvalidFile fname (T.pack $ show ex)) $ parseLBS def (Zip.fromEntry entry)
return $ fromDocument cur
fromFileCursorDef ::
FromCursor a => Zip.Archive -> FilePath -> Text -> a -> Parser a
fromFileCursorDef x fp contentsDescr defVal = do
mCur <- xmlCursorOptional x fp
case mCur of
Just cur ->
headErr (InvalidFile fp $ "Couldn't parse " <> contentsDescr) $ fromCursor cur
Nothing -> return defVal
fromFileCursor :: FromCursor a => Zip.Archive -> FilePath -> Text -> Parser a
fromFileCursor x fp contentsDescr = do
cur <- xmlCursorRequired x fp
headErr (InvalidFile fp $ "Couldn't parse " <> contentsDescr) $ fromCursor cur
getSharedStrings :: Zip.Archive -> Parser SharedStringTable
getSharedStrings x =
fromFileCursorDef x "xl/sharedStrings.xml" "shared strings" sstEmpty
getContentTypes :: Zip.Archive -> Parser ContentTypes
getContentTypes x = fromFileCursor x "[Content_Types].xml" "content types"
getStyles :: Zip.Archive -> Styles
getStyles ar = case Zip.fromEntry <$> Zip.findEntryByPath "xl/styles.xml" ar of
Nothing -> Styles L.empty
Just xml -> Styles xml
getComments :: Zip.Archive -> Maybe FilePath -> FilePath -> Parser (Maybe CommentTable)
getComments ar drp fp = do
mCurComments <- xmlCursorOptional ar fp
mCurDr <- maybe (return Nothing) (xmlCursorOptional ar) drp
return (liftA2 hide (hidden <$> mCurDr) . listToMaybe . fromCursor =<< mCurComments)
where
hide refs (CommentTable m) = CommentTable $ foldl' hideComment m refs
hideComment m r = M.adjust (\c->c{_commentVisible = False}) r m
v nm = Name nm (Just "urn:schemas-microsoft-com:vml") Nothing
x nm = Name nm (Just "urn:schemas-microsoft-com:office:excel") Nothing
hidden :: Cursor -> [CellRef]
hidden cur = cur $/ checkElement visibleShape &/
element (x"ClientData") >=> shapeCellRef
visibleShape Element{..} = elementName == (v"shape") &&
maybe False (any ("visibility:hidden"==) . T.split (==';')) (M.lookup "style" elementAttributes)
shapeCellRef :: Cursor -> [CellRef]
shapeCellRef c = do
r0 <- c $/ element (x"Row") &/ content >=> decimal
c0 <- c $/ element (x"Column") &/ content >=> decimal
return $ singleCellRef (r0 + 1, c0 + 1)
getCustomProperties :: Zip.Archive -> Parser CustomProperties
getCustomProperties ar =
fromFileCursorDef ar "docProps/custom.xml" "custom properties" CustomProperties.empty
getDrawing :: Zip.Archive -> ContentTypes -> FilePath -> Parser Drawing
getDrawing ar contentTypes fp = do
cur <- xmlCursorRequired ar fp
drawingRels <- getRels ar fp
unresolved <- headErr (InvalidFile fp "Couldn't parse drawing") (fromCursor cur)
anchors <- forM (unresolved ^. xdrAnchors) $ resolveFileInfo drawingRels
return $ Drawing anchors
where
resolveFileInfo :: Relationships -> Anchor RefId RefId -> Parser (Anchor FileInfo ChartSpace)
resolveFileInfo rels uAnch =
case uAnch ^. anchObject of
Picture {..} -> do
let mRefId = _picBlipFill ^. bfpImageInfo
mFI <- lookupFI rels mRefId
let pic' =
Picture
{ _picMacro = _picMacro
, _picPublished = _picPublished
, _picNonVisual = _picNonVisual
, _picBlipFill = (_picBlipFill & bfpImageInfo .~ mFI)
, _picShapeProperties = _picShapeProperties
}
return uAnch {_anchObject = pic'}
Graphic nv rId tr -> do
chartPath <- lookupRelPath fp rels rId
chart <- readChart ar chartPath
return uAnch {_anchObject = Graphic nv chart tr}
lookupFI _ Nothing = return Nothing
lookupFI rels (Just rId) = do
path <- lookupRelPath fp rels rId
contentType <-
note (InvalidFile path "Missing content type") $
ContentTypes.lookup ("/" <> path) contentTypes
contents <-
Zip.fromEntry <$> note (MissingFile path) (Zip.findEntryByPath path ar)
return . Just $ FileInfo (stripMediaPrefix path) contentType contents
stripMediaPrefix :: FilePath -> FilePath
stripMediaPrefix p = fromMaybe p $ stripPrefix "xl/media/" p
readChart :: Zip.Archive -> FilePath -> Parser ChartSpace
readChart ar path = fromFileCursor ar path "chart"
readWorkbook :: Zip.Archive -> Parser ([WorksheetFile], DefinedNames, Caches, DateBase)
readWorkbook ar = do
let wbPath = "xl/workbook.xml"
cur <- xmlCursorRequired ar wbPath
wbRels <- getRels ar wbPath
let mkDefinedName :: Cursor -> [(Text, Maybe Text, Text)]
mkDefinedName c =
return
( headNote "Missing name attribute" $ attribute "name" c
, listToMaybe $ attribute "localSheetId" c
, T.concat $ c $/ content)
names =
cur $/ element (n_ "definedNames") &/ element (n_ "definedName") >=>
mkDefinedName
sheets <-
sequence $
cur $/ element (n_ "sheets") &/ element (n_ "sheet") >=>
liftA2 (worksheetFile wbPath wbRels) <$> attribute "name" <*>
fromAttribute (odr "id")
let cacheRefs =
cur $/ element (n_ "pivotCaches") &/ element (n_ "pivotCache") >=>
liftA2 (,) <$> fromAttribute "cacheId" <*> fromAttribute (odr "id")
caches <-
forM cacheRefs $ \(cacheId, rId) -> do
path <- lookupRelPath wbPath wbRels rId
bs <-
note (MissingFile path) $ Zip.fromEntry <$> Zip.findEntryByPath path ar
(sheet, ref, fields0, mRecRId) <-
note (InconsistentXlsx $ "Bad pivot table cache in " <> T.pack path) $
parseCache bs
fields <- case mRecRId of
Just recId -> do
cacheRels <- getRels ar path
recsPath <- lookupRelPath path cacheRels recId
rCur <- xmlCursorRequired ar recsPath
let recs = rCur $/ element (n_ "r") >=> \cur' ->
return $ cur' $/ anyElement >=> recordValueFromNode . node
return $ fillCacheFieldsFromRecords fields0 recs
Nothing ->
return fields0
return $ (cacheId, (sheet, ref, fields))
let dateBase = bool DateBase1900 DateBase1904 . fromMaybe False . listToMaybe $
cur $/ element (n_ "workbookPr") >=> fromAttribute "date1904"
return (sheets, DefinedNames names, caches, dateBase)
getTable :: Zip.Archive -> FilePath -> Parser Table
getTable ar fp = do
cur <- xmlCursorRequired ar fp
headErr (InvalidFile fp "Couldn't parse drawing") (fromCursor cur)
worksheetFile :: FilePath -> Relationships -> Text -> RefId -> Parser WorksheetFile
worksheetFile parentPath wbRels name rId =
WorksheetFile name <$> lookupRelPath parentPath wbRels rId
getRels :: Zip.Archive -> FilePath -> Parser Relationships
getRels ar fp = do
let (dir, file) = splitFileName fp
relsPath = dir </> "_rels" </> file <.> "rels"
c <- xmlCursorOptional ar relsPath
return $ maybe Relationships.empty (setTargetsFrom fp . headNote "Missing rels" . fromCursor) c
lookupRelPath :: FilePath
-> Relationships
-> RefId
-> Either ParseError FilePath
lookupRelPath fp rels rId =
relTarget <$> note (InvalidRef fp rId) (Relationships.lookup rId rels)