{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE DeriveGeneric #-} module Codec.Xlsx.Types.Common ( CellRef(..) , singleCellRef , fromSingleCellRef , fromSingleCellRefNoting , Range , mkRange , fromRange , SqRef(..) , XlsxText(..) , xlsxTextToCellValue , Formula(..) , CellValue(..) , ErrorType(..) , DateBase(..) , dateFromNumber , dateToNumber , int2col , col2int ) where import GHC.Generics (Generic) import Control.Arrow import Control.DeepSeq (NFData) import Control.Monad (forM, guard) import qualified Data.ByteString as BS import Data.Char import Data.Ix (inRange) import qualified Data.Map as Map import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Encoding as T import Data.Time.Calendar (Day, addDays, diffDays, fromGregorian) import Data.Time.Clock (UTCTime(UTCTime), picosecondsToDiffTime) import Safe import Text.XML import Text.XML.Cursor import Codec.Xlsx.Parser.Internal import Codec.Xlsx.Types.RichText import Codec.Xlsx.Writer.Internal -- | convert column number (starting from 1) to its textual form (e.g. 3 -> \"C\") int2col :: Int -> Text int2col = T.pack . reverse . map int2let . base26 where int2let 0 = 'Z' int2let x = chr $ (x - 1) + ord 'A' base26 0 = [] base26 i = let i' = (i `mod` 26) i'' = if i' == 0 then 26 else i' in seq i' (i' : base26 ((i - i'') `div` 26)) -- | reverse to 'int2col' col2int :: Text -> Int col2int = T.foldl' (\i c -> i * 26 + let2int c) 0 where let2int c = 1 + ord c - ord 'A' -- | Excel cell or cell range reference (e.g. @E3@) -- See 18.18.62 @ST_Ref@ (p. 2482) newtype CellRef = CellRef { unCellRef :: Text } deriving (Eq, Ord, Show, Generic) instance NFData CellRef -- | Render position in @(row, col)@ format to an Excel reference. -- -- > mkCellRef (2, 4) == "D2" singleCellRef :: (Int, Int) -> CellRef singleCellRef = CellRef . singleCellRefRaw singleCellRefRaw :: (Int, Int) -> Text singleCellRefRaw (row, col) = T.concat [int2col col, T.pack (show row)] -- | reverse to 'mkCellRef' fromSingleCellRef :: CellRef -> Maybe (Int, Int) fromSingleCellRef = fromSingleCellRefRaw . unCellRef fromSingleCellRefRaw :: Text -> Maybe (Int, Int) fromSingleCellRefRaw t = do let (colT, rowT) = T.span (inRange ('A', 'Z')) t guard $ not (T.null colT) && not (T.null rowT) && T.all isDigit rowT row <- decimal rowT return (row, col2int colT) -- | reverse to 'mkCellRef' expecting valid reference and failig with -- a standard error message like /"Bad cell reference 'XXX'"/ fromSingleCellRefNoting :: CellRef -> (Int, Int) fromSingleCellRefNoting ref = fromJustNote errMsg $ fromSingleCellRefRaw txt where txt = unCellRef ref errMsg = "Bad cell reference '" ++ T.unpack txt ++ "'" -- | Excel range (e.g. @D13:H14@), actually store as as 'CellRef' in -- xlsx type Range = CellRef -- | Render range -- -- > mkRange (2, 4) (6, 8) == "D2:H6" mkRange :: (Int, Int) -> (Int, Int) -> Range mkRange fr to = CellRef $ T.concat [singleCellRefRaw fr, T.pack ":", singleCellRefRaw to] -- | reverse to 'mkRange' fromRange :: Range -> Maybe ((Int, Int), (Int, Int)) fromRange r = case T.split (== ':') (unCellRef r) of [from, to] -> (,) <$> fromSingleCellRefRaw from <*> fromSingleCellRefRaw to _ -> Nothing -- | A sequence of cell references -- -- See 18.18.76 "ST_Sqref (Reference Sequence)" (p.2488) newtype SqRef = SqRef [CellRef] deriving (Eq, Ord, Show, Generic) instance NFData SqRef -- | Common type containing either simple string or rich formatted text. -- Used in @si@, @comment@ and @is@ elements -- -- E.g. @si@ spec says: "If the string is just a simple string with formatting applied -- at the cell level, then the String Item (si) should contain a single text -- element used to express the string. However, if the string in the cell is -- more complex - i.e., has formatting applied at the character level - then the -- string item shall consist of multiple rich text runs which collectively are -- used to express the string.". So we have either a single "Text" field, or -- else a list of "RichTextRun"s, each of which is some "Text" with layout -- properties. -- -- TODO: Currently we do not support @phoneticPr@ (Phonetic Properties, 18.4.3, -- p. 1723) or @rPh@ (Phonetic Run, 18.4.6, p. 1725). -- -- Section 18.4.8, "si (String Item)" (p. 1725) -- -- See @CT_Rst@, p. 3903 data XlsxText = XlsxText Text | XlsxRichText [RichTextRun] deriving (Eq, Ord, Show, Generic) instance NFData XlsxText xlsxTextToCellValue :: XlsxText -> CellValue xlsxTextToCellValue (XlsxText txt) = CellText txt xlsxTextToCellValue (XlsxRichText rich) = CellRich rich -- | A formula -- -- See 18.18.35 "ST_Formula (Formula)" (p. 2457) newtype Formula = Formula {unFormula :: Text} deriving (Eq, Ord, Show, Generic) instance NFData Formula -- | Cell values include text, numbers and booleans, -- standard includes date format also but actually dates -- are represented by numbers with a date format assigned -- to a cell containing it data CellValue = CellText Text | CellDouble Double | CellBool Bool | CellRich [RichTextRun] | CellError ErrorType deriving (Eq, Ord, Show, Generic) instance NFData CellValue -- | The evaluation of an expression can result in an error having one -- of a number of error values. -- -- See Annex L, L.2.16.8 "Error values" (p. 4764) data ErrorType = ErrorDiv0 -- ^ @#DIV/0!@ - Intended to indicate when any number, including -- zero, is divided by zero. | ErrorNA -- ^ @#N/A@ - Intended to indicate when a designated value is not -- available. For example, some functions, such as @SUMX2MY2@, -- perform a series of operations on corresponding elements in two -- arrays. If those arrays do not have the same number of elements, -- then for some elements in the longer array, there are no -- corresponding elements in the shorter one; that is, one or more -- values in the shorter array are not available. This error value -- can be produced by calling the function @NA@. | ErrorName -- ^ @#NAME?@ - Intended to indicate when what looks like a name is -- used, but no such name has been defined. For example, @XYZ/3@, -- where @XYZ@ is not a defined name. @Total is & A10@, where -- neither @Total@ nor @is@ is a defined name. Presumably, @"Total -- is " & A10@ was intended. @SUM(A1C10)@, where the range @A1:C10@ -- was intended. | ErrorNull -- ^ @#NULL!@ - Intended to indicate when two areas are required to -- intersect, but do not. For example, In the case of @SUM(B1 C1)@, -- the space between @B1@ and @C1@ is treated as the binary -- intersection operator, when a comma was intended. | ErrorNum -- ^ @#NUM!@ - Intended to indicate when an argument to a function -- has a compatible type, but has a value that is outside the domain -- over which that function is defined. (This is known as a domain -- error.) For example, Certain calls to @ASIN@, @ATANH@, @FACT@, -- and @SQRT@ might result in domain errors. Intended to indicate -- that the result of a function cannot be represented in a value of -- the specified type, typically due to extreme magnitude. (This is -- known as a range error.) For example, @FACT(1000)@ might result -- in a range error. | ErrorRef -- ^ @#REF!@ - Intended to indicate when a cell reference is -- invalid. For example, If a formula contains a reference to a -- cell, and then the row or column containing that cell is deleted, -- a @#REF!@ error results. If a worksheet does not support 20,001 -- columns, @OFFSET(A1,0,20000)@ results in a @#REF!@ error. | ErrorValue -- ^ @#VALUE!@ - Intended to indicate when an incompatible type -- argument is passed to a function, or an incompatible type operand -- is used with an operator. For example, In the case of a function -- argument, a number was expected, but text was provided. In the -- case of @1+"ABC"@, the binary addition operator is not defined for -- text. deriving (Eq, Ord, Show, Generic) instance NFData ErrorType -- | Specifies date base used for conversion of serial values to and -- from datetime values -- -- See Annex L, L.2.16.9.1 "Date Conversion for Serial Values" (p. 4765) data DateBase = DateBase1900 -- ^ 1900 date base system, the lower limit is January 1, -9999 -- 00:00:00, which has serial value -4346018. The upper-limit is -- December 31, 9999, 23:59:59, which has serial value -- 2,958,465.9999884. The base date for this date base system is -- December 30, 1899, which has a serial value of 0. | DateBase1904 -- ^ 1904 backward compatibility date-base system, the lower limit -- is January 1, 1904, 00:00:00, which has serial value 0. The upper -- limit is December 31, 9999, 23:59:59, which has serial value -- 2,957,003.9999884. The base date for this date base system is -- January 1, 1904, which has a serial value of 0. deriving (Eq, Show, Generic) instance NFData DateBase baseDate :: DateBase -> Day baseDate DateBase1900 = fromGregorian 1899 12 30 baseDate DateBase1904 = fromGregorian 1904 1 1 -- | Convertts serial value into datetime according to the specified -- date base -- -- > show (dateFromNumber DateBase1900 42929.75) == "2017-07-13 18:00:00 UTC" dateFromNumber :: RealFrac t => DateBase -> t -> UTCTime dateFromNumber b d = UTCTime day diffTime where (numberOfDays, fractionOfOneDay) = properFraction d day = addDays numberOfDays $ baseDate b diffTime = picosecondsToDiffTime (round (fractionOfOneDay * 24*60*60*1E12)) -- | Converts datetime into serial value dateToNumber :: Fractional a => DateBase -> UTCTime -> a dateToNumber b (UTCTime day diffTime) = numberOfDays + fractionOfOneDay where numberOfDays = fromIntegral (diffDays day $ baseDate b) fractionOfOneDay = realToFrac diffTime / (24 * 60 * 60) {------------------------------------------------------------------------------- Parsing -------------------------------------------------------------------------------} -- | See @CT_Rst@, p. 3903 instance FromCursor XlsxText where fromCursor cur = do let ts = cur $/ element (n_ "t") >=> contentOrEmpty rs = cur $/ element (n_ "r") >=> fromCursor case (ts,rs) of ([t], []) -> return $ XlsxText t ([], _:_) -> return $ XlsxRichText rs _ -> fail "invalid item" instance FromXenoNode XlsxText where fromXenoNode root = do (mCh, rs) <- collectChildren root $ (,) <$> maybeChild "t" <*> fromChildList "r" mT <- mapM contentX mCh case mT of Just t -> return $ XlsxText t Nothing -> case rs of [] -> Left $ "missing rich text subelements" _ -> return $ XlsxRichText rs instance FromAttrVal CellRef where fromAttrVal = fmap (first CellRef) . fromAttrVal instance FromAttrBs CellRef where -- we presume that cell references contain only latin letters, -- numbers and colon fromAttrBs = pure . CellRef . T.decodeLatin1 instance FromAttrVal SqRef where fromAttrVal t = do rs <- mapM (fmap fst . fromAttrVal) $ T.split (== ' ') t readSuccess $ SqRef rs instance FromAttrBs SqRef where fromAttrBs bs = do -- split on space rs <- forM (BS.split 32 bs) fromAttrBs return $ SqRef rs -- | See @ST_Formula@, p. 3873 instance FromCursor Formula where fromCursor cur = [Formula . T.concat $ cur $/ content] instance FromXenoNode Formula where fromXenoNode = fmap Formula . contentX instance FromAttrVal Formula where fromAttrVal t = readSuccess $ Formula t instance FromAttrBs Formula where fromAttrBs = fmap Formula . fromAttrBs instance FromAttrVal ErrorType where fromAttrVal "#DIV/0!" = readSuccess ErrorDiv0 fromAttrVal "#N/A" = readSuccess ErrorNA fromAttrVal "#NAME?" = readSuccess ErrorName fromAttrVal "#NULL!" = readSuccess ErrorNull fromAttrVal "#NUM!" = readSuccess ErrorNum fromAttrVal "#REF!" = readSuccess ErrorRef fromAttrVal "#VALUE!" = readSuccess ErrorValue fromAttrVal t = invalidText "ErrorType" t instance FromAttrBs ErrorType where fromAttrBs "#DIV/0!" = return ErrorDiv0 fromAttrBs "#N/A" = return ErrorNA fromAttrBs "#NAME?" = return ErrorName fromAttrBs "#NULL!" = return ErrorNull fromAttrBs "#NUM!" = return ErrorNum fromAttrBs "#REF!" = return ErrorRef fromAttrBs "#VALUE!" = return ErrorValue fromAttrBs x = unexpectedAttrBs "ErrorType" x {------------------------------------------------------------------------------- Rendering -------------------------------------------------------------------------------} -- | See @CT_Rst@, p. 3903 instance ToElement XlsxText where toElement nm si = Element { elementName = nm , elementAttributes = Map.empty , elementNodes = map NodeElement $ case si of XlsxText text -> [elementContent "t" text] XlsxRichText rich -> map (toElement "r") rich } instance ToAttrVal CellRef where toAttrVal = toAttrVal . unCellRef -- See 18.18.76, "ST_Sqref (Reference Sequence)", p. 2488. instance ToAttrVal SqRef where toAttrVal (SqRef refs) = T.intercalate " " $ map toAttrVal refs -- | See @ST_Formula@, p. 3873 instance ToElement Formula where toElement nm (Formula txt) = elementContent nm txt instance ToAttrVal ErrorType where toAttrVal ErrorDiv0 = "#DIV/0!" toAttrVal ErrorNA = "#N/A" toAttrVal ErrorName = "#NAME?" toAttrVal ErrorNull = "#NULL!" toAttrVal ErrorNum = "#NUM!" toAttrVal ErrorRef = "#REF!" toAttrVal ErrorValue = "#VALUE!"