module Pdf.Toolbox.Core.Parsers.Object
(
parseObject,
parseDict,
parseArray,
parseName,
parseStr,
parseHexStr,
parseRef,
parseNumber,
parseBoolean,
parseTillStreamData,
parseIndirectObject,
isRegularChar
)
where
import Data.List
import Data.Char
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BS8
import Data.Attoparsec.ByteString (Parser)
import qualified Data.Attoparsec.ByteString.Char8 as P
#if MIN_VERSION_attoparsec(0, 12, 0)
import qualified Data.Scientific as Scientific
#endif
import Control.Applicative
import Control.Monad
import Pdf.Toolbox.Core.Object.Types
import Pdf.Toolbox.Core.Parsers.Util
parseDict :: Parser Dict
parseDict = do
_ <- P.string "<<"
dict <- many parseKey
P.skipSpace
_ <- P.string ">>"
return $ Dict dict
parseKey :: Parser (Name, Object ())
parseKey = do
P.skipSpace
key <- parseName
val <- parseObject
return (key, val)
parseArray :: Parser Array
parseArray = do
_ <- P.char '['
array <- many parseObject
P.skipSpace
_ <- P.char ']'
return $ Array array
parseNumber :: Parser Number
parseNumber = P.choice [
number,
NumReal <$> (P.signed $ read . ("0."++) . BS8.unpack <$> (P.char '.' >> P.takeWhile1 isDigit))
]
where
#if MIN_VERSION_attoparsec(0, 12, 0)
number = toNum <$> P.scientific
toNum = either NumReal NumInt . Scientific.floatingOrInteger
#else
number = toNum <$> P.number
toNum (P.I i) = NumInt $ fromIntegral i
toNum (P.D d) = NumReal d
#endif
parseStr :: Parser Str
parseStr = do
_ <- P.char '('
str <- takeStr 0 []
return $ Str $ BS8.pack str
where
takeStr :: Int -> String -> Parser String
takeStr lvl res = do
ch <- P.anyChar
case ch of
'(' -> takeStr (lvl + 1) (ch : res)
')' -> if lvl == 0
then return $ reverse res
else takeStr (lvl 1) (ch : res)
'\\' -> do
ch' <- P.anyChar
if ch' `elem` ("()\\" :: String)
then takeStr lvl (ch' : res)
else case ch' of
'r' -> takeStr lvl ('\r' : res)
'n' -> takeStr lvl ('\n' : res)
'f' -> takeStr lvl ('\f' : res)
'b' -> takeStr lvl ('\b' : res)
't' -> takeStr lvl ('\t' : res)
'\r' -> takeStr lvl res
_ -> do
ds <- take3Digits [ch']
let i = toEnum
. foldl'
(\acc (a, b) -> acc + a * charToInt b)
0
. zip [1, 8, 64]
$ ds
takeStr lvl (i : res)
_ -> takeStr lvl (ch : res)
charToInt ch = fromEnum ch 48
take3Digits ds
| length ds >= 3
= return ds
| otherwise
= do
d <- P.peekChar'
if isDigit d
then do
void P.anyChar
take3Digits (d : ds)
else
return (ds ++ repeat '0')
parseHexStr :: Parser Str
parseHexStr = do
_ <- P.char '<'
str <- many takeHex
_ <- P.char '>'
return $ Str $ BS.pack str
where
takeHex = do
ch1 <- P.satisfy isHexDigit
ch2 <- P.satisfy isHexDigit
return $ fromIntegral $ digitToInt ch1 * 16 + digitToInt ch2
parseRef :: Parser Ref
parseRef = do
obj <- P.decimal
P.skipSpace
gen <- P.decimal
P.skipSpace
_ <- P.char 'R'
return $ Ref obj gen
parseName :: Parser Name
parseName = do
_ <- P.char '/'
Name <$> P.takeWhile1 isRegularChar
isRegularChar :: Char -> Bool
isRegularChar = (`notElem` ("[]()/<>{}% \n\r" :: String))
parseBoolean :: Parser Boolean
parseBoolean = Boolean <$> P.choice [
P.string "true" >> return True,
P.string "false" >> return False
]
parseTillStreamData :: Parser ()
parseTillStreamData = do
P.skipSpace
_ <- P.string "stream"
endOfLine
parseObject :: Parser (Object ())
parseObject = do
P.skipSpace
P.choice [
const ONull <$> P.string "null",
OName <$> parseName,
OBoolean <$> parseBoolean,
ODict <$> parseDict,
OArray <$> parseArray,
OStr <$> parseStr,
OStr <$> parseHexStr,
ORef <$> parseRef,
ONumber <$> parseNumber
]
parseIndirectObject :: Parser (Ref, Object ())
parseIndirectObject = do
P.skipSpace
index <- P.decimal :: Parser Int
P.skipSpace
gen <- P.decimal :: Parser Int
P.skipSpace
_ <- P.string "obj"
P.skipSpace
obj <- parseObject
let ref = Ref index gen
case obj of
ODict d -> P.choice [
parseTillStreamData >> return (ref, OStream $ Stream d ()),
return (ref, ODict d)
]
_ -> return (ref, obj)