{-# LANGUAGE OverloadedStrings #-}

-- | This module contains parsers for pdf objects

module Pdf.Core.Parsers.Object
( -- * Parse any object
  parseObject
  -- * Parse object of specific type
, parseDict
, parseArray
, parseName
, parseString
, parseHexString
, parseRef
, parseNumber
, parseBool
  -- * Other
, parseTillStreamData
, parseIndirectObject
, isRegularChar
)
where

import Pdf.Core.Object
import qualified Pdf.Core.Name as Name
import Pdf.Core.Parsers.Util

import Data.Char
import Data.List
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BS8
import Data.Attoparsec.ByteString (Parser)
import qualified Data.Attoparsec.ByteString.Char8 as P
import Data.Scientific (Scientific)
import qualified Data.Scientific as Scientific
import qualified Data.Vector as Vector
import qualified Data.HashMap.Strict as HashMap
import Control.Applicative
import Control.Monad

-- | Parse a dictionary
parseDict :: Parser Dict
parseDict :: Parser Dict
parseDict = do
  Parser ByteString ByteString -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString ByteString -> Parser ByteString ())
-> Parser ByteString ByteString -> Parser ByteString ()
forall a b. (a -> b) -> a -> b
$ ByteString -> Parser ByteString ByteString
P.string ByteString
"<<"
  [(Name, Object)]
dict <- Parser ByteString (Name, Object)
-> Parser ByteString [(Name, Object)]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser ByteString (Name, Object)
parseKey
  Parser ByteString ()
P.skipSpace
  Parser ByteString ByteString -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString ByteString -> Parser ByteString ())
-> Parser ByteString ByteString -> Parser ByteString ()
forall a b. (a -> b) -> a -> b
$ ByteString -> Parser ByteString ByteString
P.string ByteString
">>"
  Dict -> Parser Dict
forall (m :: * -> *) a. Monad m => a -> m a
return (Dict -> Parser Dict) -> Dict -> Parser Dict
forall a b. (a -> b) -> a -> b
$ [(Name, Object)] -> Dict
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HashMap.fromList [(Name, Object)]
dict

parseKey :: Parser (Name, Object)
parseKey :: Parser ByteString (Name, Object)
parseKey = do
  Parser ByteString ()
P.skipSpace
  Name
key <- Parser Name
parseName
  Object
val <- Parser Object
parseObject
  (Name, Object) -> Parser ByteString (Name, Object)
forall (m :: * -> *) a. Monad m => a -> m a
return (Name
key, Object
val)

-- | Parse an array
parseArray :: Parser Array
parseArray :: Parser Array
parseArray = do
  Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString Char -> Parser ByteString ())
-> Parser ByteString Char -> Parser ByteString ()
forall a b. (a -> b) -> a -> b
$ Char -> Parser ByteString Char
P.char Char
'['
  [Object]
array <- Parser Object -> Parser ByteString [Object]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser Object
parseObject
  Parser ByteString ()
P.skipSpace
  Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString Char -> Parser ByteString ())
-> Parser ByteString Char -> Parser ByteString ()
forall a b. (a -> b) -> a -> b
$ Char -> Parser ByteString Char
P.char Char
']'
  Array -> Parser Array
forall (m :: * -> *) a. Monad m => a -> m a
return (Array -> Parser Array) -> Array -> Parser Array
forall a b. (a -> b) -> a -> b
$ [Object] -> Array
forall a. [a] -> Vector a
Vector.fromList [Object]
array

-- | Parse number
parseNumber :: Parser Scientific
parseNumber :: Parser Scientific
parseNumber = [Parser Scientific] -> Parser Scientific
forall (f :: * -> *) a. Alternative f => [f a] -> f a
P.choice [
  Parser Scientific
P.scientific,
  Double -> Scientific
forall a. RealFloat a => a -> Scientific
Scientific.fromFloatDigits (Double -> Scientific)
-> Parser ByteString Double -> Parser Scientific
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
    (Parser ByteString Double -> Parser ByteString Double
forall a. Num a => Parser a -> Parser a
P.signed
      (Parser ByteString Double -> Parser ByteString Double)
-> Parser ByteString Double -> Parser ByteString Double
forall a b. (a -> b) -> a -> b
$ String -> Double
forall a. Read a => String -> a
read
      (String -> Double)
-> (ByteString -> String) -> ByteString -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String
"0."String -> String -> String
forall a. [a] -> [a] -> [a]
++)
      (String -> String)
-> (ByteString -> String) -> ByteString -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> String
BS8.unpack (ByteString -> Double)
-> Parser ByteString ByteString -> Parser ByteString Double
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
        (Char -> Parser ByteString Char
P.char Char
'.' Parser ByteString Char
-> Parser ByteString ByteString -> Parser ByteString ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser ByteString ByteString
P.takeWhile1 Char -> Bool
isDigit) :: Parser Double)
  ]

-- | Parse literal string
parseString :: Parser ByteString
parseString :: Parser ByteString ByteString
parseString = do
  Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString Char -> Parser ByteString ())
-> Parser ByteString Char -> Parser ByteString ()
forall a b. (a -> b) -> a -> b
$ Char -> Parser ByteString Char
P.char Char
'('
  String
str <- Int -> String -> Parser String
takeStr Int
0 []
  ByteString -> Parser ByteString ByteString
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> Parser ByteString ByteString)
-> ByteString -> Parser ByteString ByteString
forall a b. (a -> b) -> a -> b
$ String -> ByteString
BS8.pack String
str
  where
  takeStr :: Int -> String -> Parser String
  takeStr :: Int -> String -> Parser String
takeStr Int
lvl String
res = do
    Char
ch <- Parser ByteString Char
P.anyChar
    case Char
ch of
      Char
'(' -> Int -> String -> Parser String
takeStr (Int
lvl Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Char
ch Char -> String -> String
forall a. a -> [a] -> [a]
: String
res)
      Char
')' -> if Int
lvl Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
               then String -> Parser String
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> Parser String) -> String -> Parser String
forall a b. (a -> b) -> a -> b
$ String -> String
forall a. [a] -> [a]
reverse String
res
               else Int -> String -> Parser String
takeStr (Int
lvl Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) (Char
ch Char -> String -> String
forall a. a -> [a] -> [a]
: String
res)
      Char
'\\' -> do
        Char
ch' <- Parser ByteString Char
P.anyChar
        if Char
ch' Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"()\\" :: String)
          then Int -> String -> Parser String
takeStr Int
lvl (Char
ch' Char -> String -> String
forall a. a -> [a] -> [a]
: String
res)
          else case Char
ch' of
                 Char
'r' -> Int -> String -> Parser String
takeStr Int
lvl (Char
'\r' Char -> String -> String
forall a. a -> [a] -> [a]
: String
res)
                 Char
'n' -> Int -> String -> Parser String
takeStr Int
lvl (Char
'\n' Char -> String -> String
forall a. a -> [a] -> [a]
: String
res)
                 Char
'f' -> Int -> String -> Parser String
takeStr Int
lvl (Char
'\f' Char -> String -> String
forall a. a -> [a] -> [a]
: String
res)
                 Char
'b' -> Int -> String -> Parser String
takeStr Int
lvl (Char
'\b' Char -> String -> String
forall a. a -> [a] -> [a]
: String
res)
                 Char
't' -> Int -> String -> Parser String
takeStr Int
lvl (Char
'\t' Char -> String -> String
forall a. a -> [a] -> [a]
: String
res)
                 Char
'\r' -> Int -> String -> Parser String
takeStr Int
lvl String
res
                 Char
_ -> do
                   String
ds <- String -> Parser String
take3Digits [Char
ch']
                   let i :: Char
i = Int -> Char
forall a. Enum a => Int -> a
toEnum
                         (Int -> Char) -> (String -> Int) -> String -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int -> (Int, Char) -> Int) -> Int -> [(Int, Char)] -> Int
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl'
                             (\Int
acc (Int
a, Char
b) -> Int
acc Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
a Int -> Int -> Int
forall a. Num a => a -> a -> a
* Char -> Int
forall a. Enum a => a -> Int
charToInt Char
b)
                             Int
0
                         ([(Int, Char)] -> Int)
-> (String -> [(Int, Char)]) -> String -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Int] -> String -> [(Int, Char)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
1, Int
8, Int
64]
                         (String -> Char) -> String -> Char
forall a b. (a -> b) -> a -> b
$ String
ds
                   Int -> String -> Parser String
takeStr Int
lvl (Char
i Char -> String -> String
forall a. a -> [a] -> [a]
: String
res)
      Char
_ -> Int -> String -> Parser String
takeStr Int
lvl (Char
ch Char -> String -> String
forall a. a -> [a] -> [a]
: String
res)
  charToInt :: a -> Int
charToInt a
ch = a -> Int
forall a. Enum a => a -> Int
fromEnum a
ch Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
48
  take3Digits :: String -> Parser String
take3Digits String
ds
    | String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
ds Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
3
    = String -> Parser String
forall (m :: * -> *) a. Monad m => a -> m a
return String
ds
    | Bool
otherwise
    = do
      Char
d <- Parser ByteString Char
P.peekChar'
      if Char -> Bool
isDigit Char
d
        then do
          Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void Parser ByteString Char
P.anyChar
          String -> Parser String
take3Digits (Char
d Char -> String -> String
forall a. a -> [a] -> [a]
: String
ds)
        else
          String -> Parser String
forall (m :: * -> *) a. Monad m => a -> m a
return (String
ds String -> String -> String
forall a. [a] -> [a] -> [a]
++ Char -> String
forall a. a -> [a]
repeat Char
'0')

-- | Parse hex string
parseHexString :: Parser ByteString
parseHexString :: Parser ByteString ByteString
parseHexString = do
  Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString Char -> Parser ByteString ())
-> Parser ByteString Char -> Parser ByteString ()
forall a b. (a -> b) -> a -> b
$ Char -> Parser ByteString Char
P.char Char
'<'
  [Word8]
str <- Parser ByteString Word8 -> Parser ByteString [Word8]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser ByteString Word8
takeHex
  Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString Char -> Parser ByteString ())
-> Parser ByteString Char -> Parser ByteString ()
forall a b. (a -> b) -> a -> b
$ Char -> Parser ByteString Char
P.char Char
'>'
  ByteString -> Parser ByteString ByteString
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> Parser ByteString ByteString)
-> ByteString -> Parser ByteString ByteString
forall a b. (a -> b) -> a -> b
$ [Word8] -> ByteString
BS.pack [Word8]
str
  where
  takeHex :: Parser ByteString Word8
takeHex = do
    Char
ch1 <- (Char -> Bool) -> Parser ByteString Char
P.satisfy Char -> Bool
isHexDigit
    Char
ch2 <- (Char -> Bool) -> Parser ByteString Char
P.satisfy Char -> Bool
isHexDigit
    Word8 -> Parser ByteString Word8
forall (m :: * -> *) a. Monad m => a -> m a
return (Word8 -> Parser ByteString Word8)
-> Word8 -> Parser ByteString Word8
forall a b. (a -> b) -> a -> b
$ Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ Char -> Int
digitToInt Char
ch1 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
16 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Char -> Int
digitToInt Char
ch2

-- | Parse a reference
parseRef :: Parser Ref
parseRef :: Parser Ref
parseRef = do
  Int
obj <- Parser Int
forall a. Integral a => Parser a
P.decimal
  Parser ByteString ()
P.skipSpace
  Int
gen <- Parser Int
forall a. Integral a => Parser a
P.decimal
  Parser ByteString ()
P.skipSpace
  Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString Char -> Parser ByteString ())
-> Parser ByteString Char -> Parser ByteString ()
forall a b. (a -> b) -> a -> b
$ Char -> Parser ByteString Char
P.char Char
'R'
  Ref -> Parser Ref
forall (m :: * -> *) a. Monad m => a -> m a
return (Ref -> Parser Ref) -> Ref -> Parser Ref
forall a b. (a -> b) -> a -> b
$ Int -> Int -> Ref
R Int
obj Int
gen

-- | Parse a name
parseName :: Parser Name
parseName :: Parser Name
parseName = do
  Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString Char -> Parser ByteString ())
-> Parser ByteString Char -> Parser ByteString ()
forall a b. (a -> b) -> a -> b
$ Char -> Parser ByteString Char
P.char Char
'/'
  -- XXX: escaping
  ByteString
bs <- (Char -> Bool) -> Parser ByteString ByteString
P.takeWhile1 Char -> Bool
isRegularChar
  (String -> Parser Name)
-> (Name -> Parser Name) -> Either String Name -> Parser Name
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Parser Name
forall (m :: * -> *) a. MonadFail m => String -> m a
fail Name -> Parser Name
forall (m :: * -> *) a. Monad m => a -> m a
return (Either String Name -> Parser Name)
-> Either String Name -> Parser Name
forall a b. (a -> b) -> a -> b
$
    ByteString -> Either String Name
Name.make ByteString
bs

-- | Whether the character can appear in 'Name'
isRegularChar :: Char -> Bool
isRegularChar :: Char -> Bool
isRegularChar = (Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` (String
"[]()/<>{}% \n\r" :: String))

-- | Parse bool value
parseBool :: Parser Bool
parseBool :: Parser Bool
parseBool = [Parser Bool] -> Parser Bool
forall (f :: * -> *) a. Alternative f => [f a] -> f a
P.choice [
  ByteString -> Parser ByteString ByteString
P.string ByteString
"true" Parser ByteString ByteString -> Parser Bool -> Parser Bool
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Bool -> Parser Bool
forall (m :: * -> *) a. Monad m => a -> m a
return Bool
True,
  ByteString -> Parser ByteString ByteString
P.string ByteString
"false" Parser ByteString ByteString -> Parser Bool -> Parser Bool
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Bool -> Parser Bool
forall (m :: * -> *) a. Monad m => a -> m a
return Bool
False
  ]

-- | Consumes input till stream's data
--
-- Use 'parseDict' then 'parseTillStreamData'
-- to determine whether the object is dictionary or stream.
-- If 'parseTillStreamData' fails, then it is a dictionary.
-- Otherwise it is stream, and current position in input data
-- will point to stream's data start
--
-- >>> parse (parseDict >>= \dict -> parseTillStreamData >> return dict) "<</Key 123>>\nstream\n1234\nendstream"
-- Done "1234\nendstream" Dict [(Name "Key",ONumber (NumInt 123))]
parseTillStreamData :: Parser ()
parseTillStreamData :: Parser ByteString ()
parseTillStreamData = do
  Parser ByteString ()
P.skipSpace
  Parser ByteString ByteString -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString ByteString -> Parser ByteString ())
-> Parser ByteString ByteString -> Parser ByteString ()
forall a b. (a -> b) -> a -> b
$ ByteString -> Parser ByteString ByteString
P.string ByteString
"stream"
  Parser ByteString ()
endOfLine

-- | Parse any 'Object' except 'Stream'
-- because for 'Stream' we need offset of data in file
--
-- >>> parseOnly parseObject "/Name"
-- Right (OName (Name "Name"))
parseObject :: Parser Object
parseObject :: Parser Object
parseObject = do
  Parser ByteString ()
P.skipSpace
  [Parser Object] -> Parser Object
forall (f :: * -> *) a. Alternative f => [f a] -> f a
P.choice [
    Object -> ByteString -> Object
forall a b. a -> b -> a
const Object
Null (ByteString -> Object)
-> Parser ByteString ByteString -> Parser Object
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ByteString -> Parser ByteString ByteString
P.string ByteString
"null",
    Name -> Object
Name (Name -> Object) -> Parser Name -> Parser Object
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Name
parseName,
    Bool -> Object
Bool (Bool -> Object) -> Parser Bool -> Parser Object
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bool
parseBool,
    Dict -> Object
Dict (Dict -> Object) -> Parser Dict -> Parser Object
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Dict
parseDict,
    Array -> Object
Array (Array -> Object) -> Parser Array -> Parser Object
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Array
parseArray,
    ByteString -> Object
String (ByteString -> Object)
-> Parser ByteString ByteString -> Parser Object
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString ByteString
parseString,
    ByteString -> Object
String (ByteString -> Object)
-> Parser ByteString ByteString -> Parser Object
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString ByteString
parseHexString,
    Ref -> Object
Ref (Ref -> Object) -> Parser Ref -> Parser Object
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Ref
parseRef,
    Scientific -> Object
Number (Scientific -> Object) -> Parser Scientific -> Parser Object
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Scientific
parseNumber
    ]

-- | Parse object. Input position should point
-- to offset defined in XRef
--
-- >>> parseOnly parseIndirectObject "1 2 obj\n12"
-- Right (Ref 1 2,ONumber (NumInt 12))
parseIndirectObject :: Parser (Ref, Object)
parseIndirectObject :: Parser (Ref, Object)
parseIndirectObject = do
  Parser ByteString ()
P.skipSpace
  Int
index <- Parser Int
forall a. Integral a => Parser a
P.decimal :: Parser Int
  Parser ByteString ()
P.skipSpace
  Int
gen <- Parser Int
forall a. Integral a => Parser a
P.decimal :: Parser Int
  Parser ByteString ()
P.skipSpace
  Parser ByteString ByteString -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString ByteString -> Parser ByteString ())
-> Parser ByteString ByteString -> Parser ByteString ()
forall a b. (a -> b) -> a -> b
$ ByteString -> Parser ByteString ByteString
P.string ByteString
"obj"
  Parser ByteString ()
P.skipSpace
  Object
obj <- Parser Object
parseObject
  let ref :: Ref
ref = Int -> Int -> Ref
R Int
index Int
gen
  case Object
obj of
    Dict Dict
d -> [Parser (Ref, Object)] -> Parser (Ref, Object)
forall (f :: * -> *) a. Alternative f => [f a] -> f a
P.choice [
      Parser ByteString ()
parseTillStreamData Parser ByteString ()
-> Parser (Ref, Object) -> Parser (Ref, Object)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Ref, Object) -> Parser (Ref, Object)
forall (m :: * -> *) a. Monad m => a -> m a
return (Ref
ref, Stream -> Object
Stream (Dict -> Int64 -> Stream
S Dict
d Int64
0)),
      (Ref, Object) -> Parser (Ref, Object)
forall (m :: * -> *) a. Monad m => a -> m a
return (Ref
ref, Dict -> Object
Dict Dict
d)
      ]
    Object
_ -> (Ref, Object) -> Parser (Ref, Object)
forall (m :: * -> *) a. Monad m => a -> m a
return (Ref
ref, Object
obj)