{-# LANGUAGE OverloadedStrings #-}

{-|
Module      : PDF.Object
Description : Function to parse objects in a PDF file
Copyright   : (c) Keiichiro Shikano, 2016
License     : MIT
Maintainer  : k16.shikano@gmail.com

Functions to parse and show objects in a PDF file.
It provides a basic way to find information from a PDF file.
-}

module PDF.Object
  ( parsePdfLetters
  , parsePDFObj
  , parseRefsArray
  , pdfObj
  , pdfletters
  , pdfarray
  , pdfdictionary
  , xref
  ,
  ) where

import Data.Char (chr)
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as BSL
import qualified Data.Text as T
import Data.Text.Encoding (decodeUtf16BEWith)
import Data.Text.Encoding.Error (strictDecode)
import Numeric (readOct, readHex)
import Data.ByteString.Builder (toLazyByteString, word16BE)

import Data.Attoparsec.ByteString.Char8 hiding (take)
import Data.Attoparsec.Combinator
import Control.Applicative

import Debug.Trace

import PDF.Definition

-- | Skip any run of PDF whitespace characters and comments.
-- Never fails: it succeeds without consuming input when there is nothing
-- to skip.
spaces :: Parser ()
spaces = skipMany ignorable
  where
    -- A single skippable item: a whole comment or one whitespace character.
    ignorable :: Parser Char
    ignorable = comment <|> oneOf pdfspaces
-- | Match a single character that belongs to the given set.
-- The set is interpreted by attoparsec's 'inClass', so a @-@ between two
-- characters denotes a range.
oneOf :: String -> Parser Char
oneOf charset = satisfy (inClass charset)
-- | Match a single character that does /not/ belong to the given set.
-- The set is interpreted by attoparsec's 'notInClass', so a @-@ between two
-- characters denotes a range.
noneOf :: String -> Parser Char
noneOf charset = satisfy (notInClass charset)

-- parse pdf objects

-- | Parse one indirect object of the form @N G obj ... endobj@ and return
-- the object number @N@ together with the raw, still unparsed bytes found
-- between the @obj@ and @endobj@ keywords.
--
-- The generation number @G@ is read and discarded. It may have any number
-- of digits, and the header tokens may be separated by any whitespace or
-- comments (not only a single space).
--
-- Any @xref@ or @startxref@ section that follows the object is skipped so
-- that the parser can be applied repeatedly over a whole file.
pdfObj :: Parser PDFBS
pdfObj = do
  spaces
  objn <- many1 digit
  spaces
  _ <- many1 digit -- generation number: validated but not kept
  spaces
  _ <- string "obj"
  object <- manyTill anyChar (try (string "endobj"))
  spaces
  skipMany xref
  skipMany startxref
  -- objn consists solely of decimal digits, so 'read' cannot fail to parse.
  return (read objn, BS.pack object)

-- | The characters skipped as whitespace by 'spaces':
-- NUL, horizontal tab, line feed, form feed, carriage return and space.
pdfspaces :: [Char]
pdfspaces = ['\NUL', '\t', '\n', '\f', '\r', ' ']

-- | Parse the raw body of an indirect object into its list of 'Obj' values,
-- keeping the object number unchanged.
--
-- If the body cannot be parsed, the error message is discarded and the
-- result is the single-element list @[PdfNull]@.
parsePDFObj :: PDFBS -> PDFObj
parsePDFObj (n, pdfobject) = (n, either (const [PdfNull]) id parsed)
  where
    parsed = parseOnly (spaces >> many1 (try pdfobj <|> try objother)) pdfobject

-- | Parse a PDF comment: a @%@ up to and including the next carriage return
-- or line feed. The result is always a single space character.
--
-- A @%@ that is immediately followed by another @%@ is rejected, so markers
-- such as @%%EOF@ are left in the input for other parsers to find.
--
-- The character after the @%@ is only inspected, not consumed. An empty
-- comment (a @%@ directly followed by a line terminator) therefore ends at
-- that terminator instead of swallowing the following line as well.
--
-- Fails if the input ends before a line terminator is found.
comment :: Parser Char
comment = do
  _ <- char '%'
  _ <- lookAhead (noneOf "%")
  _ <- manyTill anyChar (oneOf "\r\n")
  return ' '

-- | Skip a cross-reference section: the keyword @xref@ and everything after
-- it up to and including the next @%%EOF@ marker, plus surrounding
-- whitespace and comments. The skipped text is discarded; the result is
-- always the empty string.
xref :: Parser String
xref =
  "" <$ (  spaces
        *> string "xref"
        *> spaces
        *> manyTill anyChar (try (string "%%EOF"))
        *> spaces
        )

-- | Skip a @startxref@ section: the keyword @startxref@ and everything after
-- it up to and including the next @%%EOF@ marker, plus surrounding
-- whitespace and comments. The skipped text is discarded; the result is
-- always the empty string.
startxref :: Parser String
startxref =
  "" <$ (  spaces
        *> string "startxref"
        *> spaces
        *> manyTill anyChar (try (string "%%EOF"))
        *> spaces
        )
  
-- | Parse the data of a stream object: everything between the keywords
-- @stream@ and @endstream@.
--
-- Note that 'spaces' runs right after the @stream@ keyword, so leading
-- whitespace characters (and anything that parses as a comment) at the
-- start of the data are dropped rather than returned.
stream :: Parser PDFStream
stream =
  string "stream"
    *> spaces
    *> (BSL.pack <$> manyTill anyChar (try (string "endstream")))

-- | Parse a dictionary object: @<<@, a sequence of name/value entries, and
-- the closing @>>@. Whitespace and comments are allowed before @<<@, after
-- it, and before @>>@.
pdfdictionary :: Parser Obj
pdfdictionary = do
  spaces
  _ <- string "<<"
  spaces
  entries <- manyTill dictEntry (try (spaces >> string ">>"))
  return (PdfDict entries)

-- | Parse a single dictionary entry: a name (the key) followed by an
-- arbitrary object (the value).
dictEntry :: Parser (Obj, Obj)
dictEntry = do
  key <- pdfname
  value <- pdfobj
  return (key, value)

-- | Parse an array object: @[@, a sequence of objects, and the closing @]@.
-- Whitespace and comments are allowed after @[@ and before @]@.
pdfarray :: Parser Obj
pdfarray = do
  _ <- string "["
  spaces
  elements <- manyTill pdfobj (try (spaces >> string "]"))
  return (PdfArray elements)

-- | Parse a name object such as @/Type@. The leading slash is kept in the
-- resulting 'PdfName'.
--
-- The name extends up to (but not including) the next delimiter: one of
-- @> < ] [ ) (@, a space, a line feed, a carriage return, or another @/@.
-- Trailing whitespace and comments are skipped. The parser fails if the
-- input ends before a delimiter is seen.
pdfname :: Parser Obj
pdfname = do
  _ <- char '/'
  body <- manyTill anyChar (try (lookAhead (oneOf "><][)( \n\r/")))
  spaces
  return (PdfName ('/' : body))

-- | Parse a literal string object (text in parentheses) into a 'PdfText'.
pdfletters :: Parser Obj
pdfletters = fmap PdfText parsePdfLetters

-- | Parse a literal string: text enclosed in parentheses, with backslash
-- escapes resolved, up to the first unescaped @)@.
--
-- If a UTF-16BE byte order mark is found (either as the raw bytes
-- @0xFE 0xFF@ or written as the octal escapes @\\376\\377@), the text that
-- follows it, up to the closing parenthesis, is decoded as UTF-16BE.
parsePdfLetters :: Parser String
parsePdfLetters = do
  _ <- char '('
  chunks <- manyTill chunk (try (char ')'))
  return (concat chunks)
  where
    -- One piece of the string; the BOM-prefixed forms are tried first.
    chunk :: Parser String
    chunk = choice [try pdfutf, try pdfoctutf, pdfletter]

    -- A single (possibly escaped) character. The order of alternatives
    -- matters: named escapes, then octal escapes from longest to shortest,
    -- then any non-backslash character, and finally a lone backslash.
    pdfletter :: Parser String
    pdfletter =
      choice (namedEscapes ++ octalEscapes ++ [ordinary, strayBackslash])

    -- Escape sequence as written in the file, paired with what it stands for.
    escapeTable :: [(BS.ByteString, String)]
    escapeTable =
      [ ("\\(", "(")
      , ("\\)", ")")
      , ("\\\\", "\\")
      , ("\\n", "\n")
      , ("\\r", "\r")
      , ("\\t", "\t")
      , ("\\b", "\b")
      , ("\\f", "\f")
      ]

    namedEscapes :: [Parser String]
    namedEscapes =
      [ replacement <$ try (string escape)
      | (escape, replacement) <- escapeTable
      ]

    -- A backslash followed by three, two or one octal digits.
    octalEscapes :: [Parser String]
    octalEscapes =
      [ octal <$> try (char '\\' *> count width octDigit)
      | width <- [3, 2, 1]
      ]

    octDigit :: Parser Char
    octDigit = oneOf "01234567"

    -- Any character other than a backslash stands for itself.
    ordinary :: Parser String
    ordinary = (: []) <$> try (noneOf "\\")

    -- A backslash that starts no recognised escape is dropped.
    strayBackslash :: Parser String
    strayBackslash = "" <$ string "\\"

    -- Turn a string of octal digits into the one-character string with that
    -- code. Only ever applied to one to three octal digits (see
    -- 'octalEscapes'), so 'readOct' always yields a parse and 'head' is safe.
    octal :: String -> String
    octal digits = [chr (fst (head (readOct digits)))]

    -- Text introduced by the raw bytes 0xFE 0xFF.
    pdfutf :: Parser String
    pdfutf = utf16After (string "\254\255")

    -- Text introduced by the same two bytes written as octal escapes.
    pdfoctutf :: Parser String
    pdfoctutf = utf16After (string "\\376\\377")

    -- Consume the given byte order mark, collect characters up to (but not
    -- including) the next unescaped ")", and decode them as UTF-16BE.
    utf16After :: Parser BS.ByteString -> Parser String
    utf16After bom = do
      parts <- bom *> manyTill pdfletter (lookAhead (string ")"))
      return (utf16be (concat parts))
        
-- | Decode a string whose characters each stand for one byte as UTF-16
-- (big endian) text.
--
-- Decoding uses 'strictDecode', so input that is not valid UTF-16BE raises
-- an exception when the result is evaluated.
utf16be :: String -> String
utf16be chars = T.unpack decoded
  where
    decoded = decodeUtf16BEWith strictDecode (BS.pack chars)

-- | Parse a stream (see 'stream') and wrap its data in a 'PdfStream'.
pdfstream :: Parser Obj
pdfstream = fmap PdfStream stream

-- | Parse a numeric object (integer or real) into a 'PdfNumber', then skip
-- trailing whitespace and comments.
--
-- Accepted forms are an optional single sign (@+@ or @-@) followed by
-- either digits with an optional fractional part (@12@, @12.5@, @12.@) or a
-- leading decimal point with digits (@.5@).
--
-- The text is normalised to @[-]digits.digits@ before being converted, so
-- the conversion cannot fail on forms such as @4.@ that 'read' would
-- otherwise reject.
pdfnumber :: Parser Obj
pdfnumber = PdfNumber <$> pdfdigit
  where
    pdfdigit :: Parser Double
    pdfdigit = do
      sign <- signPrefix
      num <- leadingDot <|> leadingDigits
      spaces
      -- sign ++ num always has the shape [-]digits.digits, which 'read'
      -- accepts for Double.
      return (read (sign ++ num))

    -- At most one sign; a '+' is accepted but contributes nothing.
    signPrefix :: Parser String
    signPrefix = option "" (("-" <$ char '-') <|> ("" <$ char '+'))

    -- ".5" becomes "0.5"
    leadingDot :: Parser String
    leadingDot = ("0." ++) <$> (char '.' *> many1 digit)

    -- "12" and "12." become "12.0"; "12.5" stays "12.5"
    leadingDigits :: Parser String
    leadingDigits = do
      whole <- many1 digit
      frac <- option "" (char '.' *> many digit)
      return (whole ++ "." ++ (if null frac then "0" else frac))

-- | Parse a hexadecimal string object: hex digits enclosed in @<@ and @>@.
--
-- If the digits start with the byte order mark @feff@ (or @FEFF@) and at
-- least one more digit follows, the remaining digits are converted with
-- 'pdfhexletter'. Otherwise the digits are returned unchanged.
pdfhex :: Parser Obj
pdfhex = PdfHex <$> hex
  where
    hexDigits :: String
    hexDigits = "0123456789abcdefABCDEF"

    hex :: Parser String
    hex = do
      _ <- char '<'
      lets <- BS.pack <$> manyTill (oneOf hexDigits) (try (char '>'))
      return $
        either
          (const (BS.unpack lets))
          (pdfhexletter . BS.pack)
          (parseOnly bomThenDigits lets)

    -- The byte order mark followed by the digits that carry the text.
    bomThenDigits :: Parser String
    bomThenDigits =
      try (string "feff" <|> string "FEFF") *> many1 (oneOf hexDigits)

-- | Turn a run of hex digits into text.
--
-- The input is read from the front in groups accepted by 'pdfhexutf16be';
-- the pieces are joined and passed to 'utf16be'. Input left over after the
-- last complete group is not inspected. If not even one group can be read,
-- the input is returned unchanged as a 'String'.
pdfhexletter :: ByteString -> String
pdfhexletter s = either (const asIs) utf16be units
  where
    asIs  = BS.unpack s
    units = parseOnly (concat <$> many1 pdfhexutf16be) s

-- | Read exactly four hexadecimal digits and return the 16-bit value they
-- spell as two characters, high byte first.
pdfhexutf16be :: Parser String
pdfhexutf16be = do
  quad <- count 4 (oneOf "0123456789ABCDEFabcdef")
  -- Match on the result instead of taking 'head', so the function stays
  -- total even though the empty case is not expected to occur.
  case readHex quad of
    (unit, _) : _ ->
      return (BSL.unpack (toLazyByteString (word16BE unit)))
    [] ->
      -- quad holds only hex digits, so readHex should always yield a value.
      fail "pdfhexutf16be: unreadable hex digits"

-- | Parse the keyword @true@ or @false@ into a 'PdfBool'.
pdfbool :: Parser Obj
pdfbool = PdfBool <$> choice [yes, no]
  where
    yes = string "true"  *> pure True
    no  = string "false" *> pure False

-- | Parse the keyword @null@ into 'PdfNull'.
pdfnull :: Parser Obj
pdfnull = string "null" *> pure PdfNull

-- | Parse a single PDF object of any supported kind and then skip whatever
-- 'spaces' skips after it.
--
-- The alternatives are tried from top to bottom; the first one that
-- succeeds wins, so the order of the list matters.
pdfobj :: Parser Obj
pdfobj = choice
  [ lexeme rrefs
  , lexeme pdfname
  , lexeme pdfnumber
  , lexeme pdfhex          -- Hexadecimal String
  , lexeme pdfbool
  , lexeme pdfnull
  , lexeme pdfarray
  , lexeme pdfdictionary
  , {-# SCC pdfstream #-} lexeme pdfstream
  , lexeme pdfletters      -- Literal String
  ]
  where
    -- attoparsec rewinds the input on failure by itself, so the
    -- alternatives need no explicit 'try'.
    lexeme p = p <* spaces

-- | Parse an indirect reference such as @12 0 R@ into an 'ObjRef' holding
-- the object number.
--
-- The generation number is read but discarded. It may be more than one
-- digit long, so @12 10 R@ is accepted as well.
rrefs :: Parser Obj
rrefs = do
  objnum <- many1 digit
  spaces
  _ <- many1 digit  -- generation number, not kept
  spaces
  _ <- string "R"
  spaces
  -- objnum consists of decimal digits only, so 'read' has a number to parse.
  return $ ObjRef (read objnum)

-- | Collect every character up to the next one accepted by 'space' and wrap
-- the collected text in 'ObjOther'. The terminating character is consumed
-- but is not part of the result.
objother :: Parser Obj
objother = do
  raw <- anyChar `manyTill` space
  return (ObjOther raw)

-- | Collect the numbers carried by the 'ObjRef' elements of a list, keeping
-- their order and dropping every other kind of object.
parseRefsArray :: [Obj] -> [Int]
parseRefsArray objs = [n | ObjRef n <- objs]