{-# LANGUAGE OverloadedStrings #-}

-- | Parsers for XRef

module Pdf.Core.Parsers.XRef
( startXRef
, tableXRef
, parseSubsectionHeader
, parseTrailerAfterTable
, parseTableEntry
)
where

import Pdf.Core.Object
import Pdf.Core.Parsers.Object
import Pdf.Core.Parsers.Util

import Data.Int
import Data.Attoparsec.ByteString (Parser)
import qualified Data.Attoparsec.ByteString.Char8 as P
import Control.Applicative (many)

-- for doctest
-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Data.Attoparsec.ByteString.Char8

-- | Offset of the very last xref table
--
-- Before calling it, make sure your are currently somewhere near
-- the end of pdf file. Otherwice it can eat all the memory.
-- E.g. examine only the last 1KB
--
-- >>> parseOnly startXRef "anything...startxref\n222\n%%EOF...blah\nstartxref\n123\n%%EOF"
-- Right 123
startXRef :: Parser Int64
startXRef :: Parser Int64
startXRef = do
  [Int64]
res <- Parser Int64 -> Parser ByteString [Int64]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Parser Int64 -> Parser ByteString [Int64])
-> Parser Int64 -> Parser ByteString [Int64]
forall a b. (a -> b) -> a -> b
$ do
    [Char]
_ <- Parser ByteString Char
-> Parser ByteString ByteString -> Parser ByteString [Char]
forall (f :: * -> *) a b. Alternative f => f a -> f b -> f [a]
P.manyTill Parser ByteString Char
P.anyChar (Parser ByteString ByteString -> Parser ByteString [Char])
-> Parser ByteString ByteString -> Parser ByteString [Char]
forall a b. (a -> b) -> a -> b
$ ByteString -> Parser ByteString ByteString
P.string ByteString
"startxref"
    Parser ()
P.skipSpace
    Int64
offset <- Parser Int64
forall a. Integral a => Parser a
P.decimal
    Parser ()
P.skipSpace
    ByteString
_ <- ByteString -> Parser ByteString ByteString
P.string ByteString
"%%EOF"
    Int64 -> Parser Int64
forall (m :: * -> *) a. Monad m => a -> m a
return Int64
offset
  case [Int64]
res of
    [] -> [Char] -> Parser Int64
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"Trailer not found"
    [Int64]
xs -> Int64 -> Parser Int64
forall (m :: * -> *) a. Monad m => a -> m a
return (Int64 -> Parser Int64) -> Int64 -> Parser Int64
forall a b. (a -> b) -> a -> b
$ [Int64] -> Int64
forall a. [a] -> a
last [Int64]
xs

-- | When current input position points to xref stream
-- (or doesn't point to xref at all), the parser will fail.
-- When it points to xref table, the parser will succeed
-- and input position will point to the first xref subsection
--
-- >>> parseOnly tableXRef "xref\n"
-- Right ()
-- >>> parseOnly tableXRef "not xref"
-- Left "Failed reading: takeWith"
tableXRef :: Parser ()
tableXRef :: Parser ()
tableXRef = do
  ByteString
_ <- ByteString -> Parser ByteString ByteString
P.string ByteString
"xref"
  Parser ()
endOfLine

-- | Parse subsection header, return (the first object index, number of object)
--
-- Input position will point to the first object
parseSubsectionHeader :: Parser (Int, Int)
parseSubsectionHeader :: Parser (Int, Int)
parseSubsectionHeader = do
  Int
start <- Parser Int
forall a. Integral a => Parser a
P.decimal
  Parser ()
P.skipSpace
  Int
count <- Parser Int
forall a. Integral a => Parser a
P.decimal
  Parser ()
endOfLine
  (Int, Int) -> Parser (Int, Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
start, Int
count)

-- | Parse trailer located after XRef table
--
-- Input position should point to the \"trailer\" keyword
parseTrailerAfterTable :: Parser Dict
parseTrailerAfterTable :: Parser Dict
parseTrailerAfterTable = do
  Parser ()
P.skipSpace
  ByteString
_ <- ByteString -> Parser ByteString ByteString
P.string ByteString
"trailer"
  Parser ()
endOfLine
  Parser ()
P.skipSpace
  Parser Dict
parseDict

-- | Parse XRef table entry. Returns offset, generation and whether the object is free.
parseTableEntry :: Parser (Int64, Int, Bool)
parseTableEntry :: Parser (Int64, Int, Bool)
parseTableEntry = do
  Int64
offset <- Parser Int64
forall a. Integral a => Parser a
P.decimal
  Parser ()
P.skipSpace
  Int
generation <- Parser Int
forall a. Integral a => Parser a
P.decimal
  Parser ()
P.skipSpace
  Char
c <- Parser ByteString Char
P.anyChar
  case Char
c of
    Char
'n' -> (Int64, Int, Bool) -> Parser (Int64, Int, Bool)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int64
offset, Int
generation, Bool
False)
    Char
'f' -> (Int64, Int, Bool) -> Parser (Int64, Int, Bool)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int64
offset, Int
generation, Bool
True)
    Char
_ -> [Char] -> Parser (Int64, Int, Bool)
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> Parser (Int64, Int, Bool))
-> [Char] -> Parser (Int64, Int, Bool)
forall a b. (a -> b) -> a -> b
$ [Char]
"error parsing XRef table entry: unknown char: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char
c]