module Data.Rakhana.XRef
( XRef(..)
, XRefException(..)
, CObj(..)
, FObj(..)
, UObj(..)
, XRefStream(..)
, getXRef
, getXRefPos
) where
import Prelude hiding (take)
import Data.Bits (shiftL)
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Internal as L
import Data.Foldable (traverse_)
import qualified Data.Map.Strict as M
import Data.Maybe (fromMaybe)
import Data.Word
import Codec.Compression.Zlib
import Codec.Compression.Zlib.Internal
import Control.Lens
import Control.Monad.State.Strict
import Data.Attoparsec.ByteString
import qualified Data.Attoparsec.ByteString.Lazy as PL
import Data.Attoparsec.ByteString.Char8
import Pipes.Safe ()
import Data.Rakhana.Internal.Parsers
import Data.Rakhana.Internal.Types
import Data.Rakhana.Tape
import Data.Rakhana.Util.Drive
data Entry
= FreeObject Integer Int
| UsedObject Integer Int
| CompressedObject Integer Int
deriving Show
data XRef
= XRef
{ xrefFirstNumber :: !Int
, xrefObjectCount :: !Int
, xrefUTable :: !UTable
, xrefFTable :: !FTable
, xrefCTable :: !CTable
, xrefTrailer :: !Dictionary
, xrefStream :: !(Maybe XRefStream)
}
deriving Show
data Predictor
= Png_Up
| Predictor_Unsupported Integer
deriving Show
data DecodeParms
= DecodeParms
{ decodeParmsColumns :: !Integer
, decodeParmsPredictor :: !Predictor
}
deriving Show
data XRefStream
= XRefStream
{ xrefStreamLength :: !Int
, xrefStreamSize :: !Integer
, xrefStreamFirstNumber :: !Integer
, xrefStreamEntryCount :: !Integer
, xrefStreamPrev :: !(Maybe Integer)
, xrefStreamW :: !(Integer, Integer, Integer)
, xrefStreamEntryWidth :: !Integer
, xrefStreamDecodeParms :: !(Maybe DecodeParms)
, xrefStreamFilter :: !(Maybe Filter)
, xrefStreamPos :: !Integer
, xrefStreamDict :: !Dictionary
}
deriving Show
data UObj
= UObj
{ uObjOff :: !Integer
, uObjGen :: !Int
}
deriving Show
data CObj
= CObj
{ cObjNum :: !Int
, cObjIdx :: !Integer
}
deriving Show
data FObj
= FObj
{ fObjNxtNum :: !Int
, fObjGen :: !Int
}
deriving Show
type FTable = M.Map (Int,Int) FObj
type UTable = M.Map (Int,Int) UObj
type CTable = M.Map (Int,Int) CObj
data ObjType
= Free
| Used
| Compressed
deriving Show
data XRefException
= XRefParsingException String
| InvalidXRefStream
| UnsupportedFilter B.ByteString
| UnsupportedPredictor Integer
| ZLibException String String
deriving Show
data ExtractState
= ExtractState
{ _extractType :: !ObjType
, _extractOffset :: !Integer
, _extractGen :: !Int
}
data UnpredictState
= UnpredictState
{ _unpredictPrev :: ![Word8]
, _unpredictId :: !Int
, _unpredictUTable :: !UTable
, _unpredictFTable :: !FTable
, _unpredictCTable :: !CTable
}
data NopredictState
= NopredictState
{ _nopredictId :: !Int
, _nopredictUTable :: !UTable
, _nopredictFTable :: !FTable
, _nopredictCTable :: !CTable
}
makeLenses ''ExtractState
makeLenses ''UnpredictState
makeLenses ''NopredictState
bufferSize :: Int
bufferSize = 4096
getXRefPos :: Monad m => Drive m (Either XRefException Integer)
getXRefPos
= do driveBottom
driveBackward
skipEOL
mE <- parseEOF
case mE of
Just e
-> return $ Left e
Nothing
-> do skipEOL
p <- parseXRefPosInteger
skipEOL
mR <- parseStartXRef
return $ maybe (Right p) Left mR
getXRef :: Monad m => Header -> Integer -> Drive m (Either XRefException XRef)
getXRef h pos
= do rE <- crossRef pos
case rE of
Left e
| headerMaj h == 1 && headerMin h < 5
-> return $ Left e
| otherwise
-> crossRefStream pos
_ -> return rE
crossRef :: Monad m => Integer -> Drive m (Either XRefException XRef)
crossRef pos
= do driveTop
driveForward
driveSeek pos
eR <- driveParse bufferSize parseXRef
return $ either (Left . XRefParsingException) Right eR
crossRefStream :: Monad m => Integer -> Drive m (Either XRefException XRef)
crossRefStream offset = loop (offset, Nothing)
where
loop (off, newerRefM)
= do xrefE <- crossRefStreamStep off
case xrefE of
Left e -> return $ Left e
Right xref
-> let prevM = xrefStream xref >>= xrefStreamPrev
upd nRef
= let nUTable
= M.union (xrefUTable nRef)
(xrefUTable xref)
nCTable
= M.union (xrefCTable nRef)
(xrefCTable xref) in
nRef { xrefUTable = nUTable
, xrefCTable = nCTable
} in
case prevM of
Nothing ->
let updRef = maybe xref upd newerRefM in
return $ Right updRef
Just prev
-> do xrefE' <- loop (prev, Just xref)
case xrefE' of
Left e'
-> return $ Left e'
Right xref'
-> let updRef = maybe xref' upd
newerRefM
in return $ Right updRef
crossRefStreamStep :: Monad m => Integer -> Drive m (Either XRefException XRef)
crossRefStreamStep offset
= do streamE <- parseXRefStream offset
let xstreamE = streamE >>= validateXRefStream
case xstreamE of
Left e -> return $ Left e
Right xstream
-> do let len = xrefStreamLength xstream
filt = xrefStreamFilter xstream
pos = xrefStreamPos xstream
driveSeek pos
bs <- driveGetLazy len
let dbsE = decodeBS filt bs
dparms = xrefStreamDecodeParms xstream
mPred = fmap decodeParmsPredictor dparms
res =
case mPred of
Nothing -> dbsE >>= noPredict xstream
Just prd -> dbsE >>= unpredict prd xstream
return res
parseXRefStream :: Monad m => Integer -> Drive m (Either XRefException Stream)
parseXRefStream offset
= do driveTop
driveForward
driveSeek offset
rE <- driveParseObject 128
case rE of
Left e -> return $ Left $ XRefParsingException e
Right r ->
let xstream = r ^. _3
expt = XRefParsingException "Expected a XRef Stream" in
return $ maybe (Left $ expt) Right (xstream ^? _Stream)
getFilter :: Dictionary -> Maybe Filter
getFilter dict
= dict ^? dictKey "Filter" . _Name . to toFilt
where
toFilt "FlateDecode" = FlateDecode
toFilt x = Filter_Unsupported x
decodeParms :: Dictionary -> Maybe DecodeParms
decodeParms dict
= do parms <- dict ^? dictKey "DecodeParms" . _Dict
col <- parms ^? dictKey "Columns" . _Number . _Natural
prd <- parms ^? dictKey "Predictor" . _Number . _Natural . to toPred
return DecodeParms
{ decodeParmsColumns = col
, decodeParmsPredictor = prd
}
where
toPred 12 = Png_Up
toPred x = Predictor_Unsupported x
decompressErrorStr :: DecompressError -> String
decompressErrorStr TruncatedInput = "TruncatedInput"
decompressErrorStr DictionaryRequired = "DictionaryRequired"
decompressErrorStr DataError = "DataError"
zlibDecompress :: L.ByteString -> Either XRefException L.ByteString
zlibDecompress bs
= foldDecompressStream go (Right L.Empty)
(\code msg -> Left $ ZLibException (decompressErrorStr code) msg) $
decompressWithErrors zlibFormat defaultDecompressParams bs
where
go b aE = fmap (\b' -> L.Chunk b b') aE
decodeBS :: Maybe Filter -> L.ByteString -> Either XRefException L.ByteString
decodeBS (Just filt) bs
= case filt of
FlateDecode -> zlibDecompress bs
Filter_Unsupported x -> Left $ UnsupportedFilter x
decodeBS _ bs
= Right bs
unpredict :: Predictor
-> XRefStream
-> L.ByteString
-> Either XRefException XRef
unpredict p xstream input
= case p of
Png_Up -> unpredictPngUp xstream input
Predictor_Unsupported x -> Left $ UnsupportedPredictor x
noPredict :: XRefStream -> L.ByteString -> Either XRefException XRef
noPredict xstream input
= case PL.parse parser input of
PL.Fail _ _ e -> Left $ XRefParsingException e
PL.Done _ bs -> Right bs
where
width = fromIntegral $ xrefStreamEntryWidth xstream
firstNumber = fromIntegral $ xrefStreamFirstNumber xstream
ecount = fromIntegral $ xrefStreamEntryCount xstream
start = NopredictState
{ _nopredictId = firstNumber 1
, _nopredictUTable = M.empty
, _nopredictFTable = M.empty
, _nopredictCTable = M.empty
}
parser = evalStateT aState start
aState = do replicateM_ ecount action
utable <- use nopredictUTable
ftable <- use nopredictFTable
ctable <- use nopredictCTable
return XRef
{ xrefFirstNumber = firstNumber
, xrefObjectCount = ecount
, xrefUTable = utable
, xrefFTable = ftable
, xrefCTable = ctable
, xrefTrailer = M.empty
, xrefStream = Just xstream
}
action
= do oid <- nopredictId <+= 1
row <- lift step
let (typ, off, gen) = extractTableEntry xstream row
ref = (oid,gen)
cref = (oid, 0)
fobj = FObj (fromIntegral off) gen
uobj = UObj off gen
cobj = CObj (fromIntegral off) (fromIntegral gen)
case typ of
Free -> nopredictFTable.at ref ?= fobj
Used -> nopredictUTable.at ref ?= uobj
Compressed -> nopredictCTable.at cref ?= cobj
step = do bs <- take width
let row = B.unpack bs
return row
unpredictPngUp :: XRefStream -> L.ByteString -> Either XRefException XRef
unpredictPngUp xstream input
= case PL.parse parser input of
PL.Fail _ _ e -> Left $ XRefParsingException e
PL.Done _ bs -> Right bs
where
width = fromIntegral $ xrefStreamEntryWidth xstream
firstNumber = fromIntegral $ xrefStreamFirstNumber xstream
ecount = fromIntegral $ xrefStreamEntryCount xstream
start = UnpredictState
{ _unpredictPrev = replicate width 0
, _unpredictId = firstNumber 1
, _unpredictUTable = M.empty
, _unpredictFTable = M.empty
, _unpredictCTable = M.empty
}
parser = evalStateT aState start
aState = do replicateM_ ecount action
utable <- use unpredictUTable
ftable <- use unpredictFTable
ctable <- use unpredictCTable
return XRef
{ xrefFirstNumber = firstNumber
, xrefObjectCount = ecount
, xrefUTable = utable
, xrefFTable = ftable
, xrefCTable = ctable
, xrefTrailer = M.empty
, xrefStream = Just xstream
}
action
= do prev <- use unpredictPrev
newPrev <- lift $ step prev
oid <- unpredictId <+= 1
unpredictPrev .= newPrev
let (typ, off, gen) = extractTableEntry xstream newPrev
ref = (oid, gen)
cref = (oid, 0)
fobj = FObj (fromIntegral off) gen
uobj = UObj off gen
cobj = CObj (fromIntegral off) (fromIntegral gen)
case typ of
Free -> unpredictFTable.at ref ?= fobj
Used -> unpredictUTable.at ref ?= uobj
Compressed -> unpredictCTable.at cref ?= cobj
step prev
= do _ <- anyWord8
bs <- take width
let newPrev = zipWith (+) (B.unpack bs) prev
return newPrev
extractTableEntry :: XRefStream -> [Word8] -> (ObjType, Integer, Int)
extractTableEntry xstream arr
= mkEntry $ execState (traverse_ action $ zip [1..width] arr) start
where
start = ExtractState Free 0 0
mkEntry s
= let off = s ^. extractOffset
gen = s ^. extractGen
typ = s ^. extractType in
(typ, off, gen)
action (i,w)
| i == 1
= case w of
0x00 -> extractType .= Free
0x01 -> extractType .= Used
0x02 -> extractType .= Compressed
_ -> error $ "Invalid entry type " ++ show w
| i <= oLen+1
= extractOffset += (fromIntegral w) `shiftL` (8*(oLeni+1))
| i <= oLen+gLen+1
= extractGen += (fromIntegral w) `shiftL` (8*(gLeni+1))
| otherwise
= return ()
(_,c2,c3) = xrefStreamW xstream
oLen = fromIntegral c2
gLen = fromIntegral c3
width = fromIntegral $ xrefStreamEntryWidth xstream
validateXRefStream :: Stream -> Either XRefException XRefStream
validateXRefStream s
= maybe (Left InvalidXRefStream) Right action
where
action
= do typ <- dict ^? dictKey "Type" . _Name
when (typ /= "XRef") Nothing
size <- dict ^? dictKey "Size" . _Number . _Natural
len <- dict ^? dictKey "Length" . _Number . _Natural
w@(c1,c2,c3) <- getW
let xstream = XRefStream
{ xrefStreamLength = fromIntegral len
, xrefStreamSize = size
, xrefStreamFirstNumber = fromMaybe 0 firstNumber
, xrefStreamEntryCount = fromMaybe size entryCount
, xrefStreamPrev = getPrev
, xrefStreamW = w
, xrefStreamDecodeParms = decodeParms dict
, xrefStreamFilter = getFilter dict
, xrefStreamEntryWidth = c1 + c2 + c3
, xrefStreamPos = s ^. streamPos
, xrefStreamDict = dict
}
return xstream
dict = s ^. streamDict
firstNumber
= do ar <- dict ^? dictKey "Index" . _Array
ar ^? nth 0 . _Number . _Natural
entryCount
= do ar <- dict ^? dictKey "Index" . _Array
ar ^? nth 1 . _Number . _Natural
getPrev = dict ^? dictKey "Prev" . _Number . _Natural
getW = do ar <- dict ^? dictKey "W" . _Array
one <- ar ^? nth 0 . _Number . _Natural
two <- ar ^? nth 1 . _Number . _Natural
three <- ar ^? nth 2 . _Number . _Natural
return (one, two, three)
skipEOL :: Monad m => Drive m ()
skipEOL
= do bs <- drivePeek 1
case B8.uncons bs of
Just (c, _)
| isSpace c -> driveDiscard 1 >> skipEOL
| otherwise -> return ()
_ -> return ()
parseEOF :: Monad m => Drive m (Maybe XRefException)
parseEOF
= do bs <- driveGet 5
case bs of
"%%EOF" -> return Nothing
_ -> return $ Just $ XRefParsingException "Expected %%EOF"
parseXRefPosInteger :: Monad m => Drive m Integer
parseXRefPosInteger = go []
where
go cs = do bs <- drivePeek 1
case B8.uncons bs of
Just (c,_)
| isDigit c -> driveDiscard 1 >> go (c:cs)
| otherwise -> return $ read cs
_ -> return $ read cs
parseStartXRef :: Monad m => Drive m (Maybe XRefException)
parseStartXRef
= do bs <- driveGet 9
case bs of
"startxref" -> return Nothing
_ -> return $ Just $
XRefParsingException "Expected startxref"
tableXRef :: Parser ()
tableXRef
= do _ <- string "xref"
pdfEndOfLine
parseXRef :: Parser XRef
parseXRef
= do skipSpace
tableXRef
(fnum, ecount) <- parseSubsectionHeader
(ftable, utable) <- parseTableEntries fnum
trailer <- parseTrailerAfterTable
return XRef
{ xrefFirstNumber = fnum
, xrefObjectCount = ecount
, xrefUTable = utable
, xrefFTable = ftable
, xrefCTable = M.empty
, xrefTrailer = trailer
, xrefStream = Nothing
}
parseSubsectionHeader :: Parser (Int, Int)
parseSubsectionHeader
= do start <- decimal
skipSpace
ecount <- decimal
pdfEndOfLine
return (start, ecount)
parseTrailerAfterTable :: Parser Dictionary
parseTrailerAfterTable
= do skipSpace
_ <- string "trailer"
pdfEndOfLine
skipSpace
Dict d <- parseDict
return d
parseTableEntries :: Int -> Parser (FTable, UTable)
parseTableEntries firstNumber
= loop start
where
loop (i, ftable, utable)
= do mT <- optional parseTableEntry
case mT of
Nothing -> return (ftable, utable)
Just (off, gen, used)
| used
-> let key = (i, gen)
obj = UObj off gen
utable' = M.insert key obj utable in
loop (i+1, ftable, utable')
| otherwise
-> let key = (i, gen)
obj = FObj (fromIntegral off) gen
ftable' = M.insert key obj ftable in
loop (i+1, ftable', utable)
start :: (Int, FTable, UTable)
start = (firstNumber, M.empty, M.empty)
parseTableEntry :: Parser (Integer, Int, Bool)
parseTableEntry
= do skipSpace
offset <- decimal
skipSpace
gen <- decimal
skipSpace
c <- anyChar
case c of
'n' -> return (offset, gen, True)
'f' -> return (offset, gen, False)
_ ->
let msg = "error parsing XRef table entry: unknown char: " ++
[c] in
fail msg