{-# LANGUAGE OverloadedStrings #-}

module PDF.Cmap
       ( parseCMap
       ) where

import Data.Char (chr)
import Data.List (intercalate)
import Numeric (readOct, readHex)

import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as BSL
import qualified Data.Text as T
import Data.Text.Encoding (encodeUtf8)

import Text.Parsec hiding (many, (<|>))
import Control.Applicative
import Text.Parsec.ByteString.Lazy
import Codec.Compression.Zlib (decompress) 

import Debug.Trace

import PDF.Definition

parseCMap :: BSL.ByteString -> CMap
parseCMap :: ByteString -> CMap
parseCMap ByteString
str = case Parsec ByteString () CMap
-> () -> SourceName -> ByteString -> Either ParseError CMap
forall s t u a.
Stream s Identity t =>
Parsec s u a -> u -> SourceName -> s -> Either ParseError a
runParser (Parser ()
skipHeader Parser () -> Parsec ByteString () CMap -> Parsec ByteString () CMap
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
                                [CMap] -> CMap
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([CMap] -> CMap)
-> ParsecT ByteString () Identity [CMap]
-> Parsec ByteString () CMap
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
                                Parsec ByteString () CMap
-> ParsecT ByteString () Identity SourceName
-> ParsecT ByteString () Identity [CMap]
forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
manyTill
                                 ([Parsec ByteString () CMap] -> Parsec ByteString () CMap
forall s (m :: * -> *) t u a.
Stream s m t =>
[ParsecT s u m a] -> ParsecT s u m a
choice
                                 [ Parsec ByteString () CMap -> Parsec ByteString () CMap
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try Parsec ByteString () CMap
bfchar
                                 , Parsec ByteString () CMap -> Parsec ByteString () CMap
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (Parsec ByteString () CMap -> Parsec ByteString () CMap)
-> Parsec ByteString () CMap -> Parsec ByteString () CMap
forall a b. (a -> b) -> a -> b
$ [CMap] -> CMap
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([CMap] -> CMap)
-> ParsecT ByteString () Identity [CMap]
-> Parsec ByteString () CMap
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT ByteString () Identity [CMap]
bfrange
                                 ])
                                 (ParsecT ByteString () Identity SourceName
-> ParsecT ByteString () Identity SourceName
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (ParsecT ByteString () Identity SourceName
 -> ParsecT ByteString () Identity SourceName)
-> ParsecT ByteString () Identity SourceName
-> ParsecT ByteString () Identity SourceName
forall a b. (a -> b) -> a -> b
$ SourceName -> ParsecT ByteString () Identity SourceName
forall s (m :: * -> *) u.
Stream s m Char =>
SourceName -> ParsecT s u m SourceName
string SourceName
"endcmap"))
                               () SourceName
"" ByteString
str of
  Left ParseError
err -> SourceName -> CMap
forall a. HasCallStack => SourceName -> a
error (SourceName -> CMap) -> SourceName -> CMap
forall a b. (a -> b) -> a -> b
$ SourceName
"Can not parse CMap " SourceName -> SourceName -> SourceName
forall a. [a] -> [a] -> [a]
++ (ParseError -> SourceName
forall a. Show a => a -> SourceName
show ParseError
err)
  Right CMap
cmap -> CMap -> CMap
forall a. [a] -> [a]
mkUniq CMap
cmap

  where
    mkUniq :: [a] -> [a]
mkUniq = [a] -> [a]
forall a. [a] -> [a]
reverse


skipHeader :: Parser ()
skipHeader :: Parser ()
skipHeader = do
  ParsecT ByteString () Identity Char
-> ParsecT ByteString () Identity SourceName
-> ParsecT ByteString () Identity SourceName
forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
manyTill ParsecT ByteString () Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
anyChar (ParsecT ByteString () Identity SourceName
-> ParsecT ByteString () Identity SourceName
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (ParsecT ByteString () Identity SourceName
 -> ParsecT ByteString () Identity SourceName)
-> ParsecT ByteString () Identity SourceName
-> ParsecT ByteString () Identity SourceName
forall a b. (a -> b) -> a -> b
$ SourceName -> ParsecT ByteString () Identity SourceName
forall s (m :: * -> *) u.
Stream s m Char =>
SourceName -> ParsecT s u m SourceName
string SourceName
"endcodespacerange")
  Parser ()
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ()
spaces
  () -> Parser ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()

bfchar :: Parser CMap
bfchar :: Parsec ByteString () CMap
bfchar = do
  ParsecT ByteString () Identity Char
-> ParsecT ByteString () Identity SourceName
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 ParsecT ByteString () Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
digit
  Parser ()
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ()
spaces 
  SourceName -> ParsecT ByteString () Identity SourceName
forall s (m :: * -> *) u.
Stream s m Char =>
SourceName -> ParsecT s u m SourceName
string SourceName
"beginbfchar"
  Parser ()
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ()
spaces
  CMap
ms <- ParsecT ByteString () Identity (Int, SourceName)
-> Parsec ByteString () CMap
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (SourceName -> SourceName -> (Int, SourceName)
forall a.
(Eq a, Num a) =>
SourceName -> SourceName -> (a, SourceName)
toCmap (SourceName -> SourceName -> (Int, SourceName))
-> ParsecT ByteString () Identity SourceName
-> ParsecT ByteString () Identity (SourceName -> (Int, SourceName))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT ByteString () Identity SourceName
hexletters ParsecT ByteString () Identity (SourceName -> (Int, SourceName))
-> ParsecT ByteString () Identity SourceName
-> ParsecT ByteString () Identity (Int, SourceName)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ParsecT ByteString () Identity SourceName
hexletters)
  Parser ()
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ()
spaces
  SourceName -> ParsecT ByteString () Identity SourceName
forall s (m :: * -> *) u.
Stream s m Char =>
SourceName -> ParsecT s u m SourceName
string SourceName
"endbfchar"
  Parser ()
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ()
spaces
  CMap -> Parsec ByteString () CMap
forall (m :: * -> *) a. Monad m => a -> m a
return CMap
ms
    where toCmap :: SourceName -> SourceName -> (a, SourceName)
toCmap SourceName
cid SourceName
ucs = (((a, SourceName) -> a
forall a b. (a, b) -> a
fst((a, SourceName) -> a)
-> (SourceName -> (a, SourceName)) -> SourceName -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
.[(a, SourceName)] -> (a, SourceName)
forall a. [a] -> a
head([(a, SourceName)] -> (a, SourceName))
-> (SourceName -> [(a, SourceName)])
-> SourceName
-> (a, SourceName)
forall b c a. (b -> c) -> (a -> b) -> a -> c
.SourceName -> [(a, SourceName)]
forall a. (Eq a, Num a) => ReadS a
readHex) SourceName
cid, ((Char -> SourceName -> SourceName
forall a. a -> [a] -> [a]
:[])(Char -> SourceName)
-> (SourceName -> Char) -> SourceName -> SourceName
forall b c a. (b -> c) -> (a -> b) -> a -> c
.Int -> Char
chr(Int -> Char) -> (SourceName -> Int) -> SourceName -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
.(Int, SourceName) -> Int
forall a b. (a, b) -> a
fst((Int, SourceName) -> Int)
-> (SourceName -> (Int, SourceName)) -> SourceName -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
.CMap -> (Int, SourceName)
forall a. [a] -> a
head(CMap -> (Int, SourceName))
-> (SourceName -> CMap) -> SourceName -> (Int, SourceName)
forall b c a. (b -> c) -> (a -> b) -> a -> c
.SourceName -> CMap
forall a. (Eq a, Num a) => ReadS a
readHex) SourceName
ucs)

bfrange :: Parser [CMap]
bfrange :: ParsecT ByteString () Identity [CMap]
bfrange = do
  SourceName
d <- ParsecT ByteString () Identity Char
-> ParsecT ByteString () Identity SourceName
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 ParsecT ByteString () Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
digit
  Parser ()
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ()
spaces 
  SourceName -> ParsecT ByteString () Identity SourceName
forall s (m :: * -> *) u.
Stream s m Char =>
SourceName -> ParsecT s u m SourceName
string SourceName
"beginbfrange"
  Parser ()
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ()
spaces
  [CMap]
ms <- Parsec ByteString () CMap -> ParsecT ByteString () Identity [CMap]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many ([Int] -> [Int] -> CMap
forall a. [a] -> [Int] -> [(a, SourceName)]
toCmap
              ([Int] -> [Int] -> CMap)
-> ParsecT ByteString () Identity [Int]
-> ParsecT ByteString () Identity ([Int] -> CMap)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (SourceName -> SourceName -> [Int]
getRange (SourceName -> SourceName -> [Int])
-> ParsecT ByteString () Identity SourceName
-> ParsecT ByteString () Identity (SourceName -> [Int])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT ByteString () Identity SourceName
hexletters ParsecT ByteString () Identity (SourceName -> [Int])
-> ParsecT ByteString () Identity SourceName
-> ParsecT ByteString () Identity [Int]
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ParsecT ByteString () Identity SourceName
hexletters)
              ParsecT ByteString () Identity ([Int] -> CMap)
-> ParsecT ByteString () Identity [Int]
-> Parsec ByteString () CMap
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ((SourceName -> [SourceName] -> [Int]
forall p. p -> [SourceName] -> [Int]
mkStrList SourceName
d ([SourceName] -> [Int])
-> (SourceName -> [SourceName]) -> SourceName -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SourceName -> [SourceName]
lines) (SourceName -> [Int])
-> ParsecT ByteString () Identity SourceName
-> ParsecT ByteString () Identity [Int]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (ParsecT ByteString () Identity SourceName
-> ParsecT ByteString () Identity SourceName
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try ParsecT ByteString () Identity SourceName
hexletters ParsecT ByteString () Identity SourceName
-> ParsecT ByteString () Identity SourceName
-> ParsecT ByteString () Identity SourceName
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT ByteString () Identity SourceName
hexletterArray)))
  Parser ()
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ()
spaces
  SourceName -> ParsecT ByteString () Identity SourceName
forall s (m :: * -> *) u.
Stream s m Char =>
SourceName -> ParsecT s u m SourceName
string SourceName
"endbfrange"
  Parser ()
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ()
spaces
  [CMap] -> ParsecT ByteString () Identity [CMap]
forall (m :: * -> *) a. Monad m => a -> m a
return ([CMap] -> ParsecT ByteString () Identity [CMap])
-> [CMap] -> ParsecT ByteString () Identity [CMap]
forall a b. (a -> b) -> a -> b
$ [CMap]
ms
    where 
      gethex :: SourceName -> Int
gethex = (Int, SourceName) -> Int
forall a b. (a, b) -> a
fst((Int, SourceName) -> Int)
-> (SourceName -> (Int, SourceName)) -> SourceName -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
.CMap -> (Int, SourceName)
forall a. [a] -> a
head(CMap -> (Int, SourceName))
-> (SourceName -> CMap) -> SourceName -> (Int, SourceName)
forall b c a. (b -> c) -> (a -> b) -> a -> c
.SourceName -> CMap
forall a. (Eq a, Num a) => ReadS a
readHex
      getRange :: SourceName -> SourceName -> [Int]
getRange SourceName
cid SourceName
cid' = [SourceName -> Int
gethex SourceName
cid .. SourceName -> Int
gethex SourceName
cid']
      mkStrList :: p -> [SourceName] -> [Int]
mkStrList p
d [SourceName]
src = if ([SourceName] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [SourceName]
src) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1
                        then [SourceName -> Int
gethex (SourceName -> Int) -> SourceName -> Int
forall a b. (a -> b) -> a -> b
$ [SourceName] -> SourceName
forall a. [a] -> a
head [SourceName]
src .. ]
                        else (SourceName -> Int) -> [SourceName] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map SourceName -> Int
gethex [SourceName]
src
      toCmap :: [a] -> [Int] -> [(a, SourceName)]
toCmap [a]
range [Int]
ucs = [a] -> [SourceName] -> [(a, SourceName)]
forall a b. [a] -> [b] -> [(a, b)]
zip [a]
range ((Int -> SourceName) -> [Int] -> [SourceName]
forall a b. (a -> b) -> [a] -> [b]
map ((Char -> SourceName -> SourceName
forall a. a -> [a] -> [a]
:[])(Char -> SourceName) -> (Int -> Char) -> Int -> SourceName
forall b c a. (b -> c) -> (a -> b) -> a -> c
.Int -> Char
chr) [Int]
ucs)


hexletters :: Parser String
hexletters :: ParsecT ByteString () Identity SourceName
hexletters = do
  Char -> ParsecT ByteString () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'<'
  [SourceName]
lets <- [ParsecT ByteString () Identity [SourceName]]
-> ParsecT ByteString () Identity [SourceName]
forall s (m :: * -> *) t u a.
Stream s m t =>
[ParsecT s u m a] -> ParsecT s u m a
choice [ ParsecT ByteString () Identity [SourceName]
-> ParsecT ByteString () Identity [SourceName]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (ParsecT ByteString () Identity [SourceName]
 -> ParsecT ByteString () Identity [SourceName])
-> ParsecT ByteString () Identity [SourceName]
-> ParsecT ByteString () Identity [SourceName]
forall a b. (a -> b) -> a -> b
$ ParsecT ByteString () Identity SourceName
-> ParsecT ByteString () Identity Char
-> ParsecT ByteString () Identity [SourceName]
forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
manyTill (Int
-> ParsecT ByteString () Identity Char
-> ParsecT ByteString () Identity SourceName
forall s (m :: * -> *) t u a.
Stream s m t =>
Int -> ParsecT s u m a -> ParsecT s u m [a]
count Int
4 (ParsecT ByteString () Identity Char
 -> ParsecT ByteString () Identity SourceName)
-> ParsecT ByteString () Identity Char
-> ParsecT ByteString () Identity SourceName
forall a b. (a -> b) -> a -> b
$ ParsecT ByteString () Identity Char
hexletter) (ParsecT ByteString () Identity Char
-> ParsecT ByteString () Identity Char
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (ParsecT ByteString () Identity Char
 -> ParsecT ByteString () Identity Char)
-> ParsecT ByteString () Identity Char
-> ParsecT ByteString () Identity Char
forall a b. (a -> b) -> a -> b
$ Char -> ParsecT ByteString () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'>')
                 , (SourceName -> [SourceName] -> [SourceName]
forall a. a -> [a] -> [a]
:[]) (SourceName -> [SourceName])
-> ParsecT ByteString () Identity SourceName
-> ParsecT ByteString () Identity [SourceName]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Int
-> ParsecT ByteString () Identity Char
-> ParsecT ByteString () Identity SourceName
forall s (m :: * -> *) t u a.
Stream s m t =>
Int -> ParsecT s u m a -> ParsecT s u m [a]
count Int
2 (ParsecT ByteString () Identity Char
 -> ParsecT ByteString () Identity SourceName)
-> ParsecT ByteString () Identity Char
-> ParsecT ByteString () Identity SourceName
forall a b. (a -> b) -> a -> b
$ ParsecT ByteString () Identity Char
hexletter) ParsecT ByteString () Identity [SourceName]
-> ParsecT ByteString () Identity Char
-> ParsecT ByteString () Identity [SourceName]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> ParsecT ByteString () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'>'
                 ]
  Parser ()
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ()
spaces
  SourceName -> ParsecT ByteString () Identity SourceName
forall (m :: * -> *) a. Monad m => a -> m a
return (SourceName -> ParsecT ByteString () Identity SourceName)
-> SourceName -> ParsecT ByteString () Identity SourceName
forall a b. (a -> b) -> a -> b
$ [SourceName] -> SourceName
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [SourceName]
lets

hexletter :: Parser Char
hexletter :: ParsecT ByteString () Identity Char
hexletter = SourceName -> ParsecT ByteString () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
SourceName -> ParsecT s u m Char
oneOf SourceName
"0123456789ABCDEFabcdef"

hexletterArray :: Parser String
hexletterArray :: ParsecT ByteString () Identity SourceName
hexletterArray = do
  Char -> ParsecT ByteString () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'['
  Parser ()
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ()
spaces
  [SourceName]
lets <- ParsecT ByteString () Identity SourceName
-> ParsecT ByteString () Identity Char
-> ParsecT ByteString () Identity [SourceName]
forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
manyTill ParsecT ByteString () Identity SourceName
hexletters (ParsecT ByteString () Identity Char
-> ParsecT ByteString () Identity Char
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (ParsecT ByteString () Identity Char
 -> ParsecT ByteString () Identity Char)
-> ParsecT ByteString () Identity Char
-> ParsecT ByteString () Identity Char
forall a b. (a -> b) -> a -> b
$ Parser ()
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ()
spaces Parser ()
-> ParsecT ByteString () Identity Char
-> ParsecT ByteString () Identity Char
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> ParsecT ByteString () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
']')
  Parser ()
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ()
spaces
  SourceName -> ParsecT ByteString () Identity SourceName
forall (m :: * -> *) a. Monad m => a -> m a
return (SourceName -> ParsecT ByteString () Identity SourceName)
-> SourceName -> ParsecT ByteString () Identity SourceName
forall a b. (a -> b) -> a -> b
$ SourceName -> [SourceName] -> SourceName
forall a. [a] -> [[a]] -> [a]
intercalate SourceName
"\n" [SourceName]
lets