module Text.LaTeX.Base.Parser (
parseLaTeX
, parseLaTeXFile
, latexParser
, latexBlockParser
, ParseError
, errorPos
, errorMessages
, Message (..)
, messageString
, SourcePos
, sourceLine
, sourceColumn
, sourceName
) where
import Text.Parsec hiding ((<|>),many)
import Text.Parsec.Text
import Text.Parsec.Error
import Data.Char (toLower,digitToInt)
import Data.Monoid
import Data.Maybe (fromMaybe)
import qualified Data.Text as T
import Control.Applicative
import Control.Monad (unless)
import Text.LaTeX.Base.Syntax
import Text.LaTeX.Base.Render
parseLaTeX :: Text -> Either ParseError LaTeX
parseLaTeX t | T.null t = return TeXEmpty
| otherwise = parse latexParser "parseLaTeX input" t
parseLaTeXFile :: FilePath -> IO (Either ParseError LaTeX)
parseLaTeXFile fp = parse latexParser fp <$> readFileTex fp
latexParser :: Parser LaTeX
latexParser = mconcat <$> latexBlockParser `manyTill` eof
latexBlockParser :: Parser LaTeX
latexBlockParser = foldr1 (<|>)
[ text <?> "text"
, dolMath <?> "inline math ($)"
, comment <?> "comment"
, text2 <?> "text2"
, try environment <?> "environment"
, command <?> "command"
]
text :: Parser LaTeX
text = do
mbC <- peekChar
case mbC of
Nothing -> fail "text: Empty input."
Just c | c `elem` "$%\\{]}" -> fail "not text"
| otherwise -> TeXRaw <$> takeTill (`elem` "$%\\{]}")
text2 :: Parser LaTeX
text2 = do
_ <- char ']'
t <- try (text <|> return (TeXRaw T.empty))
return $ TeXRaw (T.pack "]") <> t
environment :: Parser LaTeX
environment = anonym <|> env
anonym :: Parser LaTeX
anonym = do
_ <- char '{'
l <- TeXBraces . mconcat <$> many latexBlockParser
_ <- char '}'
return l
env :: Parser LaTeX
env = do
n <- char '\\' *> envName "begin"
sps <- many $ char ' '
let lsps = if null sps then mempty else TeXRaw $ T.pack sps
as <- cmdArgs
b <- envBody n
return $ TeXEnv n (fromMaybe [] as) $
case as of
Just [] -> lsps <> TeXBraces mempty <> b
Nothing -> lsps <> b
_ -> b
envName :: String -> Parser String
envName k = do
_ <- string k
_ <- char '{'
n <- takeTill (== '}')
_ <- char '}'
return $ T.unpack n
envBody :: String -> Parser LaTeX
envBody n = mconcat <$> (bodyBlock n) `manyTill` endenv
where endenv = try $ string ("\\end") >> spaces >> string ("{" <> n <> "}")
bodyBlock :: String -> Parser LaTeX
bodyBlock n = do
c <- peekChar
case c of
Just _ -> latexBlockParser
_ -> fail $ "Environment '" <> n <> "' not finalized."
command :: Parser LaTeX
command = do
_ <- char '\\'
mbX <- peekChar
case mbX of
Nothing -> return TeXEmpty
Just x -> if isSpecial x
then special
else do
c <- takeTill endCmd
maybe (TeXCommS $ T.unpack c) (TeXComm $ T.unpack c) <$> cmdArgs
cmdArgs :: Parser (Maybe [TeXArg])
cmdArgs = try (string "{}" >> return (Just []))
<|> fmap Just (many1 cmdArg)
<|> return Nothing
cmdArg :: Parser TeXArg
cmdArg = do
c <- char '[' <|> char '{'
let e = case c of
'[' -> "]"
'{' -> "}"
_ -> error "this cannot happen!"
b <- mconcat <$> manyTill latexBlockParser (string e)
case c of
'[' -> return $ OptArg b
'{' -> return $ FixArg b
_ -> error "this cannot happen!"
special :: Parser LaTeX
special = do
x <- anyChar
case x of
'(' -> math Parentheses "\\)"
'[' -> math Square "\\]"
'{' -> lbrace
'}' -> rbrace
'|' -> vert
'\\' -> lbreak
_ -> commS [x]
lbreak :: Parser LaTeX
lbreak = do
y <- try (char '[' <|> char '*' <|> return ' ')
case y of
'[' -> linebreak False
'*' -> do z <- try (char '[' <|> return ' ')
case z of
'[' -> linebreak True
_ -> return (TeXLineBreak Nothing True)
_ -> return (TeXLineBreak Nothing False)
linebreak :: Bool -> Parser LaTeX
linebreak t = do m <- measure <?> "measure"
_ <- char ']'
s <- try (char '*' <|> return ' ')
return $ TeXLineBreak (Just m) (t || s == '*')
measure :: Parser Measure
measure = try (floating >>= unit) <|> (CustomMeasure . mconcat) <$> manyTill latexBlockParser (lookAhead $ char ']')
unit :: Double -> Parser Measure
unit f = do
u1 <- anyChar
u2 <- anyChar
case map toLower [u1, u2] of
"pt" -> return $ Pt f
"mm" -> return $ Mm f
"cm" -> return $ Cm f
"in" -> return $ In f
"ex" -> return $ Ex f
"em" -> return $ Em f
_ -> fail "NaN"
rbrace, lbrace,vert :: Parser LaTeX
lbrace = brace "{"
rbrace = brace "}"
vert = brace "|"
brace :: String -> Parser LaTeX
brace = return . TeXCommS
commS :: String -> Parser LaTeX
commS = return . TeXCommS
dolMath :: Parser LaTeX
dolMath = do
_ <- char '$'
b <- mconcat <$> latexBlockParser `manyTill` char '$'
return $ TeXMath Dollar b
math :: MathType -> String -> Parser LaTeX
math t eMath = do
b <- mconcat <$> latexBlockParser `manyTill` try (string eMath)
return $ TeXMath t b
comment :: Parser LaTeX
comment = do
_ <- char '%'
c <- takeTill (== '\n')
e <- atEnd
unless e (char '\n' >>= \_ -> return ())
return $ TeXComment c
isSpecial :: Char -> Bool
isSpecial = (`elem` specials)
endCmd :: Char -> Bool
endCmd c = notLowercaseAlph && notUppercaseAlph
where c' = fromEnum c
notLowercaseAlph = c' < fromEnum 'a' || c' > fromEnum 'z'
notUppercaseAlph = c' < fromEnum 'A' || c' > fromEnum 'Z'
specials :: String
specials = "'(),.-\"!^$&#{}%~|/:;=[]\\` "
peekChar :: Parser (Maybe Char)
peekChar = Just <$> (try $ lookAhead anyChar) <|> pure Nothing
atEnd :: Parser Bool
atEnd = (eof *> pure True) <|> pure False
takeTill :: (Char -> Bool) -> Parser Text
takeTill p = T.pack <$> many (satisfy (not . p))
floating :: Parser Double
floating = decimal <**> fractExponent
fractExponent :: Parser (Integer -> Double)
fractExponent = (\fract expo n -> (fromInteger n + fract) * expo) <$> fraction <*> option 1.0 exponent'
<|> (\expo n -> fromInteger n * expo) <$> exponent' where
fraction = foldr op 0.0 <$> (char '.' *> (some digit <?> "fraction"))
op d f = (f + fromIntegral (digitToInt d))/10.0
exponent' = ((\f e -> power (f e)) <$ oneOf "eE" <*> sign <*> (decimal <?> "exponent")) <?> "exponent"
power e
| e < 0 = 1.0/power(e)
| otherwise = fromInteger (10^e)
decimal :: Parser Integer
decimal = read <$> many1 digit
sign :: Parser (Integer -> Integer)
sign = negate <$ char '-'
<|> id <$ char '+'
<|> pure id