{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE ExplicitForAll #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE IncoherentInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TypeSynonymInstances #-}
module Text.Pandoc.Parsing ( takeWhileP,
takeP,
anyLine,
anyLineNewline,
indentWith,
many1Till,
manyUntil,
sepBy1',
notFollowedBy',
oneOfStrings,
oneOfStringsCI,
spaceChar,
nonspaceChar,
skipSpaces,
blankline,
blanklines,
gobbleSpaces,
gobbleAtMostSpaces,
enclosed,
stringAnyCase,
parseFromString,
parseFromString',
lineClump,
charsInBalanced,
romanNumeral,
emailAddress,
uri,
mathInline,
mathDisplay,
withHorizDisplacement,
withRaw,
escaped,
characterReference,
upperRoman,
lowerRoman,
decimal,
lowerAlpha,
upperAlpha,
anyOrderedListMarker,
orderedListMarker,
charRef,
lineBlockLines,
tableWith,
widthsFromIndices,
gridTableWith,
gridTableWith',
readWith,
readWithM,
testStringWith,
guardEnabled,
guardDisabled,
updateLastStrPos,
notAfterString,
logMessage,
reportLogMessages,
ParserState (..),
HasReaderOptions (..),
HasHeaderMap (..),
HasIdentifierList (..),
HasMacros (..),
HasLogMessages (..),
HasLastStrPosition (..),
HasIncludeFiles (..),
defaultParserState,
HeaderType (..),
ParserContext (..),
QuoteContext (..),
HasQuoteContext (..),
NoteTable,
NoteTable',
KeyTable,
SubstTable,
Key (..),
toKey,
registerHeader,
smartPunctuation,
singleQuoteStart,
singleQuoteEnd,
doubleQuoteStart,
doubleQuoteEnd,
ellipses,
apostrophe,
dash,
nested,
citeKey,
Parser,
ParserT,
F,
Future(..),
runF,
askF,
asksF,
returnF,
trimInlinesF,
token,
(<+?>),
extractIdClass,
insertIncludedFile,
insertIncludedFileF,
Stream,
runParser,
runParserT,
parse,
tokenPrim,
anyToken,
getInput,
setInput,
unexpected,
char,
letter,
digit,
alphaNum,
skipMany,
skipMany1,
spaces,
space,
anyChar,
satisfy,
newline,
string,
count,
eof,
noneOf,
oneOf,
lookAhead,
notFollowedBy,
many,
many1,
manyTill,
(<|>),
(<?>),
choice,
try,
sepBy,
sepBy1,
sepEndBy,
sepEndBy1,
endBy,
endBy1,
option,
optional,
optionMaybe,
getState,
setState,
updateState,
SourcePos,
getPosition,
setPosition,
sourceColumn,
sourceLine,
setSourceColumn,
setSourceLine,
incSourceColumn,
newPos,
Line,
Column
)
where
import Prelude
import Control.Monad.Identity
import Control.Monad.Reader
import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper,
isPunctuation, isSpace, ord, toLower, toUpper)
import Data.Default
import Data.List (intercalate, isSuffixOf, transpose)
import qualified Data.Map as M
import Data.Maybe (mapMaybe, fromMaybe)
import qualified Data.Set as Set
import Data.String
import Data.Text (Text)
import Text.HTML.TagSoup.Entity (lookupEntity)
import Text.Pandoc.Asciify (toAsciiChar)
import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad, readFileFromDirs, report)
import Text.Pandoc.Definition
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Readers.LaTeX.Types (Macro)
import Text.Pandoc.Shared
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
import Text.Pandoc.XML (fromEntities)
import Text.Parsec hiding (token)
import Text.Parsec.Pos (initialPos, newPos, updatePosString)
import Control.Monad.Except
import Text.Pandoc.Error
-- | A pure Parsec parser over stream type @t@ with user state @s@
-- (an alias for 'Parsec').
type Parser t s = Parsec t s
-- | Alias for the 'ParsecT' parser transformer.
type ParserT = ParsecT
-- | A delayed computation that produces an @a@ once it is given an
-- environment of type @s@.  It is a thin wrapper around 'Reader'.
newtype Future s a = Future { runDelayed :: Reader s a }
  deriving (Monad, Applicative, Functor)
-- | A 'Future' whose environment is the 'ParserState'.
type F = Future ParserState
-- | Evaluate a 'Future' against a concrete environment.
runF :: Future s a -> s -> a
runF future env = runReader (runDelayed future) env
-- | A 'Future' that yields the whole environment unchanged.
askF :: Future s s
askF = Future (asks id)
-- | A 'Future' that yields the result of applying a projection to the
-- environment.
asksF :: (s -> a) -> Future s a
asksF = Future . asks
-- | Lift a plain value into a 'Future' and then into the surrounding monad.
returnF :: Monad m => a -> m (Future s a)
returnF x = return (return x)
-- | Apply 'trimInlines' to the eventual result of a 'Future'.
trimInlinesF :: Future s Inlines -> Future s Inlines
trimInlinesF = fmap trimInlines
-- | Two 'Future's are combined by combining their eventual results.
instance Semigroup a => Semigroup (Future s a) where
  x <> y = (<>) <$> x <*> y
-- | The empty 'Future' yields 'mempty' regardless of the environment.
instance (Semigroup a, Monoid a) => Monoid (Future s a) where
  mempty = pure mempty
  mappend x y = x <> y
-- | Return the longest prefix of the remaining input whose characters all
-- satisfy the predicate, advancing the input and source position past it.
-- Works directly on the input list instead of running a per-character
-- parser.  Because 'anyChar' is run first, the parser fails at end of input.
takeWhileP :: Monad m
           => (Char -> Bool)
           -> ParserT [Char] st m [Char]
takeWhileP f = do
  input <- getInput
  startPos <- getPosition
  let (matched, remaining) = span f input
  -- Run a real parsec primitive so that parsec registers consumption;
  -- the input and position are then overwritten explicitly below.
  _ <- anyChar
  setInput remaining
  setPosition (updatePosString startPos matched)
  return matched
-- | Parse exactly @n@ characters of input, working directly on the input
-- list instead of running a per-character parser.
--
-- Fails when @n <= 0@ or when fewer than @n@ characters remain.
takeP :: Monad m => Int -> ParserT [Char] st m [Char]
takeP n = do
  guard (n > 0)
  inp <- getInput
  pos <- getPosition
  let (xs, rest) = splitAt n inp
  -- 'splitAt' silently returns a shorter prefix when the input runs out;
  -- reject that case so a short read is never reported as success.
  guard (length xs == n)
  -- Run a real parsec primitive so that parsec registers consumption;
  -- the input and position are then overwritten explicitly below.
  anyChar
  setInput rest
  setPosition $ updatePosString pos xs
  return xs
-- | Parse one line of input up to and including its terminating newline,
-- returning the line without the newline.  Fails if the remaining input
-- contains no newline.
anyLine :: Monad m => ParserT [Char] st m [Char]
anyLine = do
  input <- getInput
  startPos <- getPosition
  let (lineText, remainder) = break (== '\n') input
  case remainder of
    '\n' : afterNewline -> do
      -- Run a real parsec primitive so that parsec registers consumption.
      _ <- anyChar
      setInput afterNewline
      setPosition (incSourceLine (setSourceColumn startPos 1) 1)
      return lineText
    _ -> mzero
-- | Like 'anyLine', but the returned line keeps a trailing newline.
anyLineNewline :: Monad m => ParserT [Char] st m [Char]
anyLineNewline = fmap (\line -> line ++ "\n") anyLine
-- | Parse indentation worth @num@ columns.  When @num@ is smaller than the
-- tab stop only spaces are accepted; otherwise either @num@ spaces or a tab
-- followed by indentation for the remaining @num - tabStop@ columns.
indentWith :: Stream s m Char
           => HasReaderOptions st
           => Int -> ParserT s st m [Char]
indentWith num = do
  tabStop <- getOption readerTabStop
  let spacesOnly = count num (char ' ')
      tabThenRest = char '\t' >> indentWith (num - tabStop)
  if num < tabStop
    then spacesOnly
    else choice [try spacesOnly, try tabThenRest]
-- | Like 'manyTill', but requires at least one occurrence of @p@: the end
-- parser must not match immediately.
many1Till :: (Show end, Stream s m t)
          => ParserT s st m a
          -> ParserT s st m end
          -> ParserT s st m [a]
many1Till p end = do
  notFollowedBy' end
  (:) <$> p <*> manyTill p end
-- | Apply @p@ repeatedly until @end@ succeeds, returning both the collected
-- results of @p@ and the result of @end@.  @end@ is tried before each @p@.
manyUntil :: (Stream s m t)
          => ParserT s u m a
          -> ParserT s u m b
          -> ParserT s u m ([a], b)
manyUntil p end = go
  where
    go = stop <|> continue
    stop = (\e -> ([], e)) <$> end
    continue = do
      x <- p
      (xs, e) <- go
      return (x : xs, e)
-- | Like 'sepBy1', but backtracks over a separator that is not followed by
-- another item instead of failing.
sepBy1' :: (Stream s m t)
        => ParsecT s u m a
        -> ParsecT s u m sep
        -> ParsecT s u m [a]
sepBy1' p sep = do
  firstItem <- p
  moreItems <- many (try (sep >> p))
  return (firstItem : moreItems)
-- | Succeed without consuming input only when @p@ does NOT match here.
-- Unlike 'notFollowedBy', this works for any parser whose result can be
-- shown.
notFollowedBy' :: (Show b, Stream s m a) => ParserT s st m b -> ParserT s st m ()
notFollowedBy' p =
  -- The inner alternative picks which action to run ("report unexpected" or
  -- "do nothing"); 'join' then runs it.
  try $ join $ ((unexpected . show) <$> try p) <|> return (return ())
-- | Parse the longest of the candidate strings that matches the input,
-- comparing characters with the supplied predicate.  The returned string is
-- made of the characters actually read from the input.
oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
oneOfStrings' _ [] = fail "no strings"
oneOfStrings' matches strs = try $ do
  c <- anyChar
  -- Remainders of every candidate whose first character matched.
  let remainders = [rest | (x : rest) <- strs, x `matches` c]
      stopHere
        | "" `elem` remainders = return [c]
        | otherwise = fail "not found"
  if null remainders
    then fail "not found"
    else ((c :) <$> oneOfStrings' matches remainders) <|> stopHere
-- | Parse the longest matching string from the list (exact comparison).
oneOfStrings :: Stream s m Char => [String] -> ParserT s st m String
oneOfStrings strs = oneOfStrings' (==) strs
-- | Parse the longest matching string from the list, ignoring case.
oneOfStringsCI :: Stream s m Char => [String] -> ParserT s st m String
oneOfStringsCI = oneOfStrings' (\x y -> lower x == lower y)
  where
    -- Handle ASCII directly before falling back to the general 'toLower'.
    lower c
      | isAsciiUpper c = chr (ord c + 32)
      | isAscii c = c
      | otherwise = toLower c
-- | Parse a single space or tab.
spaceChar :: Stream s m Char => ParserT s st m Char
spaceChar = satisfy (`elem` [' ', '\t'])
-- | Parse any character other than tab, newline, space or carriage return.
nonspaceChar :: Stream s m Char => ParserT s st m Char
nonspaceChar = satisfy (`notElem` ['\t', '\n', ' ', '\r'])
-- | Skip zero or more spaces or tabs.
skipSpaces :: Stream s m Char => ParserT s st m ()
skipSpaces = skipMany spaceChar
-- | Parse optional spaces/tabs followed by a newline, returning the newline.
blankline :: Stream s m Char => ParserT s st m Char
blankline = try (skipSpaces *> newline)
-- | Parse one or more blank lines, returning one newline per line parsed.
blanklines :: Stream s m Char => ParserT s st m [Char]
blanklines = many1 blankline
-- | Consume exactly @n@ columns of leading whitespace, splitting a tab into
-- spaces where necessary.  Backtracks on failure.  Calls 'error' when @n@
-- is negative.
gobbleSpaces :: (HasReaderOptions st, Monad m)
             => Int -> ParserT [Char] st m ()
gobbleSpaces n = case compare n 0 of
  EQ -> return ()
  LT -> error "gobbleSpaces called with negative number"
  GT -> try $ do
    _ <- char ' ' <|> eatOneSpaceOfTab
    gobbleSpaces (n - 1)
-- | Consume a tab as if it were @tabstop@ spaces and only one of them were
-- being eaten: the remaining @tabstop - 1@ spaces are pushed back onto the
-- input.
eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT [Char] st m Char
eatOneSpaceOfTab = do
  _ <- char '\t'
  tabstop <- getOption readerTabStop
  remaining <- getInput
  setInput (replicate (tabstop - 1) ' ' ++ remaining)
  return ' '
-- | Consume up to @n@ columns of leading whitespace (treating tabs as in
-- 'eatOneSpaceOfTab') and return how many columns were consumed.  Calls
-- 'error' when @n@ is negative.
gobbleAtMostSpaces :: (HasReaderOptions st, Monad m)
                   => Int -> ParserT [Char] st m Int
gobbleAtMostSpaces n = case compare n 0 of
  EQ -> return 0
  LT -> error "gobbleAtMostSpaces called with negative number"
  GT -> option 0 $ do
    _ <- char ' ' <|> eatOneSpaceOfTab
    gobbled <- gobbleAtMostSpaces (n - 1)
    return (gobbled + 1)
-- | Parse content between a start and an end delimiter.  The start
-- delimiter must not be followed by whitespace, and at least one item must
-- be parsed before the end delimiter.  Backtracks on failure.
enclosed :: (Show end, Stream s m Char) => ParserT s st m t
         -> ParserT s st m end
         -> ParserT s st m a
         -> ParserT s st m [a]
enclosed start end parser = try $ do
  _ <- start
  notFollowedBy space
  many1Till parser end
-- | Parse the given string case-insensitively, returning the characters as
-- they appear in the input.
stringAnyCase :: Stream s m Char => [Char] -> ParserT s st m String
stringAnyCase [] = string ""
stringAnyCase (x:xs) =
  (:) <$> (char (toUpper x) <|> char (toLower x)) <*> stringAnyCase xs
-- | Run a parser on the given string instead of the current input, then
-- restore the original input and position.  The parser (followed by
-- optional whitespace) must consume the whole string.  While the string is
-- being parsed the source name is @"chunk"@.
parseFromString :: (Monad m, Stream s m Char, IsString s)
                => ParserT s st m r
                -> String
                -> ParserT s st m r
parseFromString parser str = do
  savedPos <- getPosition
  setPosition (initialPos "chunk")
  savedInput <- getInput
  setInput (fromString str)
  result <- parser <* spaces <* eof
  setInput savedInput
  setPosition savedPos
  return result
-- | Like 'parseFromString', but additionally restores 'stateLastStrPos' to
-- the value it had before the sub-parse.
parseFromString' :: (Monad m, Stream s m Char, IsString s)
                 => ParserT s ParserState m a
                 -> String
                 -> ParserT s ParserState m a
parseFromString' parser str = do
  savedStrPos <- stateLastStrPos <$> getState
  parseFromString parser str
    <* updateState (\st -> st { stateLastStrPos = savedStrPos })
-- | Parse either a run of blank lines or a run of consecutive non-blank
-- lines, returned as a single string with newline-terminated lines.
lineClump :: Monad m => ParserT [Char] st m String
lineClump =
  blanklines <|> fmap unlines (many1 (notFollowedBy blankline *> anyLine))
-- | Parse text between balanced @open@ and @close@ delimiters, using
-- @parser@ for the non-delimiter characters.  The outermost delimiters are
-- dropped from the result; nested ones are kept.
charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char
                -> ParserT s st m String
charsInBalanced open close parser = try $ do
  _ <- char open
  let isDelim c = c == open || c == close
      plainRun = many1 (notFollowedBy (satisfy isDelim) >> parser)
      nestedRun = do
        inner <- charsInBalanced open close parser
        return ([open] ++ inner ++ [close])
  chunks <- many (plainRun <|> nestedRun)
  _ <- char close
  return (concat chunks)
-- | Parse a roman numeral and return its value.  The flag selects
-- uppercase ('True') or lowercase ('False') numerals.  Fails if the parsed
-- value is zero.
romanNumeral :: Stream s m Char => Bool
             -> ParserT s st m Int
romanNumeral upperCase = do
  let rchar uc = char (if upperCase then uc else toLower uc)
      one = rchar 'I'
      five = rchar 'V'
      ten = rchar 'X'
      fifty = rchar 'L'
      hundred = rchar 'C'
      fivehundred = rchar 'D'
      thousand = rchar 'M'
      -- Value of a (possibly empty) run of one repeated numeral.
      run unit numeral = ((unit *) . length) <$> many numeral
      -- Value of an optional subtractive pair such as "IV" or "CM".
      pair value small big = option 0 (try (small >> big >> return value))
      -- Value of an optional single numeral such as "V" or "D".
      single value numeral = option 0 (value <$ numeral)
  -- Require at least one numeral character before committing.
  _ <- lookAhead (choice [one, five, ten, fifty, hundred, fivehundred, thousand])
  parts <- sequence
    [ run 1000 thousand
    , pair 900 hundred thousand
    , single 500 fivehundred
    , pair 400 hundred fivehundred
    , run 100 hundred
    , pair 90 ten hundred
    , single 50 fifty
    , pair 40 ten fifty
    , run 10 ten
    , pair 9 one ten
    , single 5 five
    , pair 4 one five
    , run 1 one
    ]
  let total = sum parts
  if total == 0
    then fail "not a roman numeral"
    else return total
-- | Parse an email address.  Returns the address itself and a
-- URI-escaped @mailto:@ URI for it.
emailAddress :: Stream s m Char => ParserT s st m (String, String)
emailAddress = try $ do
  mbox <- mailbox
  _ <- char '@'
  dom <- domain
  let full = fromEntities (mbox ++ '@' : dom)
  return (full, escapeURI ("mailto:" ++ full))
  where
    dotted part = intercalate "." <$> (part `sepBy1'` char '.')
    mailbox = dotted emailWord
    domain = dotted subdomain
    subdomain = many1 (alphaNum <|> innerPunct)
    -- Punctuation inside a subdomain must not be followed by a space or by
    -- further punctuation.
    innerPunct = try (satisfy (\c -> isEmailPunct c || c == '@')
                        <* notFollowedBy space
                        <* notFollowedBy (satisfy isPunctuation))
    -- A mailbox word must start with an alphanumeric character.
    emailWord = (:) <$> satisfy isAlphaNum <*> many (satisfy isEmailChar)
    isEmailChar c = isAlphaNum c || isEmailPunct c
    isEmailPunct c = c `elem` "!\"#$%&'*+-/=?^_{|}~;"
-- | Parse one of the known URI schemes (from 'schemes'), ignoring case.
uriScheme :: Stream s m Char => ParserT s st m String
uriScheme = oneOfStringsCI knownSchemes
  where knownSchemes = Set.toList schemes
-- | Parse a URI.  Returns the URI as written (with entities resolved) and
-- a URI-escaped version of it.
uri :: Stream s m Char => ParserT s st m (String, String)
uri = try $ do
  scheme <- uriScheme
  _ <- char ':'
  -- Reject a colon followed by '*', '_' or ']'.
  notFollowedBy (oneOf "*_]")
  -- Bracketed chunks are accepted only when balanced, so a closing bracket
  -- that belongs to the surrounding text is not swallowed.
  body <- concat <$> many1 (choice
            [ uriChunkBetween '(' ')'
            , uriChunkBetween '{' '}'
            , uriChunkBetween '[' ']'
            , uriChunk
            ])
  body' <- option body (char '/' >> return (body ++ "/"))
  let uri' = scheme ++ ":" ++ fromEntities body'
  return (uri', escapeURI uri')
  where
    wordChar = alphaNum <|> oneOf "#$%+/@\\_-&="
    percentEscaped = try ((:) <$> char '%' <*> many1 hexDigit)
    entity = try (fmap (: []) characterReference)
    punct = try (many1 (char ',')
                 <|> fmap (: []) (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>')))
    -- Punctuation is accepted only when more URI material follows it, so
    -- trailing punctuation is left out of the URI.
    uriChunk = many1 wordChar
           <|> percentEscaped
           <|> entity
           <|> try (punct <* lookAhead (void wordChar <|> void percentEscaped))
    uriChunkBetween l r = try $ do
      chunk <- between (char l) (char r) uriChunk
      return ([l] ++ chunk ++ [r])
-- | Parse inline math between the opening delimiter @op@ and the closing
-- delimiter @cl@, returning the trimmed math source.  With @$@ as opener the
-- delimiter may not be followed by a space, and the closing delimiter may
-- not be followed by a digit.
mathInlineWith :: Stream s m Char => String -> String -> ParserT s st m String
mathInlineWith op cl = try $ do
  _ <- string op
  when (op == "$") $ notFollowedBy space
  pieces <- many1Till (plainChar <|> backslashed <|> whitespace)
                      (try (string cl))
  notFollowedBy digit
  return (trimMath (concat pieces))
  where
    plainChar = count 1 (noneOf " \t\n\\")
    backslashed = char '\\' >> (textCommand <|> escapedChar)
    -- @\text{...}@ is read as a balanced-brace group rather than
    -- character by character.
    textCommand = try (string "text" >>
                        (("\\text" ++) <$> inBalancedBraces 0 ""))
    escapedChar = (\c -> ['\\', c]) <$> anyChar
    -- A single line break or a run of spaces/tabs collapses to one space,
    -- provided it is not directly followed by '$'.
    whitespace = do
      _ <- (blankline <* notFollowedBy' blankline)
             <|> (oneOf " \t" <* skipMany (oneOf " \t"))
      notFollowedBy (char '$')
      return " "
    -- Arguments: number of currently open braces, and the text read so far
    -- in reverse order.
    inBalancedBraces :: Stream s m Char => Int -> String -> ParserT s st m String
    inBalancedBraces 0 "" = do
      c <- anyChar
      if c == '{'
        then inBalancedBraces 1 "{"
        else mzero
    inBalancedBraces 0 s = return (reverse s)
    inBalancedBraces numOpen ('\\':xs) = do
      -- The character after a backslash never changes the brace depth.
      c <- anyChar
      inBalancedBraces numOpen (c : '\\' : xs)
    inBalancedBraces numOpen xs = do
      c <- anyChar
      let depth
            | c == '}' = numOpen - 1
            | c == '{' = numOpen + 1
            | otherwise = numOpen
      inBalancedBraces depth (c : xs)
-- | Parse display math between the delimiters @op@ and @cl@.  The body may
-- span lines but may not contain a blank line.
mathDisplayWith :: Stream s m Char => String -> String -> ParserT s st m String
mathDisplayWith op cl = try $ do
  _ <- string op
  many1Till mathChar (try (string cl))
  where
    mathChar = noneOf "\n" <|> (newline <* notFollowedBy' blankline)
-- | Parse display math using whichever delimiter styles are enabled by the
-- reader extensions: @$$..$$@, @\\[..\\]@ or @\\\\[..\\\\]@.
mathDisplay :: (HasReaderOptions st, Stream s m Char)
            => ParserT s st m String
mathDisplay =
      delimited Ext_tex_math_dollars "$$" "$$"
  <|> delimited Ext_tex_math_single_backslash "\\[" "\\]"
  <|> delimited Ext_tex_math_double_backslash "\\\\[" "\\\\]"
  where
    delimited ext op cl = guardEnabled ext >> mathDisplayWith op cl
-- | Parse inline math using whichever delimiter styles are enabled by the
-- reader extensions: @$..$@, @\\(..\\)@ or @\\\\(..\\\\)@.
mathInline :: (HasReaderOptions st , Stream s m Char)
           => ParserT s st m String
mathInline =
      delimited Ext_tex_math_dollars "$" "$"
  <|> delimited Ext_tex_math_single_backslash "\\(" "\\)"
  <|> delimited Ext_tex_math_double_backslash "\\\\(" "\\\\)"
  where
    delimited ext op cl = guardEnabled ext >> mathInlineWith op cl
-- | Run a parser and also return the difference between the source column
-- after and before it ran.
withHorizDisplacement :: Stream s m Char
                      => ParserT s st m a
                      -> ParserT s st m (a, Int)
withHorizDisplacement parser = do
  startCol <- sourceColumn <$> getPosition
  result <- parser
  endCol <- sourceColumn <$> getPosition
  return (result, endCol - startCol)
-- | Run a parser and also return the raw input text it consumed.  The raw
-- text is reconstructed from the input and the source positions before and
-- after the parse.
withRaw :: Monad m
        => ParsecT [Char] st m a
        -> ParsecT [Char] st m (a, [Char])
withRaw parser = do
  startPos <- getPosition
  input <- getInput
  result <- parser
  endPos <- getPosition
  let lineSpan = sourceLine endPos - sourceLine startPos
      startCol = sourceColumn startPos
      endCol = sourceColumn endPos
      raw = case take (lineSpan + 1) (lines input) of
        [] -> ""
        -- Parse ended on the line it started on.
        [l] -> take (endCol - startCol) l
        -- Full lines, then the consumed part of the final line.
        ls -> unlines (init ls) ++ take (endCol - 1) (last ls)
  return (result, raw)
-- | Parse a backslash followed by whatever the given parser accepts,
-- returning the parser's result.
escaped :: Stream s m Char
        => ParserT s st m Char
        -> ParserT s st m Char
escaped parser = try (char '\\' *> parser)
-- | Parse a character reference such as @&amp;@ or @&#65;@ and return the
-- first character 'lookupEntity' yields for it.
characterReference :: Stream s m Char => ParserT s st m Char
characterReference = try $ do
  _ <- char '&'
  name <- many1Till nonspaceChar (char ';')
  case lookupEntity (normalize name) of
    Just (c : _) -> return c
    _ -> fail "entity not found"
  where
    -- Numeric references are looked up without a trailing ';' (with an
    -- uppercase hex marker lowercased); named ones get the ';' re-added.
    normalize ('#' : 'X' : xs) = '#' : 'x' : xs
    normalize ent@('#' : _) = ent
    normalize ent = ent ++ ";"
-- | Parse an uppercase roman numeral list number.
upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
upperRoman = (\num -> (UpperRoman, num)) <$> romanNumeral True
-- | Parse a lowercase roman numeral list number.
lowerRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
lowerRoman = (\num -> (LowerRoman, num)) <$> romanNumeral False
-- | Parse a decimal list number (one or more digits).
decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
decimal = (\digits -> (Decimal, read digits)) <$> many1 digit
-- | Parse an @\@label@ example list number.  Returns the next example
-- number from the parser state, increments that counter, and records the
-- label (when non-empty) against the number.
exampleNum :: Stream s m Char
           => ParserT s ParserState m (ListNumberStyle, Int)
exampleNum = do
  _ <- char '@'
  label <- many (alphaNum <|> oneOf "_-")
  st <- getState
  let num = stateNextExample st
      known = stateExamples st
      known'
        | null label = known
        | otherwise = M.insert label num known
  updateState $ \s -> s { stateNextExample = num + 1
                        , stateExamples = known' }
  return (Example, num)
-- | Parse a @#@ list marker, which stands for number 1 in the default style.
defaultNum :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
defaultNum = (DefaultStyle, 1) <$ char '#'
-- | Parse a single lowercase ASCII letter as a list number (@a@ = 1).
lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
lowerAlpha = toNumber <$> oneOf ['a'..'z']
  where toNumber ch = (LowerAlpha, ord ch - ord 'a' + 1)
-- | Parse a single uppercase ASCII letter as a list number (@A@ = 1).
upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
upperAlpha = toNumber <$> oneOf ['A'..'Z']
  where toNumber ch = (UpperAlpha, ord ch - ord 'A' + 1)
-- | Parse the roman numeral one (@i@ or @I@).
romanOne :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
romanOne = ((LowerRoman, 1) <$ char 'i')
       <|> ((UpperRoman, 1) <$ char 'I')
-- | Parse an ordered list marker of any style and delimiter, returning the
-- list attributes.  Every delimiter form is tried with every number style,
-- in the order listed.
anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes
anyOrderedListMarker =
  choice (concatMap (\delimParser -> map delimParser numParsers) delimParsers)
  where
    delimParsers = [inPeriod, inOneParen, inTwoParens]
    numParsers = [decimal, exampleNum, defaultNum, romanOne,
                  lowerAlpha, lowerRoman, upperAlpha, upperRoman]
-- | Parse a list number followed by a period.  The delimiter is
-- 'DefaultDelim' for the default number style and 'Period' otherwise.
inPeriod :: Stream s m Char
         => ParserT s st m (ListNumberStyle, Int)
         -> ParserT s st m ListAttributes
inPeriod num = try $ do
  (style, start) <- num
  _ <- char '.'
  return (start, style, delimFor style)
  where
    delimFor DefaultStyle = DefaultDelim
    delimFor _ = Period
-- | Parse a list number followed by a closing parenthesis.
inOneParen :: Stream s m Char
           => ParserT s st m (ListNumberStyle, Int)
           -> ParserT s st m ListAttributes
inOneParen num = try $
  (\(style, start) -> (start, style, OneParen)) <$> num <* char ')'
-- | Parse a list number enclosed in parentheses.
inTwoParens :: Stream s m Char
            => ParserT s st m (ListNumberStyle, Int)
            -> ParserT s st m ListAttributes
inTwoParens num = try $ do
  (style, start) <- between (char '(') (char ')') num
  return (start, style, TwoParens)
-- | Parse an ordered list marker with the given style and delimiter,
-- returning its number.  A @#@ marker is accepted for every style.
orderedListMarker :: Stream s m Char
                  => ListNumberStyle
                  -> ListNumberDelim
                  -> ParserT s ParserState m Int
orderedListMarker style delim =
  (\(start, _, _) -> start) <$> context (defaultNum <|> numberFor style)
  where
    numberFor DefaultStyle = decimal
    numberFor Example = exampleNum
    numberFor Decimal = decimal
    numberFor UpperRoman = upperRoman
    numberFor LowerRoman = lowerRoman
    numberFor UpperAlpha = upperAlpha
    numberFor LowerAlpha = lowerAlpha
    context = case delim of
      DefaultDelim -> inPeriod
      Period -> inPeriod
      OneParen -> inOneParen
      TwoParens -> inTwoParens
-- | Parse a character reference and return it as a one-character 'Str'.
charRef :: Stream s m Char => ParserT s st m Inline
charRef = (\c -> Str [c]) <$> characterReference
-- | Parse one line of a line block: @"| "@, the line's text, and any
-- continuation lines (lines starting with a space).  Leading spaces or
-- tabs after the marker become non-breaking spaces (U+00A0), and
-- continuation lines are joined with single spaces.
lineBlockLine :: Monad m => ParserT [Char] st m String
lineBlockLine = try $ do
  _ <- char '|' *> char ' '
  indent <- many ('\160' <$ spaceChar)
  notFollowedBy newline
  firstLine <- anyLine
  continuations <- many (try (char ' ' >> anyLine))
  return (indent ++ unwords (firstLine : continuations))
-- | Parse an empty line-block line: a @|@ followed by a blank line.
blankLineBlockLine :: Stream s m Char => ParserT s st m Char
blankLineBlockLine = try (char '|' *> blankline)
-- | Parse one or more line-block lines, then skip any trailing blank
-- lines.  An empty line-block line is returned as a one-character string.
lineBlockLines :: Monad m => ParserT [Char] st m [String]
lineBlockLines = try $
  many1 (lineBlockLine <|> fmap (: []) blankLineBlockLine)
    <* skipMany blankline
-- | Parse a table from a header parser, a row parser (given the column
-- indices), a row-separator parser and a footer parser.  The table has an
-- empty caption.
tableWith :: (Stream s m Char, HasReaderOptions st,
              Functor mf, Applicative mf, Monad mf)
          => ParserT s st m (mf [Blocks], [Alignment], [Int])
          -> ([Int] -> ParserT s st m (mf [Blocks]))
          -> ParserT s st m sep
          -> ParserT s st m end
          -> ParserT s st m (mf Blocks)
tableWith headerParser rowParser lineParser footerParser = try $ do
  (aligns, widths, heads, rows) <-
    tableWith' headerParser rowParser lineParser footerParser
  let colSpecs = zip aligns widths
  return (B.table mempty colSpecs <$> heads <*> rows)
-- | The parts of a parsed table: column alignments, relative column
-- widths, header cells and body rows.
type TableComponents mf = ([Alignment], [Double], mf [Blocks], mf [[Blocks]])
-- | Like 'tableWith', but returns the table's components instead of a
-- finished table.  Widths are all zero when the header yields no column
-- indices; otherwise they are computed by 'widthsFromIndices'.
tableWith' :: (Stream s m Char, HasReaderOptions st,
               Functor mf, Applicative mf, Monad mf)
           => ParserT s st m (mf [Blocks], [Alignment], [Int])
           -> ([Int] -> ParserT s st m (mf [Blocks]))
           -> ParserT s st m sep
           -> ParserT s st m end
           -> ParserT s st m (TableComponents mf)
tableWith' headerParser rowParser lineParser footerParser = try $ do
  (heads, aligns, indices) <- headerParser
  rows <- sequence <$> sepEndBy1 (rowParser indices) lineParser
  _ <- footerParser
  numColumns <- getOption readerColumns
  let widths
        | null indices = replicate (length aligns) 0.0
        | otherwise = widthsFromIndices numColumns indices
  return (aligns, widths, heads, rows)
-- | Compute relative column widths from a list of column boundary indices.
-- The first argument is the number of columns available on the page; the
-- widths are fractions of that number, or of the table's total width when
-- the table is wider.  The result has one entry fewer than the index list.
widthsFromIndices :: Int
                  -> [Int]
                  -> [Double]
widthsFromIndices _ [] = []
widthsFromIndices numColumns' indices = drop 1 (map fraction lengths)
  where
    numColumns = max numColumns' (last indices)
    rawLengths = zipWith (-) indices (0 : indices)
    lengths = reverse (widenLast (reverse rawLengths))
    -- If the last column is at most two characters narrower than the one
    -- before it, treat the two as equally wide.
    widenLast (x : y : zs)
      | x < y && y - x <= 2 = y : y : zs
    widenLast ls = ls
    totLength = sum lengths
    quotient :: Double
    quotient = fromIntegral (max totLength numColumns)
    fraction l = fromIntegral l / quotient
-- | Parse a grid table whose cells are parsed with the given block parser.
-- The flag says whether the table is headless.
gridTableWith :: (Stream s m Char, HasReaderOptions st,
                  Functor mf, Applicative mf, Monad mf, IsString s)
              => ParserT s st m (mf Blocks)
              -> Bool
              -> ParserT s st m (mf Blocks)
gridTableWith blocks headless =
  tableWith header row separator gridTableFooter
  where
    header = gridTableHeader headless blocks
    row = gridTableRow blocks
    separator = gridTableSep '-'
-- | Like 'gridTableWith', but returns the table's components instead of a
-- finished table.
gridTableWith' :: (Stream s m Char, HasReaderOptions st,
                   Functor mf, Applicative mf, Monad mf, IsString s)
               => ParserT s st m (mf Blocks)
               -> Bool
               -> ParserT s st m (TableComponents mf)
gridTableWith' blocks headless =
  tableWith' header row separator gridTableFooter
  where
    header = gridTableHeader headless blocks
    row = gridTableRow blocks
    separator = gridTableSep '-'
-- | Split one raw grid-table line into cell texts at the given column
-- indices.  The last index is not used for splitting, the first piece is
-- discarded, and each remaining piece is passed through 'removeFinalBar'.
gridTableSplitLine :: [Int] -> String -> [String]
gridTableSplitLine indices line = map removeFinalBar (tail pieces)
  where
    pieces = splitStringByIndices (init indices) (trimr line)
-- | Parse one column segment of a grid-table border line: an optional
-- colon, a run of @ch@, an optional colon, then @+@.  Returns the segment
-- length (colons included), that length plus one for the @+@, and the
-- alignment indicated by the colons.
gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment)
gridPart ch = do
  leftColon <- colon
  dashes <- many1 (char ch)
  rightColon <- colon
  _ <- char '+'
  let width = length dashes + fromEnum leftColon + fromEnum rightColon
      alignment
        | leftColon && rightColon = AlignCenter
        | leftColon = AlignLeft
        | rightColon = AlignRight
        | otherwise = AlignDefault
  return ((width, width + 1), alignment)
  where
    colon = option False (True <$ char ':')
-- | Parse a whole grid-table border line: @+@, one or more 'gridPart'
-- segments, and the end of the line.
gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)]
gridDashedLines ch = try $ do
  _ <- char '+'
  parts <- many1 (gridPart ch)
  _ <- blankline
  return parts
-- | Strip trailing @|@ characters from a string, then any spaces or tabs
-- that precede them.
removeFinalBar :: String -> String
removeFinalBar str =
  reverse (dropWhile isBlank (dropWhile (== '|') (reverse str)))
  where
    isBlank c = c `elem` " \t"
-- | Parse a grid-table separator line drawn with the given character,
-- returning a newline.
gridTableSep :: Stream s m Char => Char -> ParserT s st m Char
gridTableSep ch = try ('\n' <$ gridDashedLines ch)
-- | Parse the header of a grid table.  Returns the parsed header cells,
-- the column alignments and the column boundary indices.  For a headless
-- table only the top border is parsed and the header cells are empty.
gridTableHeader :: (Stream s m Char, Functor mf, Applicative mf, Monad mf,
                    IsString s)
                => Bool
                -> ParserT s st m (mf Blocks)
                -> ParserT s st m (mf [Blocks], [Alignment], [Int])
gridTableHeader headless blocks = try $ do
  optional blanklines
  dashes <- gridDashedLines '-'
  (rawContent, underDashes) <-
    if headless
      then return (repeat "", dashes)
      else do
        content <- many1 headerLine
        under <- gridDashedLines '='
        return (content, under)
  -- Both border lines must describe the same number of columns.
  guard (length dashes == length underDashes)
  let indices = scanl (+) 0 (map (snd . fst) underDashes)
      aligns = map snd underDashes
      rawHeads
        | headless = replicate (length underDashes) ""
        | otherwise =
            map (unlines . map trim)
                (transpose (map (gridTableSplitLine indices) rawContent))
  heads <- sequence <$> mapM (parseFromString blocks . trim) rawHeads
  return (heads, aligns, indices)
  where
    headerLine = notFollowedBy (gridTableSep '=') >> char '|' >>
                 many1Till anyChar newline
-- | Parse one content line of a grid table (starting with @|@) and split
-- it into raw cell texts at the given indices.
gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [String]
gridTableRawLine indices =
  gridTableSplitLine indices <$> (char '|' *> many1Till anyChar newline)
-- | Parse one row of a grid table, which may span several input lines.
-- Each cell's lines are joined, parsed with the block parser and then
-- passed through 'compactify'.
gridTableRow :: (Stream s m Char, Functor mf, Applicative mf, Monad mf,
                 IsString s)
             => ParserT s st m (mf Blocks)
             -> [Int]
             -> ParserT s st m (mf [Blocks])
gridTableRow blocks indices = do
  colLines <- many1 (gridTableRawLine indices)
  let cellText = (++ "\n") . unlines . removeOneLeadingSpace
      cols = map cellText (transpose colLines)
  cells <- sequence <$> mapM (parseFromString blocks) cols
  return (map compactifyCell <$> cells)
  where
    compactifyCell bs = case compactify [bs] of
      [] -> mempty
      x : _ -> x
-- | Drop one leading character from every line, but only if every line is
-- either empty or starts with a space; otherwise return the lines unchanged.
removeOneLeadingSpace :: [String] -> [String]
removeOneLeadingSpace xs
  | all startsWithSpace xs = map (drop 1) xs
  | otherwise = xs
  where
    startsWithSpace line = null line || take 1 line == " "
-- | Parse the footer of a grid table: one or more blank lines.
gridTableFooter :: Stream s m Char => ParserT s st m [Char]
gridTableFooter = blanklines
-- | Run a parser on the input with the given initial state, in the
-- underlying monad.  A parse error is wrapped in 'PandocParsecError'
-- together with the input.  The source name is @"source"@.
readWithM :: (Monad m, Stream s m Char, ToString s)
          => ParserT s st m a
          -> st
          -> s
          -> m (Either PandocError a)
readWithM parser state input = do
  result <- runParserT parser state "source" input
  return (mapLeft (PandocParsecError (toString input)) result)
-- | Pure version of 'readWithM' for parsers over 'String'.
readWith :: Parser [Char] st a
         -> st
         -> String
         -> Either PandocError a
readWith p t = runIdentity . readWithM p t
-- | Run a parser on a string with the default parser state and print the
-- shown result (for testing).
testStringWith :: Show a
               => ParserT [Char] ParserState Identity a
               -> [Char]
               -> IO ()
testStringWith parser str =
  UTF8.putStrLn (show (readWith parser defaultParserState str))
-- | User state threaded through the parsers in this module.  Initial
-- values are given by 'defaultParserState'.
data ParserState = ParserState
  { stateOptions :: ReaderOptions, -- ^ Reader options (see 'HasReaderOptions')
    stateParserContext :: ParserContext, -- ^ Current 'ParserContext'
    stateQuoteContext :: QuoteContext, -- ^ Current quote context (see 'HasQuoteContext')
    stateAllowLinks :: Bool, -- ^ Presumably whether links may be parsed (not used in the visible code)
    stateMaxNestingLevel :: Int, -- ^ Presumably a limit on nesting depth (not used in the visible code)
    stateLastStrPos :: Maybe SourcePos, -- ^ Position recorded via 'setLastStrPos', if any
    stateKeys :: KeyTable, -- ^ Table of reference keys
    stateHeaderKeys :: KeyTable, -- ^ A second key table; presumably for header references
    stateSubstitutions :: SubstTable, -- ^ Table of substitutions
    stateNotes :: NoteTable, -- ^ Notes as pairs of strings
    stateNotes' :: NoteTable', -- ^ Notes as a map to position and delayed blocks
    stateNoteRefs :: Set.Set String, -- ^ Presumably the note references seen so far
    stateMeta :: Meta, -- ^ Document metadata (see the 'HasMeta' instance)
    stateMeta' :: F Meta, -- ^ Document metadata as a delayed value
    stateCitations :: M.Map String String, -- ^ String-to-string map; presumably citation data
    stateHeaderTable :: [HeaderType], -- ^ List of header types
    stateHeaders :: M.Map Inlines String, -- ^ Header text to identifier (see 'HasHeaderMap')
    stateIdentifiers :: Set.Set String, -- ^ Identifiers in use (see 'HasIdentifierList')
    stateNextExample :: Int, -- ^ Number to give to the next example list item
    stateExamples :: M.Map String Int, -- ^ Example labels and their numbers
    stateMacros :: M.Map Text Macro, -- ^ Macro table (see 'HasMacros')
    stateRstDefaultRole :: String, -- ^ Presumably the current rST default role
    stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Presumably rST custom roles
    stateCaption :: Maybe Inlines, -- ^ An optional caption
    stateInHtmlBlock :: Maybe String, -- ^ Presumably the tag of an enclosing HTML block
    stateFencedDivLevel :: Int, -- ^ Presumably the nesting level of fenced divs
    stateContainers :: [String], -- ^ Include files, most recent first (see 'HasIncludeFiles')
    stateLogMessages :: [LogMessage], -- ^ Log messages, most recent first (see 'HasLogMessages')
    stateMarkdownAttribute :: Bool -- ^ Presumably set while parsing a markdown attribute
  }
-- | The default value is 'defaultParserState'.
instance Default ParserState where
  def = defaultParserState
-- | Metadata operations act on the 'stateMeta' field.
instance HasMeta ParserState where
  setMeta field val st =
    st{ stateMeta = setMeta field val $ stateMeta st }
  deleteMeta field st =
    st{ stateMeta = deleteMeta field $ stateMeta st }
-- | Parser states from which 'ReaderOptions' can be extracted.
class HasReaderOptions st where
  -- | Extract the reader options from the state.
  extractReaderOptions :: st -> ReaderOptions
  -- | Read one option from the current parser state.
  getOption :: (Stream s m t) => (ReaderOptions -> b) -> ParserT s st m b
  -- Default implementation: project from the extracted options.
  getOption f = (f . extractReaderOptions) <$> getState
-- | For 'ParserState' the options are the 'stateOptions' field.
instance HasReaderOptions ParserState where
  extractReaderOptions = stateOptions
-- | Parser states (over monad @m@) that track a 'QuoteContext'.
class HasQuoteContext st m where
  -- | Return the current quote context.
  getQuoteContext :: (Stream s m t) => ParsecT s st m QuoteContext
  -- | Run a parser with the given quote context in effect.
  withQuoteContext :: QuoteContext -> ParsecT s st m a -> ParsecT s st m a
-- | The quote context lives in 'stateQuoteContext'.  'withQuoteContext'
-- sets it for the duration of the inner parser and restores the previous
-- value afterwards, keeping any other state changes the parser made.
instance Monad m => HasQuoteContext ParserState m where
  getQuoteContext = stateQuoteContext <$> getState
  withQuoteContext context parser = do
    saved <- stateQuoteContext <$> getState
    updateState (\st -> st { stateQuoteContext = context })
    result <- parser
    updateState (\st -> st { stateQuoteContext = saved })
    return result
-- | Parser states that hold a map from header text to identifier.
class HasHeaderMap st where
  -- | Extract the header map.
  extractHeaderMap :: st -> M.Map Inlines String
  -- | Apply a function to the header map.
  updateHeaderMap :: (M.Map Inlines String -> M.Map Inlines String) ->
                     st -> st
-- | For 'ParserState' the header map is the 'stateHeaders' field.
instance HasHeaderMap ParserState where
  extractHeaderMap = stateHeaders
  updateHeaderMap f st = st{ stateHeaders = f $ stateHeaders st }
-- | Parser states that hold a set of identifiers.
class HasIdentifierList st where
  -- | Extract the identifier set.
  extractIdentifierList :: st -> Set.Set String
  -- | Apply a function to the identifier set.
  updateIdentifierList :: (Set.Set String -> Set.Set String) -> st -> st
-- | For 'ParserState' the identifier set is the 'stateIdentifiers' field.
instance HasIdentifierList ParserState where
  extractIdentifierList = stateIdentifiers
  updateIdentifierList f st = st{ stateIdentifiers = f $ stateIdentifiers st }
-- | Parser states that hold a macro table.
class HasMacros st where
  -- | Extract the macro table.
  extractMacros :: st -> M.Map Text Macro
  -- | Apply a function to the macro table.
  updateMacros :: (M.Map Text Macro -> M.Map Text Macro) -> st -> st
-- | For 'ParserState' the macro table is the 'stateMacros' field.
instance HasMacros ParserState where
  extractMacros = stateMacros
  updateMacros f st = st{ stateMacros = f $ stateMacros st }
-- | Parser states that remember an optional source position (see
-- 'updateLastStrPos' and 'notAfterString').
class HasLastStrPosition st where
  -- | Record a position.
  setLastStrPos :: SourcePos -> st -> st
  -- | Return the recorded position, if any.
  getLastStrPos :: st -> Maybe SourcePos
-- | For 'ParserState' the position is the 'stateLastStrPos' field.
instance HasLastStrPosition ParserState where
  setLastStrPos pos st = st{ stateLastStrPos = Just pos }
  getLastStrPos st = stateLastStrPos st
-- | Parser states that accumulate log messages.
class HasLogMessages st where
  -- | Add a log message.
  addLogMessage :: LogMessage -> st -> st
  -- | Return the accumulated log messages.
  getLogMessages :: st -> [LogMessage]
-- | Messages are prepended to 'stateLogMessages' and reversed on
-- retrieval, so 'getLogMessages' returns them in the order they were added.
instance HasLogMessages ParserState where
  addLogMessage msg st = st{ stateLogMessages = msg : stateLogMessages st }
  getLogMessages st = reverse $ stateLogMessages st
-- | Parser states that keep a stack of include files.
class HasIncludeFiles st where
  -- | Return the include files.
  getIncludeFiles :: st -> [String]
  -- | Add an include file.
  addIncludeFile :: String -> st -> st
  -- | Remove the most recently added include file.
  dropLatestIncludeFile :: st -> st
-- | For 'ParserState' the stack is 'stateContainers', most recent first.
instance HasIncludeFiles ParserState where
  getIncludeFiles = stateContainers
  addIncludeFile f s = s{ stateContainers = f : stateContainers s }
  dropLatestIncludeFile s = s { stateContainers = drop 1 $ stateContainers s }
-- | The initial parser state: default reader options, no context, empty
-- tables, and example numbering starting at 1.
defaultParserState :: ParserState
defaultParserState =
  ParserState { stateOptions = def,
                stateParserContext = NullState,
                stateQuoteContext = NoQuote,
                stateAllowLinks = True,
                stateMaxNestingLevel = 6,
                stateLastStrPos = Nothing,
                stateKeys = M.empty,
                stateHeaderKeys = M.empty,
                stateSubstitutions = M.empty,
                stateNotes = [],
                stateNotes' = M.empty,
                stateNoteRefs = Set.empty,
                stateMeta = nullMeta,
                stateMeta' = return nullMeta,
                stateCitations = M.empty,
                stateHeaderTable = [],
                stateHeaders = M.empty,
                stateIdentifiers = Set.empty,
                stateNextExample = 1,
                stateExamples = M.empty,
                stateMacros = M.empty,
                stateRstDefaultRole = "title-reference",
                stateRstCustomRoles = M.empty,
                stateCaption = Nothing,
                stateInHtmlBlock = Nothing,
                stateFencedDivLevel = 0,
                stateContainers = [],
                stateLogMessages = [],
                stateMarkdownAttribute = False
              }
-- | Add a log message to the parser state.
logMessage :: (Stream s m a, HasLogMessages st)
           => LogMessage -> ParserT s st m ()
logMessage = updateState . addLogMessage
-- | Pass every log message held in the parser state to 'report'.  The
-- messages are not removed from the state.
reportLogMessages :: (PandocMonad m, HasLogMessages st) => ParserT s st m ()
reportLogMessages = getState >>= mapM_ report . getLogMessages
-- | Succeed only if the given extension is enabled in the reader options.
guardEnabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m ()
guardEnabled ext = do
  exts <- getOption readerExtensions
  guard (extensionEnabled ext exts)
-- | Succeed only if the given extension is NOT enabled in the reader
-- options.
guardDisabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m ()
guardDisabled ext = do
  exts <- getOption readerExtensions
  guard (not (extensionEnabled ext exts))
-- | Record the current source position in the parser state via
-- 'setLastStrPos'.
updateLastStrPos :: (Stream s m a, HasLastStrPosition st) => ParserT s st m ()
updateLastStrPos = do
  pos <- getPosition
  updateState (setLastStrPos pos)
-- | Return 'True' unless the current position equals the position
-- recorded by 'updateLastStrPos'.
notAfterString :: (Stream s m a, HasLastStrPosition st) => ParserT s st m Bool
notAfterString = do
  pos <- getPosition
  lastPos <- getLastStrPos <$> getState
  return (lastPos /= Just pos)
-- | A kind of header, tagged with a character.
-- NOTE(review): presumably these are rST-style headers, with the
-- character being the one used for the under/overline — confirm in the reader.
data HeaderType
  = SingleHeader Char
  | DoubleHeader Char
  deriving (Eq, Show)
-- | The context a parser is running in: inside a list item
-- ('ListItemState') or no particular context ('NullState').
data ParserContext
  = ListItemState
  | NullState
  deriving (Eq, Show)
-- | Whether the parser is currently inside single quotes, inside double
-- quotes, or not inside quotes at all.
data QuoteContext
  = InSingleQuote
  | InDoubleQuote
  | NoQuote
  deriving (Eq, Show)
-- | Notes as a list of string pairs.
-- NOTE(review): presumably (label, raw note text) — confirm against the readers.
type NoteTable = [(String, String)]
-- | Notes as a map from a string key to a source position and delayed
-- block content.
type NoteTable' = M.Map String (SourcePos, F Blocks)
-- | A normalized lookup key (see 'toKey').
newtype Key = Key String deriving (Show, Read, Eq, Ord)
-- | Build a 'Key' from a string: strip one pair of enclosing square
-- brackets if present, collapse runs of whitespace to single spaces, and
-- lowercase the result.
toKey :: String -> Key
toKey raw = Key (map toLower (unwords (words (stripBrackets raw))))
  where
    stripBrackets s = case s of
      '[' : inner | "]" `isSuffixOf` inner -> init inner
      _ -> s
-- | Map from a 'Key' to a link target and attributes.
type KeyTable = M.Map Key (Target, Attr)
-- | Map from a 'Key' to inline content.
type SubstTable = M.Map Key Inlines
-- | Record a header in the parser state and return its final attributes.
--
-- If no identifier is given and @Ext_auto_identifiers@ is enabled, a
-- unique identifier is generated from the header text, converted to ASCII
-- when @Ext_ascii_identifiers@ is enabled.  Otherwise a given identifier
-- is recorded as-is, and a 'DuplicateIdentifier' message is logged if it
-- was already in use.
registerHeader :: (Stream s m a, HasReaderOptions st,
                   HasHeaderMap st, HasLogMessages st, HasIdentifierList st)
               => Attr -> Inlines -> ParserT s st m Attr
registerHeader (ident,classes,kvs) header' = do
  ids <- extractIdentifierList <$> getState
  exts <- getOption readerExtensions
  -- Keep the existing entry when the same header text is seen again.
  let insert' = M.insertWith (\_new old -> old)
  if null ident && Ext_auto_identifiers `extensionEnabled` exts
     then do
       let id' = uniqueIdent (B.toList header') ids
       let id'' = if Ext_ascii_identifiers `extensionEnabled` exts
                     then mapMaybe toAsciiChar id'
                     else id'
       -- Both the original and the ASCII form are marked as used; the
       -- header map stores the original form.
       updateState $ updateIdentifierList $ Set.insert id'
       updateState $ updateIdentifierList $ Set.insert id''
       updateState $ updateHeaderMap $ insert' header' id'
       return (id'',classes,kvs)
     else do
        -- NOTE(review): the nesting here (both updates inside 'unless')
        -- should be confirmed against the upstream source.
        unless (null ident) $ do
          when (ident `Set.member` ids) $ do
            pos <- getPosition
            logMessage $ DuplicateIdentifier ident pos
          updateState $ updateIdentifierList $ Set.insert ident
          updateState $ updateHeaderMap $ insert' header' ident
        return (ident,classes,kvs)
-- | Parse smart punctuation. Fails unless @Ext_smart@ is enabled; otherwise
-- tries, in order, a quoted span (using the supplied inline parser for its
-- contents), an apostrophe, a dash, and an ellipsis.
smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
                 => ParserT s st m Inlines
                 -> ParserT s st m Inlines
smartPunctuation inlineParser = guardEnabled Ext_smart >> choice alternatives
  where
    alternatives =
      [ quoted inlineParser
      , apostrophe
      , dash
      , ellipses
      ]
-- | Parse a straight (@'@) or curly (U+2019) apostrophe and produce a
-- 'B.str' containing U+2019.
apostrophe :: Stream s m Char => ParserT s st m Inlines
apostrophe = B.str "\x2019" <$ (char '\'' <|> char '\8217')
-- | Parse a quoted span: a double-quoted span is tried first, then a
-- single-quoted one. The argument parses the inline content inside.
quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
       => ParserT s st m Inlines
       -> ParserT s st m Inlines
quoted inlineParser = inDoubleQuotes <|> inSingleQuotes
  where
    inDoubleQuotes = doubleQuoted inlineParser
    inSingleQuotes = singleQuoted inlineParser
-- | Parse a single-quoted span: an opening quote ('singleQuoteStart'), then
-- one or more inlines up to 'singleQuoteEnd', parsed inside the
-- 'InSingleQuote' context. The inlines are concatenated and wrapped with
-- 'B.singleQuoted'. Backtracks completely on failure.
singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
             => ParserT s st m Inlines
             -> ParserT s st m Inlines
singleQuoted inlineParser = try $ do
  singleQuoteStart
  contents <- withQuoteContext InSingleQuote $
                many1Till inlineParser singleQuoteEnd
  return (B.singleQuoted (mconcat contents))
-- | Parse a double-quoted span: an opening quote ('doubleQuoteStart'), then
-- zero or more inlines up to 'doubleQuoteEnd', parsed inside the
-- 'InDoubleQuote' context. The inlines are concatenated and wrapped with
-- 'B.doubleQuoted'. Backtracks completely on failure.
doubleQuoted :: (HasQuoteContext st m, Stream s m Char)
             => ParserT s st m Inlines
             -> ParserT s st m Inlines
doubleQuoted inlineParser = try $ do
  doubleQuoteStart
  contents <- withQuoteContext InDoubleQuote $
                manyTill inlineParser doubleQuoteEnd
  return (B.doubleQuoted (mconcat contents))
-- | Fail (with the message @already inside quotes@) when the current quote
-- context equals the given one; otherwise do nothing.
failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t)
                     => QuoteContext
                     -> ParserT s st m ()
failIfInQuoteContext context =
  getQuoteContext >>= \current ->
    when (current == context) (fail "already inside quotes")
-- | Parse one of the given characters, written either literally or as a
-- character reference (via 'characterReference') that resolves to one of
-- them. The reference branch backtracks if the resolved character is not
-- in the list.
charOrRef :: Stream s m Char => String -> ParserT s st m Char
charOrRef cs = oneOf cs <|> try viaReference
  where
    viaReference = do
      resolved <- characterReference
      guard (resolved `elem` cs)
      return resolved
-- | Parse an opening single quote. Fails if already inside single quotes or
-- if 'notAfterString' returns 'False'. The quote character must not be
-- followed by a space, tab, or newline.
singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
                 => ParserT s st m ()
singleQuoteStart = do
  failIfInQuoteContext InSingleQuote
  notAfterString >>= guard
  try (charOrRef "'\8216\145" >> notFollowedBy (oneOf " \t\n"))
-- | Parse a closing single quote: one of the accepted quote characters that
-- is not immediately followed by an alphanumeric character. Backtracks on
-- failure.
singleQuoteEnd :: Stream s m Char
               => ParserT s st m ()
singleQuoteEnd =
  try (charOrRef "'\8217\146" >> notFollowedBy alphaNum)
-- | Parse an opening double quote. Fails if already inside double quotes.
-- The quote character must not be followed by a space, tab, or newline.
doubleQuoteStart :: (HasQuoteContext st m, Stream s m Char)
                 => ParserT s st m ()
doubleQuoteStart = do
  failIfInQuoteContext InDoubleQuote
  try (charOrRef "\"\8220\147" >> notFollowedBy (oneOf " \t\n"))
-- | Parse a closing double quote (literal or as a character reference) and
-- discard it.
doubleQuoteEnd :: Stream s m Char
               => ParserT s st m ()
doubleQuoteEnd = () <$ charOrRef "\"\8221\148"
-- | Parse three consecutive dots and produce a 'B.str' containing the
-- single character U+2026. Backtracks on failure.
ellipses :: Stream s m Char
         => ParserT s st m Inlines
ellipses = try (B.str "\8230" <$ string "...")
-- | Parse a dash and produce U+2014 (em dash) or U+2013 (en dash).
--
-- * With @Ext_old_dashes@: @--@ gives an em dash; a single @-@ followed by
--   a digit (which is not consumed) gives an en dash.
-- * Otherwise: @---@ gives an em dash and @--@ gives an en dash.
--
-- Backtracks completely on failure.
dash :: (HasReaderOptions st, Stream s m Char)
     => ParserT s st m Inlines
dash = try $ do
  exts <- getOption readerExtensions
  if extensionEnabled Ext_old_dashes exts
    then char '-' >> (emDashAfterHyphen <|> enDashBeforeDigit)
    else string "--" >> (emDashAfterHyphen <|> return enDash)
  where
    enDash = B.str "\8211"
    -- One more hyphen upgrades what was parsed so far to an em dash.
    emDashAfterHyphen = B.str "\8212" <$ char '-'
    enDashBeforeDigit = enDash <$ lookAhead digit
-- | Run a parser one nesting level deeper. Fails immediately when
-- 'stateMaxNestingLevel' is not positive. Otherwise the level is
-- decremented while the parser runs and reset to its original value after
-- the parser succeeds.
nested :: Stream s m a
       => ParserT s ParserState m a
       -> ParserT s ParserState m a
nested p = do
  nestlevel <- stateMaxNestingLevel <$> getState
  guard (nestlevel > 0)
  setLevel (nestlevel - 1)
  p <* setLevel nestlevel
  where
    setLevel n = updateState $ \st -> st{ stateMaxNestingLevel = n }
-- | Parse a citation key of the form @\@key@ or @-\@key@.
--
-- Fails if 'notAfterString' returns 'False'. The returned 'Bool' is 'True'
-- when the key was prefixed with @-@. The key starts with an alphanumeric
-- character, @_@, or @*@. It continues with alphanumerics and @_@;
-- punctuation from @:.#$%&-+?<>~/@ is accepted only when followed by an
-- alphanumeric or @_@, and @:@ or @/@ is also accepted when followed
-- by @/@.
citeKey :: (Stream s m Char, HasLastStrPosition st)
        => ParserT s st m (Bool, String)
citeKey = try $ do
  notAfterString >>= guard
  suppressAuthor <- option False (True <$ char '-')
  _ <- char '@'
  firstChar <- alphaNum <|> char '_' <|> char '*'
  rest <- many (keyChar <|> internalPunct <|> beforeSlash)
  return (suppressAuthor, firstChar : rest)
  where
    keyChar = satisfy (\c -> isAlphaNum c || c == '_')
    -- Punctuation is only part of the key when a regular key char follows.
    internalPunct = try (oneOf ":.#$%&-+?<>~/" <* lookAhead keyChar)
    beforeSlash = try (oneOf ":/" <* lookAhead (char '/'))
-- | Token parser built on 'tokenPrim'. Takes a function to show a token, a
-- function giving a token's source position, and a matching function. The
-- position passed on to 'tokenPrim' is that of the token just read.
token :: (Stream s m t)
      => (t -> String)
      -> (t -> SourcePos)
      -> (t -> Maybe a)
      -> ParsecT s st m a
token pp pos = tokenPrim pp positionOf
  where
    positionOf _ tok _ = pos tok
infixr 5 <+?>
-- | Run the first parser, then try the second; if the second fails (with
-- backtracking), use 'mempty' in its place. The two results are combined
-- with 'mappend'.
(<+?>) :: (Monoid a) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a
a <+?> b = do
  first <- a
  second <- try b <|> return mempty
  return (first `mappend` second)
-- | Move @id@ and @class@ entries from the key-value list of an attribute
-- into its identifier and class slots.
--
-- * If the key-value list has an @id@ entry, its (first) value replaces the
--   identifier; otherwise the identifier is kept.
-- * If it has a @class@ entry, its (first) value is split on whitespace and
--   replaces the class list; otherwise the class list is kept.
-- * All @id@ and @class@ pairs are removed from the returned key-value
--   list.
extractIdClass :: Attr -> Attr
extractIdClass (ident, cls, kvs) = (ident', cls', kvs')
  where
    ident' = fromMaybe ident (lookup "id" kvs)
    cls'   = case lookup "class" kvs of
               Just cl -> words cl
               Nothing -> cls
    -- Keep a pair only if its key is neither "id" nor "class". (Joining
    -- the two tests with (||) is always True and would drop nothing.)
    kvs'   = filter (\(k,_) -> k /= "id" && k /= "class") kvs
-- | Parse the contents of an included file with the given block parser.
--
-- Arguments: the block parser, a function turning file contents into
-- tokens, the directories passed to 'readFileFromDirs', and the file path.
--
-- Throws a 'PandocParseError' if the file is already in the include-file
-- list of the state (an include loop). If the file cannot be read, a
-- 'CouldNotLoadIncludeFile' message is reported and empty contents are
-- parsed. The parser's input and position are switched to the file while
-- the block parser runs, then restored, and the file is dropped from the
-- include-file list.
insertIncludedFile' :: (PandocMonad m, HasIncludeFiles st,
                        Functor mf, Applicative mf, Monad mf)
                    => ParserT [a] st m (mf Blocks)
                    -> (String -> [a])
                    -> [FilePath] -> FilePath
                    -> ParserT [a] st m (mf Blocks)
insertIncludedFile' blocks totoks dirs f = do
  savedPos <- getPosition
  savedInput <- getInput
  st <- getState
  when (f `elem` getIncludeFiles st) $
    throwError $ PandocParseError $ "Include file loop at " ++ show savedPos
  updateState (addIncludeFile f)
  loaded <- readFileFromDirs dirs f
  contents <- case loaded of
    Nothing -> report (CouldNotLoadIncludeFile f savedPos) >> return ""
    Just s  -> return s
  -- Point the parser at the start of the included file.
  setPosition (newPos f 1 1)
  setInput (totoks contents)
  result <- blocks
  -- Resume the including document where it was left.
  setInput savedInput
  setPosition savedPos
  updateState dropLatestIncludeFile
  return result
-- | Parse an included file with a block parser that yields plain 'Blocks'.
-- Wraps the parser's result in 'Identity' to reuse 'insertIncludedFile''
-- and unwraps it afterwards.
insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
                   => ParserT [a] st m Blocks
                   -> (String -> [a])
                   -> [FilePath] -> FilePath
                   -> ParserT [a] st m Blocks
insertIncludedFile blocks totoks dirs f =
  fmap runIdentity (insertIncludedFile' (fmap Identity blocks) totoks dirs f)
-- | Parse an included file with a block parser over 'String' input that
-- yields a 'Future'. The file contents are used as the input unchanged
-- ('id' is the tokenizer passed to 'insertIncludedFile'').
insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st)
                    => ParserT String st m (Future st Blocks)
                    -> [FilePath] -> FilePath
                    -> ParserT String st m (Future st Blocks)
insertIncludedFileF p dirs f = insertIncludedFile' p id dirs f