{-# LANGUAGE OverloadedStrings #-}
module Cheapskate.Inlines (
parseInlines
, pHtmlTag
, pReference
, pLinkLabel)
where
import Cheapskate.ParserCombinators
import Cheapskate.Util
import Cheapskate.Types
import Data.Char hiding (Space)
import qualified Data.Sequence as Seq
import Data.Sequence (singleton, (<|), viewl, ViewL(..))
import Prelude hiding (takeWhile)
import Control.Applicative
import Data.Monoid
import Control.Monad
import qualified Data.Map as M
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Set as Set
pHtmlTag :: Parser (HtmlTagType, Text)
pHtmlTag = do
char '<'
closing <- (char '/' >> return True) <|> return False
tagname <- takeWhile1 (\c -> isAsciiAlphaNum c || c == '?' || c == '!')
let tagname' = T.toLower tagname
let attr = do ss <- takeWhile isSpace
x <- satisfy isLetter
xs <- takeWhile (\c -> isAsciiAlphaNum c || c == ':')
skip (=='=')
v <- pQuoted '"' <|> pQuoted '\'' <|> takeWhile1 isAlphaNum
<|> return ""
return $ ss <> T.singleton x <> xs <> "=" <> v
attrs <- T.concat <$> many attr
final <- takeWhile (\c -> isSpace c || c == '/')
char '>'
let tagtype = if closing
then Closing tagname'
else case T.stripSuffix "/" final of
Just _ -> SelfClosing tagname'
Nothing -> Opening tagname'
return (tagtype,
T.pack ('<' : ['/' | closing]) <> tagname <> attrs <> final <> ">")
pQuoted :: Char -> Parser Text
pQuoted c = do
skip (== c)
contents <- takeTill (== c)
skip (== c)
return (T.singleton c <> contents <> T.singleton c)
pHtmlComment :: Parser Text
pHtmlComment = do
string "<!--"
rest <- manyTill anyChar (string "-->")
return $ "<!--" <> T.pack rest <> "-->"
pLinkLabel :: Parser Text
pLinkLabel = char '[' *> (T.concat <$>
(manyTill (regChunk <|> pEscaped <|> bracketed <|> codeChunk) (char ']')))
where regChunk = takeWhile1 (\c -> c /='`' && c /='[' && c /=']' && c /='\\')
codeChunk = snd <$> pCode'
bracketed = inBrackets <$> pLinkLabel
inBrackets t = "[" <> t <> "]"
pLinkUrl :: Parser Text
pLinkUrl = do
inPointy <- (char '<' >> return True) <|> return False
if inPointy
then T.pack <$> manyTill
(pSatisfy (\c -> c /='\r' && c /='\n')) (char '>')
else T.concat <$> many (regChunk <|> parenChunk)
where regChunk = takeWhile1 (notInClass " \n()\\") <|> pEscaped
parenChunk = parenthesize . T.concat <$> (char '(' *>
manyTill (regChunk <|> parenChunk) (char ')'))
parenthesize x = "(" <> x <> ")"
pLinkTitle :: Parser Text
pLinkTitle = do
c <- satisfy (\c -> c == '"' || c == '\'' || c == '(')
next <- peekChar
case next of
Nothing -> mzero
Just x
| isWhitespace x -> mzero
| x == ')' -> mzero
| otherwise -> return ()
let ender = if c == '(' then ')' else c
let pEnder = char ender <* nfb (skip isAlphaNum)
let regChunk = takeWhile1 (\x -> x /= ender && x /= '\\') <|> pEscaped
let nestedChunk = (\x -> T.singleton c <> x <> T.singleton ender)
<$> pLinkTitle
T.concat <$> manyTill (regChunk <|> nestedChunk) pEnder
pReference :: Parser (Text, Text, Text)
pReference = do
lab <- pLinkLabel
char ':'
scanSpnl
url <- pLinkUrl
tit <- option T.empty $ scanSpnl >> pLinkTitle
endOfInput
return (lab, url, tit)
pEscaped :: Parser Text
pEscaped = T.singleton <$> (skip (=='\\') *> satisfy isEscapable)
pSatisfy :: (Char -> Bool) -> Parser Char
pSatisfy p =
satisfy (\c -> c /= '\\' && p c)
<|> (char '\\' *> satisfy (\c -> isEscapable c && p c))
parseInlines :: ReferenceMap -> Text -> Inlines
parseInlines refmap t =
case parse (msum <$> many (pInline refmap) <* endOfInput) t of
Left e -> error ("parseInlines: " ++ show e)
Right r -> r
pInline :: ReferenceMap -> Parser Inlines
pInline refmap =
pAsciiStr
<|> pSpace
<|> pEnclosure '*' refmap
<|> (notAfter isAlphaNum *> pEnclosure '_' refmap)
<|> pCode
<|> pLink refmap
<|> pImage refmap
<|> pRawHtml
<|> pAutolink
<|> pEntity
<|> pSym
pSpace :: Parser Inlines
pSpace = do
ss <- takeWhile1 isWhitespace
return $ singleton
$ if T.any (=='\n') ss
then if " " `T.isPrefixOf` ss
then LineBreak
else SoftBreak
else Space
isAsciiAlphaNum :: Char -> Bool
isAsciiAlphaNum c =
(c >= 'a' && c <= 'z') ||
(c >= 'A' && c <= 'Z') ||
(c >= '0' && c <= '9')
pAsciiStr :: Parser Inlines
pAsciiStr = do
t <- takeWhile1 isAsciiAlphaNum
mbc <- peekChar
case mbc of
Just ':' -> if t `Set.member` schemeSet
then pUri t
else return $ singleton $ Str t
_ -> return $ singleton $ Str t
pSym :: Parser Inlines
pSym = do
c <- anyChar
let ch = singleton . Str . T.singleton
if c == '\\'
then ch <$> satisfy isEscapable
<|> singleton LineBreak <$ satisfy (=='\n')
<|> return (ch '\\')
else return (ch c)
schemes :: [Text]
schemes = [
"coap","doi","javascript"
,"aaa","aaas","about","acap"
,"cap","cid","crid","data","dav","dict","dns","file","ftp"
,"geo","go","gopher","h323","http","https","iax","icap","im"
,"imap","info","ipp","iris","iris.beep","iris.xpc","iris.xpcs"
,"iris.lwz","ldap","mailto","mid","msrp","msrps","mtqp"
,"mupdate","news","nfs","ni","nih","nntp","opaquelocktoken","pop"
,"pres","rtsp","service","session","shttp","sieve","sip","sips"
,"sms","snmp","soap.beep","soap.beeps","tag","tel","telnet","tftp"
,"thismessage","tn3270","tip","tv","urn","vemmi","ws","wss"
,"xcon","xcon-userid","xmlrpc.beep","xmlrpc.beeps","xmpp","z39.50r"
,"z39.50s"
,"adiumxtra","afp","afs","aim","apt","attachment","aw"
,"beshare","bitcoin","bolo","callto","chrome","chrome-extension"
,"com-eventbrite-attendee","content","cvs","dlna-playsingle"
,"dlna-playcontainer","dtn","dvb","ed2k","facetime","feed"
,"finger","fish","gg","git","gizmoproject","gtalk"
,"hcp","icon","ipn","irc","irc6","ircs","itms","jar"
,"jms","keyparc","lastfm","ldaps","magnet","maps","market"
,"message","mms","ms-help","msnim","mumble","mvn","notes"
,"oid","palm","paparazzi","platform","proxy","psyc","query"
,"res","resource","rmi","rsync","rtmp","secondlife","sftp"
,"sgn","skype","smb","soldat","spotify","ssh","steam","svn"
,"teamspeak","things","udp","unreal","ut2004","ventrilo"
,"view-source","webcal","wtai","wyciwyg","xfire","xri"
,"ymsgr" ]
schemeSet :: Set.Set Text
schemeSet = Set.fromList $ schemes ++ map T.toUpper schemes
pUri :: Text -> Parser Inlines
pUri scheme = do
char ':'
x <- scan (OpenParens 0) uriScanner
guard $ not $ T.null x
let (rawuri, endingpunct) =
case T.last x of
c | c `elem` (".;?!:," :: String) ->
(scheme <> ":" <> T.init x, singleton (Str (T.singleton c)))
_ -> (scheme <> ":" <> x, mempty)
return $ autoLink rawuri <> endingpunct
data OpenParens = OpenParens Int
uriScanner :: OpenParens -> Char -> Maybe OpenParens
uriScanner _ ' ' = Nothing
uriScanner _ '\n' = Nothing
uriScanner (OpenParens n) '(' = Just (OpenParens (n + 1))
uriScanner (OpenParens n) ')'
| n > 0 = Just (OpenParens (n - 1))
| otherwise = Nothing
uriScanner st '+' = Just st
uriScanner st '/' = Just st
uriScanner _ c | isSpace c = Nothing
uriScanner st _ = Just st
pEnclosure :: Char -> ReferenceMap -> Parser Inlines
pEnclosure c refmap = do
cs <- takeWhile1 (== c)
(Str cs <|) <$> pSpace
<|> case T.length cs of
3 -> pThree c refmap
2 -> pTwo c refmap mempty
1 -> pOne c refmap mempty
_ -> return (singleton $ Str cs)
single :: (Inlines -> Inline) -> Inlines -> Inlines
single constructor ils = if Seq.null ils
then mempty
else singleton (constructor ils)
pOne :: Char -> ReferenceMap -> Inlines -> Parser Inlines
pOne c refmap prefix = do
contents <- msum <$> many ( (nfbChar c >> pInline refmap)
<|> (string (T.pack [c,c]) >>
nfbChar c >> pTwo c refmap mempty) )
(char c >> return (single Emph $ prefix <> contents))
<|> return (singleton (Str (T.singleton c)) <> (prefix <> contents))
pTwo :: Char -> ReferenceMap -> Inlines -> Parser Inlines
pTwo c refmap prefix = do
let ender = string $ T.pack [c,c]
contents <- msum <$> many (nfb ender >> pInline refmap)
(ender >> return (single Strong $ prefix <> contents))
<|> return (singleton (Str $ T.pack [c,c]) <> (prefix <> contents))
pThree :: Char -> ReferenceMap -> Parser Inlines
pThree c refmap = do
contents <- msum <$> (many (nfbChar c >> pInline refmap))
(string (T.pack [c,c]) >> (pOne c refmap (single Strong contents)))
<|> (char c >> (pTwo c refmap (single Emph contents)))
<|> return (singleton (Str $ T.pack [c,c,c]) <> contents)
pCode :: Parser Inlines
pCode = fst <$> pCode'
pCode' :: Parser (Inlines, Text)
pCode' = do
ticks <- takeWhile1 (== '`')
let end = string ticks >> nfb (char '`')
let nonBacktickSpan = takeWhile1 (/= '`')
let backtickSpan = takeWhile1 (== '`')
contents <- T.concat <$> manyTill (nonBacktickSpan <|> backtickSpan) end
return (singleton . Code . T.strip $ contents, ticks <> contents <> ticks)
pLink :: ReferenceMap -> Parser Inlines
pLink refmap = do
lab <- pLinkLabel
let lab' = parseInlines refmap lab
pInlineLink lab' <|> pReferenceLink refmap lab lab'
<|> return (singleton (Str "[") <> lab' <> singleton (Str "]"))
pInlineLink :: Inlines -> Parser Inlines
pInlineLink lab = do
char '('
scanSpaces
url <- pLinkUrl
tit <- option "" $ scanSpnl *> pLinkTitle <* scanSpaces
char ')'
return $ singleton $ Link lab url tit
lookupLinkReference :: ReferenceMap
-> Text
-> Maybe (Text, Text)
lookupLinkReference refmap key = M.lookup (normalizeReference key) refmap
pReferenceLink :: ReferenceMap -> Text -> Inlines -> Parser Inlines
pReferenceLink refmap rawlab lab = do
ref <- option rawlab $ scanSpnl >> pLinkLabel
let ref' = if T.null ref then rawlab else ref
case lookupLinkReference refmap ref' of
Just (url,tit) -> return $ singleton $ Link lab url tit
Nothing -> fail "Reference not found"
pImage :: ReferenceMap -> Parser Inlines
pImage refmap = do
char '!'
(linkToImage <$> pLink refmap) <|> return (singleton (Str "!"))
linkToImage :: Inlines -> Inlines
linkToImage ils =
case viewl ils of
(Link lab url tit :< x)
| Seq.null x -> singleton (Image lab url tit)
_ -> singleton (Str "!") <> ils
pEntity :: Parser Inlines
pEntity = do
char '&'
res <- pCharEntity <|> pDecEntity <|> pHexEntity
char ';'
return $ singleton $ Entity $ "&" <> res <> ";"
pCharEntity :: Parser Text
pCharEntity = takeWhile1 (\c -> isAscii c && isLetter c)
pDecEntity :: Parser Text
pDecEntity = do
char '#'
res <- takeWhile1 isDigit
return $ "#" <> res
pHexEntity :: Parser Text
pHexEntity = do
char '#'
x <- char 'X' <|> char 'x'
res <- takeWhile1 isHexDigit
return $ "#" <> T.singleton x <> res
pRawHtml :: Parser Inlines
pRawHtml = singleton . RawHtml <$> (snd <$> pHtmlTag <|> pHtmlComment)
pAutolink :: Parser Inlines
pAutolink = do
skip (=='<')
s <- takeWhile1 (\c -> c /= ':' && c /= '@')
rest <- takeWhile1 (\c -> c /='>' && c /= ' ')
skip (=='>')
case True of
_ | "@" `T.isPrefixOf` rest -> return $ emailLink (s <> rest)
| s `Set.member` schemeSet -> return $ autoLink (s <> rest)
| otherwise -> fail "Unknown contents of <>"
autoLink :: Text -> Inlines
autoLink t = singleton $ Link (toInlines t) t (T.empty)
where toInlines t' = case parse pToInlines t' of
Right r -> r
Left e -> error $ "autolink: " ++ show e
pToInlines = mconcat <$> many strOrEntity
strOrEntity = ((singleton . Str) <$> takeWhile1 (/='&'))
<|> pEntity
<|> ((singleton . Str) <$> string "&")
emailLink :: Text -> Inlines
emailLink t = singleton $ Link (singleton $ Str t)
("mailto:" <> t) (T.empty)