module Text.Highlighter.Lexers.Html (lexer) where
import qualified Text.Highlighter.Lexers.Css as Css
import qualified Text.Highlighter.Lexers.Javascript as Javascript
import Text.Regex.PCRE.Light
import Text.Highlighter.Types
-- | Lexer description for HTML documents.
--
-- Lexing starts in the @root'@ state. The metadata lists (aliases, file
-- extensions, MIME types) are what a caller can use to select this lexer;
-- how they are consulted is decided by the surrounding framework.
lexer :: Lexer
lexer = Lexer
    { lName       = "HTML"
    , lAliases    = ["html"]
    , lExtensions = fileExtensions
    , lMimetypes  = mimeTypes
    , lStart      = root'
    , lFlags      = regexOptions
    }
  where
    -- File name suffixes associated with this lexer.
    fileExtensions = [".html", ".htm", ".xhtml", ".xslt"]

    -- MIME types associated with this lexer.
    mimeTypes = ["text/html", "application/xhtml+xml"]

    -- PCRE options: 'caseless' makes matching case-insensitive and 'dotall'
    -- lets '.' match newlines. Presumably the framework applies them to
    -- every pattern of this lexer -- confirm against the matcher code.
    regexOptions = [caseless, dotall]
-- | Rules for the comment state.
--
-- Everything matched here is emitted with the @Comment@ token type. The
-- terminator rule additionally applies @Pop@ (presumably returning to the
-- state that entered this one).
comment' :: TokenMatcher
comment' =
    [ tok "[^-]+" commentTok          -- run of characters containing no dash
    , tokNext "-->" commentTok Pop    -- comment terminator
    , tok "-" commentTok              -- single dash; listed after the terminator rule
    ]
  where
    -- Token type shared by all three rules.
    commentTok = Arbitrary "Comment"
-- | Rules for the state covering the body of a style element.
--
-- The rule order of the original definition is kept: the closing-tag rule
-- comes first, the body rule second.
styleContent' :: TokenMatcher
styleContent' = [closingTag, cssBody]
  where
    -- The closing style tag (whitespace allowed around the slash and the
    -- name), emitted as a Name/Tag token, followed by @Pop@.
    closingTag =
        tokNext "<\\s*/\\s*style\\s*>" (Arbitrary "Name" :. Arbitrary "Tag") Pop

    -- Non-greedy match of at least one character, stopping right before the
    -- closing style tag (look-ahead, so the tag itself is not consumed).
    -- The matched text is handed over via @Using Css.lexer@.
    cssBody =
        tok ".+?(?=<\\s*/\\s*style\\s*>)" (Using Css.lexer)
-- | Rules for the attribute-value state.
--
-- Each rule emits a Literal/String token and then applies @Pop@. The three
-- accepted value shapes are, in order: a double-quoted value, a
-- single-quoted value, and an unquoted run that contains neither
-- whitespace nor a closing angle bracket.
attr' :: TokenMatcher
attr' =
    [ tokNext valuePattern stringTok Pop
    | valuePattern <-
        [ "\".*?\""
        , "'.*?'"
        , "[^\\s>]+"
        ]
    ]
  where
    -- Token type shared by every attribute-value rule.
    stringTok = Arbitrary "Literal" :. Arbitrary "String"
-- | Rules for the state covering the body of a script element.
--
-- The rule order of the original definition is kept: the closing-tag rule
-- comes first, the body rule second.
scriptContent' :: TokenMatcher
scriptContent' = [closingTag, scriptBody]
  where
    -- The closing script tag (whitespace allowed around the slash and the
    -- name), emitted as a Name/Tag token, followed by @Pop@.
    closingTag =
        tokNext "<\\s*/\\s*script\\s*>" (Arbitrary "Name" :. Arbitrary "Tag") Pop

    -- Non-greedy match of at least one character, stopping right before the
    -- closing script tag (look-ahead, so the tag itself is not consumed).
    -- The matched text is handed over via @Using Javascript.lexer@.
    scriptBody =
        tok ".+?(?=<\\s*/\\s*script\\s*>)" (Using Javascript.lexer)
-- | Rules for the state covering the inside of a tag.
--
-- Handles whitespace, attribute names (with or without a following equals
-- sign) and the end of the tag.
tag' :: TokenMatcher
tag' =
    [ -- Whitespace between attributes.
      tok "\\s+" (Arbitrary "Text")
      -- Attribute name followed by an equals sign: continue in @attr'@ for
      -- the value.
    , tokNext "[a-zA-Z0-9_:-]+\\s*=" attributeTok (GoTo attr')
      -- Attribute name with no equals sign after it.
    , tok "[a-zA-Z0-9_:-]+" attributeTok
      -- End of the tag, with an optional slash before the closing bracket;
      -- applies @Pop@.
    , tokNext "/?\\s*>" tagTok Pop
    ]
  where
    -- Token type used for attribute names.
    attributeTok = Arbitrary "Name" :. Arbitrary "Attribute"

    -- Token type used for the tag terminator.
    tagTok = Arbitrary "Name" :. Arbitrary "Tag"
root' :: TokenMatcher
root' =
[ tok "[^<&]+" (Arbitrary "Text")
, tok "&\\S*?;" (Arbitrary "Name" :. Arbitrary "Entity")
, tok "\\<\\!\\[CDATA\\[.*?\\]\\]\\>" (Arbitrary "Comment" :. Arbitrary "Preproc")
, tokNext "