module AnchorParser where import ParsOps1 import HtmlEntities(decode) import Utils2(strToUpper) import Data.Char(isSpace,isAlphaNum) parseTag s = parse tag s where tag = (,) <$> tagname <*> many attribute <* space attribute = attr <$> attrname <*> optvalue attr n Nothing = (n,n) attr n (Just v) = (n,v) optvalue = maybeP (kw "=" *> value) value = (decode <$> string) <|> optional "" name tagname = (tagnamefix.strToUpper) <$> name attrname = strToUpper <$> name name = space *> chars isNameChar space = chars0 isSpace string = stringq '\'' <|> stringq '"' stringq q = space *> lit q *> chars0 (/=q) <* lit q kw s = space *> lits s tagnamefix "HEADER" = "HEAD" tagnamefix name = name isNameChar c = isAlphaNum c || c `elem` "~'%/:.-?&#_@+" -- what is allowed?