{-# LANGUAGE OverloadedStrings #-}
module Text.XmlHtml.XML.Parse where
import Control.Applicative
import Control.Monad
import Data.Char
import Data.List
import Data.Maybe
import Text.XmlHtml.Common
import Text.XmlHtml.TextParser
import qualified Text.Parsec as P
import Data.Map (Map)
import qualified Data.Map as M
import Data.Text (Text)
import qualified Data.Text as T
docFragment :: Encoding -> Parser Document
docFragment :: Encoding -> Parser Document
docFragment Encoding
e = do
(Maybe DocType
dt, [Node]
nodes1) <- Parser (Maybe DocType, [Node])
prolog
[Node]
nodes2 <- Parser [Node]
content
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Encoding -> Maybe DocType -> [Node] -> Document
XmlDocument Encoding
e Maybe DocType
dt ([Node]
nodes1 forall a. [a] -> [a] -> [a]
++ [Node]
nodes2)
whiteSpace :: Parser ()
whiteSpace :: Parser ()
whiteSpace = forall (f :: * -> *) a. Alternative f => f a -> f [a]
some (forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
' ',Char
'\t',Char
'\r',Char
'\n'])) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. Monad m => a -> m a
return ()
isNameStartChar :: Char -> Bool
isNameStartChar :: Char -> Bool
isNameStartChar Char
c | Char
c forall a. Eq a => a -> a -> Bool
== Char
':' = Bool
True
| Char
c forall a. Eq a => a -> a -> Bool
== Char
'_' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'z' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'Z' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\xc0' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xd6' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\xd8' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xf6' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\xf8' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x2ff' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x370' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x37d' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x37f' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x1fff' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x200c' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x200d' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x2070' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x218f' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x2c00' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x2fef' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x3001' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xd7ff' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\xf900' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xfdcf' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\xfdf0' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xfffd' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x10000' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xeffff' = Bool
True
| Bool
otherwise = Bool
False
isNameChar :: Char -> Bool
isNameChar :: Char -> Bool
isNameChar Char
c | Char -> Bool
isNameStartChar Char
c = Bool
True
| Char
c forall a. Eq a => a -> a -> Bool
== Char
'-' = Bool
True
| Char
c forall a. Eq a => a -> a -> Bool
== Char
'.' = Bool
True
| Char
c forall a. Eq a => a -> a -> Bool
== Char
'\xb7' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'9' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x300' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x36f' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x203f' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x2040' = Bool
True
| Bool
otherwise = Bool
False
name :: Parser Text
name :: Parser Text
name = do
Char
c <- forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy Char -> Bool
isNameStartChar
Text
r <- (Char -> Bool) -> Parser Text
takeWhile0 Char -> Bool
isNameChar
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Char -> Text -> Text
T.cons Char
c Text
r
attrValue :: Parser Text
attrValue :: Parser Text
attrValue = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [Text] -> Text
T.concat (ParsecT Text () Identity [Text]
singleQuoted forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Text () Identity [Text]
doubleQuoted)
where
singleQuoted :: ParsecT Text () Identity [Text]
singleQuoted = forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\'' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall {t :: * -> *}.
Foldable t =>
t Char -> ParsecT Text () Identity [Text]
refTill [Char
'<',Char
'&',Char
'\''] forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\''
doubleQuoted :: ParsecT Text () Identity [Text]
doubleQuoted = forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'"' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall {t :: * -> *}.
Foldable t =>
t Char -> ParsecT Text () Identity [Text]
refTill [Char
'<',Char
'&',Char
'"'] forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'"'
refTill :: t Char -> ParsecT Text () Identity [Text]
refTill t Char
end = forall (f :: * -> *) a. Alternative f => f a -> f [a]
many ((Char -> Bool) -> Parser Text
takeWhile1 (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t Char
end)) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
reference)
systemLiteral :: Parser Text
systemLiteral :: Parser Text
systemLiteral = Parser Text
singleQuoted forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
doubleQuoted
where
singleQuoted :: Parser Text
singleQuoted = do
Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\''
Text
x <- (Char -> Bool) -> Parser Text
takeWhile0 (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Eq a => a -> a -> Bool
== Char
'\''))
Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\''
forall (m :: * -> *) a. Monad m => a -> m a
return Text
x
doubleQuoted :: Parser Text
doubleQuoted = do
Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\"'
Text
x <- (Char -> Bool) -> Parser Text
takeWhile0 (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Eq a => a -> a -> Bool
== Char
'\"'))
Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\"'
forall (m :: * -> *) a. Monad m => a -> m a
return Text
x
pubIdLiteral :: Parser Text
pubIdLiteral :: Parser Text
pubIdLiteral = Parser Text
singleQuoted forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
doubleQuoted
where
singleQuoted :: Parser Text
singleQuoted = do
Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\''
Text
x <- (Char -> Bool) -> Parser Text
takeWhile0 (\Char
c -> Char -> Bool
isPubIdChar Char
c Bool -> Bool -> Bool
&& Char
c forall a. Eq a => a -> a -> Bool
/= Char
'\'')
Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\''
forall (m :: * -> *) a. Monad m => a -> m a
return Text
x
doubleQuoted :: Parser Text
doubleQuoted = do
Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\"'
Text
x <- (Char -> Bool) -> Parser Text
takeWhile0 Char -> Bool
isPubIdChar
Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\"'
forall (m :: * -> *) a. Monad m => a -> m a
return Text
x
isPubIdChar :: Char -> Bool
isPubIdChar :: Char -> Bool
isPubIdChar Char
c | Char
c forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'z' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'Z' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'9' = Bool
True
| Char
c forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
otherChars = Bool
True
| Bool
otherwise = Bool
False
where
otherChars :: String
otherChars = String
" \r\n-\'()+,./:=?;!*#@$_%" :: [Char]
charData :: Parser Node
charData :: Parser Node
charData = Text -> Node
TextNode forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text
takeWhile1 (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
'<',Char
'&']))
comment :: Parser (Maybe Node)
= Text -> Parser Text
text Text
"<!--" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> Node
Comment forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
commentText) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Text -> Parser Text
text Text
"-->"
where
commentText :: Parser Text
commentText = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [Text] -> Text
T.concat forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => f a -> f [a]
many forall a b. (a -> b) -> a -> b
$
Parser Text
nonDash forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (Char -> Text -> Text
T.cons forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'-' forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Text
nonDash)
nonDash :: Parser Text
nonDash = (Char -> Bool) -> Parser Text
takeWhile1 (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Eq a => a -> a -> Bool
== Char
'-'))
processingInstruction :: Parser (Maybe Node)
processingInstruction :: Parser (Maybe Node)
processingInstruction = do
Text
_ <- Text -> Parser Text
text Text
"<?"
()
_ <- Parser ()
piTarget
String
_ <- forall {u}. ParsecT Text u Identity String
emptyEnd forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Text () Identity String
contentEnd
forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
where
emptyEnd :: ParsecT Text u Identity String
emptyEnd = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
P.string String
"?>")
contentEnd :: ParsecT Text () Identity String
contentEnd = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try forall a b. (a -> b) -> a -> b
$ do
()
_ <- Parser ()
whiteSpace
forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
P.manyTill forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
P.anyChar (forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try forall a b. (a -> b) -> a -> b
$ Text -> Parser Text
text Text
"?>")
piTarget :: Parser ()
piTarget :: Parser ()
piTarget = do
Text
n <- Parser Text
name
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when ((Char -> Char) -> Text -> Text
T.map Char -> Char
toLower Text
n forall a. Eq a => a -> a -> Bool
== Text
"xml") forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"xml declaration can't occur here"
cdata :: [Char] -> Parser a -> Parser Node
cdata :: forall a. String -> Parser a -> Parser Node
cdata String
cs Parser a
end = Text -> Node
TextNode forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Text] -> Text
T.concat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
P.manyTill Parser Text
part Parser a
end
where part :: Parser Text
part = (Char -> Bool) -> Parser Text
takeWhile1 (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
cs))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Text
T.singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
P.anyChar
cdSect :: Parser (Maybe Node)
cdSect :: Parser (Maybe Node)
cdSect = forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> do
Text
_ <- Text -> Parser Text
text Text
"<![CDATA["
forall a. String -> Parser a -> Parser Node
cdata String
"]" (Text -> Parser Text
text Text
"]]>")
prolog :: Parser (Maybe DocType, [Node])
prolog :: Parser (Maybe DocType, [Node])
prolog = do
Maybe (Maybe Text)
_ <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser (Maybe Text)
xmlDecl
[Maybe Node]
nodes1 <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser (Maybe Node)
misc
Maybe (DocType, [Maybe Node])
rest <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional forall a b. (a -> b) -> a -> b
$ do
DocType
dt <- Parser DocType
docTypeDecl
[Maybe Node]
nodes2 <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser (Maybe Node)
misc
forall (m :: * -> *) a. Monad m => a -> m a
return (DocType
dt, [Maybe Node]
nodes2)
case Maybe (DocType, [Maybe Node])
rest of
Maybe (DocType, [Maybe Node])
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. Maybe a
Nothing, forall a. [Maybe a] -> [a]
catMaybes [Maybe Node]
nodes1)
Just (DocType
dt, [Maybe Node]
nodes2) -> forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. a -> Maybe a
Just DocType
dt, forall a. [Maybe a] -> [a]
catMaybes ([Maybe Node]
nodes1 forall a. [a] -> [a] -> [a]
++ [Maybe Node]
nodes2))
xmlDecl :: Parser (Maybe Text)
xmlDecl :: Parser (Maybe Text)
xmlDecl = do
Text
_ <- Text -> Parser Text
text Text
"<?xml"
()
_ <- Parser ()
versionInfo
Maybe Text
e <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Text
encodingDecl
Maybe ()
_ <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
sdDecl
Maybe ()
_ <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace
Text
_ <- Text -> Parser Text
text Text
"?>"
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe Text
e
versionInfo :: Parser ()
versionInfo :: Parser ()
versionInfo = do
Parser ()
whiteSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Text -> Parser Text
text Text
"version" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
eq forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Parser ()
singleQuoted forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ()
doubleQuoted)
where
singleQuoted :: Parser ()
singleQuoted = forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\'' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
versionNum forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\''
doubleQuoted :: Parser ()
doubleQuoted = forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\"' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
versionNum forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\"'
versionNum :: Parser ()
versionNum = do
Text
_ <- Text -> Parser Text
text Text
"1."
String
_ <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
some (forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\Char
c -> Char
c forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'9'))
forall (m :: * -> *) a. Monad m => a -> m a
return ()
eq :: Parser ()
eq :: Parser ()
eq = forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'=' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. Monad m => a -> m a
return ()
misc :: Parser (Maybe Node)
misc :: Parser (Maybe Node)
misc = Parser (Maybe Node)
comment forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser (Maybe Node)
processingInstruction forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser ()
whiteSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing)
docTypeDecl :: Parser DocType
docTypeDecl :: Parser DocType
docTypeDecl = do
Text
_ <- Text -> Parser Text
text Text
"<!DOCTYPE"
Parser ()
whiteSpace
Text
tag <- Parser Text
name
Maybe ()
_ <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace
ExternalID
extid <- Parser ExternalID
externalID
Maybe ()
_ <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace
InternalSubset
intsub <- Parser InternalSubset
internalDoctype
Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'>'
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> ExternalID -> InternalSubset -> DocType
DocType Text
tag ExternalID
extid InternalSubset
intsub)
data InternalDoctypeState = IDSStart
| IDSScanning Int
| IDSInQuote Int Char
| Int
| Int
| Int
| Int
| Int
| Int
internalDoctype :: Parser InternalSubset
internalDoctype :: Parser InternalSubset
internalDoctype = Text -> InternalSubset
InternalText forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> Text
T.pack forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> ScanState) -> ParsecT Text () Identity String
scanText (InternalDoctypeState -> Char -> ScanState
dfa InternalDoctypeState
IDSStart)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (m :: * -> *) a. Monad m => a -> m a
return InternalSubset
NoInternalSubset
where dfa :: InternalDoctypeState -> Char -> ScanState
dfa InternalDoctypeState
IDSStart Char
'[' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning Int
0))
dfa InternalDoctypeState
IDSStart Char
_ = String -> ScanState
ScanFail String
"Not a DOCTYPE internal subset"
dfa (IDSInQuote Int
n Char
c) Char
d
| Char
c forall a. Eq a => a -> a -> Bool
== Char
d = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning Int
n))
| Bool
otherwise = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n Char
c))
dfa (IDSScanning Int
n) Char
'[' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nforall a. Num a => a -> a -> a
+Int
1)))
dfa (IDSScanning Int
0) Char
']' = ScanState
ScanFinish
dfa (IDSScanning Int
n) Char
']' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nforall a. Num a => a -> a -> a
-Int
1)))
dfa (IDSScanning Int
n) Char
'\'' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n Char
'\''))
dfa (IDSScanning Int
n) Char
'\"' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n Char
'\"'))
dfa (IDSScanning Int
n) Char
'<' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSCommentS1 Int
n))
dfa (IDSScanning Int
n) Char
_ = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning Int
n))
dfa (IDSCommentS1 Int
n) Char
'[' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nforall a. Num a => a -> a -> a
+Int
1)))
dfa (IDSCommentS1 Int
0) Char
']' = ScanState
ScanFinish
dfa (IDSCommentS1 Int
n) Char
']' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nforall a. Num a => a -> a -> a
-Int
1)))
dfa (IDSCommentS1 Int
n) Char
'\'' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n Char
'\''))
dfa (IDSCommentS1 Int
n) Char
'\"' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n Char
'\"'))
dfa (IDSCommentS1 Int
n) Char
'!' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSCommentS2 Int
n))
dfa (IDSCommentS1 Int
n) Char
_ = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning Int
n))
dfa (IDSCommentS2 Int
n) Char
'[' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nforall a. Num a => a -> a -> a
+Int
1)))
dfa (IDSCommentS2 Int
0) Char
']' = ScanState
ScanFinish
dfa (IDSCommentS2 Int
n) Char
']' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nforall a. Num a => a -> a -> a
-Int
1)))
dfa (IDSCommentS2 Int
n) Char
'\'' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n Char
'\''))
dfa (IDSCommentS2 Int
n) Char
'\"' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n Char
'\"'))
dfa (IDSCommentS2 Int
n) Char
'-' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSCommentS3 Int
n))
dfa (IDSCommentS2 Int
n) Char
_ = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning Int
n))
dfa (IDSCommentS3 Int
n) Char
'[' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nforall a. Num a => a -> a -> a
+Int
1)))
dfa (IDSCommentS3 Int
0) Char
']' = ScanState
ScanFinish
dfa (IDSCommentS3 Int
n) Char
']' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nforall a. Num a => a -> a -> a
-Int
1)))
dfa (IDSCommentS3 Int
n) Char
'\'' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n Char
'\''))
dfa (IDSCommentS3 Int
n) Char
'\"' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n Char
'\"'))
dfa (IDSCommentS3 Int
n) Char
'-' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSComment Int
n))
dfa (IDSCommentS3 Int
n) Char
_ = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning Int
n))
dfa (IDSComment Int
n) Char
'-' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSCommentD1 Int
n))
dfa (IDSComment Int
n) Char
_ = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSComment Int
n))
dfa (IDSCommentD1 Int
n) Char
'-' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSCommentE1 Int
n))
dfa (IDSCommentD1 Int
n) Char
_ = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSComment Int
n))
dfa (IDSCommentE1 Int
n) Char
'>' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning Int
n))
dfa (IDSCommentE1 Int
_) Char
_ = String -> ScanState
ScanFail String
"Poorly formatted comment"
sdDecl :: Parser ()
sdDecl :: Parser ()
sdDecl = do
Text
_ <- forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try forall a b. (a -> b) -> a -> b
$ Parser ()
whiteSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Text -> Parser Text
text Text
"standalone"
Parser ()
eq
Text
_ <- Parser Text
single forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
double
forall (m :: * -> *) a. Monad m => a -> m a
return ()
where
single :: Parser Text
single = forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\'' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
yesno forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\''
double :: Parser Text
double = forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\"' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
yesno forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\"'
yesno :: Parser Text
yesno = Text -> Parser Text
text Text
"yes" forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> Parser Text
text Text
"no"
element :: Parser Node
element :: Parser Node
element = do
(Text
t,[(Text, Text)]
a,Bool
b) <- Parser (Text, [(Text, Text)], Bool)
emptyOrStartTag
if Bool
b then forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [(Text, Text)] -> [Node] -> Node
Element Text
t [(Text, Text)]
a [])
else Text -> [(Text, Text)] -> Parser Node
nonEmptyElem Text
t [(Text, Text)]
a
where
nonEmptyElem :: Text -> [(Text, Text)] -> Parser Node
nonEmptyElem Text
t [(Text, Text)]
a = do
[Node]
c <- Parser [Node]
content
Text -> Parser ()
endTag Text
t
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [(Text, Text)] -> [Node] -> Node
Element Text
t [(Text, Text)]
a [Node]
c)
emptyOrStartTag :: Parser (Text, [(Text, Text)], Bool)
emptyOrStartTag :: Parser (Text, [(Text, Text)], Bool)
emptyOrStartTag = do
Text
t <- forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try forall a b. (a -> b) -> a -> b
$ forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'<' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
name
[(Text, Text)]
a <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many forall a b. (a -> b) -> a -> b
$ forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try forall a b. (a -> b) -> a -> b
$ do
Parser ()
whiteSpace
ParsecT Text () Identity (Text, Text)
attribute
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (forall {a} {b}. Eq a => [(a, b)] -> Bool
hasDups [(Text, Text)]
a) forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"Duplicate attribute names in element"
Maybe ()
_ <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace
Maybe Char
e <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'/')
Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'>'
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
t, [(Text, Text)]
a, forall a. Maybe a -> Bool
isJust Maybe Char
e)
where
hasDups :: [(a, b)] -> Bool
hasDups [(a, b)]
a = forall (t :: * -> *) a. Foldable t => t a -> Int
length (forall a. Eq a => [a] -> [a]
nub (forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> a
fst [(a, b)]
a)) forall a. Ord a => a -> a -> Bool
< forall (t :: * -> *) a. Foldable t => t a -> Int
length [(a, b)]
a
attribute :: Parser (Text, Text)
attribute :: ParsecT Text () Identity (Text, Text)
attribute = do
Text
n <- Parser Text
name
Parser ()
eq
Text
v <- Parser Text
attrValue
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
n,Text
v)
endTag :: Text -> Parser ()
endTag :: Text -> Parser ()
endTag Text
s = do
Text
_ <- Text -> Parser Text
text Text
"</"
Text
t <- Parser Text
name
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Text
s forall a. Eq a => a -> a -> Bool
/= Text
t) forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. MonadFail m => String -> m a
fail forall a b. (a -> b) -> a -> b
$ String
"mismatched tags: </" forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
t forall a. [a] -> [a] -> [a]
++
String
"> found inside <" forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
s forall a. [a] -> [a] -> [a]
++ String
"> tag"
Maybe ()
_ <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace
Text
_ <- Text -> Parser Text
text Text
">"
forall (m :: * -> *) a. Monad m => a -> m a
return ()
content :: Parser [Node]
content :: Parser [Node]
content = do
Maybe Node
n <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Node
charData
[Maybe Node]
ns <- forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => f a -> f [a]
many forall a b. (a -> b) -> a -> b
$ do
Maybe Node
s <- ((forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> Node
TextNode forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
reference)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser (Maybe Node)
cdSect
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser (Maybe Node)
processingInstruction
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser (Maybe Node)
comment
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a. a -> Maybe a
Just Parser Node
element)
Maybe Node
t <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Node
charData
forall (m :: * -> *) a. Monad m => a -> m a
return [Maybe Node
s,Maybe Node
t]
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Node] -> [Node]
coalesceText forall a b. (a -> b) -> a -> b
$ forall a. [Maybe a] -> [a]
catMaybes (Maybe Node
nforall a. a -> [a] -> [a]
:[Maybe Node]
ns)
where
coalesceText :: [Node] -> [Node]
coalesceText (TextNode Text
s : TextNode Text
t : [Node]
ns)
= [Node] -> [Node]
coalesceText (Text -> Node
TextNode (Text -> Text -> Text
T.append Text
s Text
t) forall a. a -> [a] -> [a]
: [Node]
ns)
coalesceText (Node
n:[Node]
ns)
= Node
n forall a. a -> [a] -> [a]
: [Node] -> [Node]
coalesceText [Node]
ns
coalesceText []
= []
charRef :: Parser Text
charRef :: Parser Text
charRef = Parser Text
hexCharRef forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
decCharRef
where
decCharRef :: Parser Text
decCharRef = do
Text
_ <- Text -> Parser Text
text Text
"&#"
[Int]
ds <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
some forall {u}. ParsecT Text u Identity Int
digit
Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
';'
let c :: Char
c = Int -> Char
chr forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\Int
a Int
b -> Int
10 forall a. Num a => a -> a -> a
* Int
a forall a. Num a => a -> a -> a
+ Int
b) Int
0 [Int]
ds
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Bool -> Bool
not (Char -> Bool
isValidChar Char
c)) forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. MonadFail m => String -> m a
fail forall a b. (a -> b) -> a -> b
$
String
"Reference is not a valid character"
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Char -> Text
T.singleton Char
c
where
digit :: ParsecT Text u Identity Int
digit = do
Char
d <- forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\Char
c -> Char
c forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'9')
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Int
ord Char
d forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'0')
hexCharRef :: Parser Text
hexCharRef = do
Text
_ <- Text -> Parser Text
text Text
"&#x"
[Int]
ds <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
some forall {u}. ParsecT Text u Identity Int
digit
Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
';'
let c :: Char
c = Int -> Char
chr forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\Int
a Int
b -> Int
16 forall a. Num a => a -> a -> a
* Int
a forall a. Num a => a -> a -> a
+ Int
b) Int
0 [Int]
ds
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Bool -> Bool
not (Char -> Bool
isValidChar Char
c)) forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. MonadFail m => String -> m a
fail forall a b. (a -> b) -> a -> b
$
String
"Reference is not a valid character"
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Char -> Text
T.singleton Char
c
where
digit :: ParsecT Text u Identity Int
digit = forall {u}. ParsecT Text u Identity Int
num forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall {u}. ParsecT Text u Identity Int
upper forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall {u}. ParsecT Text u Identity Int
lower
num :: ParsecT Text u Identity Int
num = do
Char
d <- forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\Char
c -> Char
c forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'9')
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Int
ord Char
d forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'0')
upper :: ParsecT Text u Identity Int
upper = do
Char
d <- forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\Char
c -> Char
c forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'F')
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
10 forall a. Num a => a -> a -> a
+ Char -> Int
ord Char
d forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'A')
lower :: ParsecT Text u Identity Int
lower = do
Char
d <- forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\Char
c -> Char
c forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'f')
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
10 forall a. Num a => a -> a -> a
+ Char -> Int
ord Char
d forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'a')
reference :: Parser Text
reference :: Parser Text
reference = Parser Text
charRef forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
entityRef
entityRef :: Parser Text
entityRef :: Parser Text
entityRef = do
Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'&'
Text
n <- Parser Text
name
Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
';'
case forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
n Map Text Text
entityRefLookup of
Maybe Text
Nothing -> forall (m :: * -> *) a. MonadFail m => String -> m a
fail forall a b. (a -> b) -> a -> b
$ String
"Unknown entity reference: " forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
n
Just Text
t -> forall (m :: * -> *) a. Monad m => a -> m a
return Text
t
where
entityRefLookup :: Map Text Text
entityRefLookup :: Map Text Text
entityRefLookup = forall k a. Ord k => [(k, a)] -> Map k a
M.fromList [
(Text
"amp", Text
"&"),
(Text
"lt", Text
"<"),
(Text
"gt", Text
">"),
(Text
"apos", Text
"\'"),
(Text
"quot", Text
"\"")
]
externalID :: Parser ExternalID
externalID :: Parser ExternalID
externalID = Parser ExternalID
systemID forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ExternalID
publicID forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (m :: * -> *) a. Monad m => a -> m a
return ExternalID
NoExternalID
where
systemID :: Parser ExternalID
systemID = do
Text
_ <- Text -> Parser Text
text Text
"SYSTEM"
Parser ()
whiteSpace
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> ExternalID
System Parser Text
systemLiteral
publicID :: Parser ExternalID
publicID = do
Text
_ <- Text -> Parser Text
text Text
"PUBLIC"
Parser ()
whiteSpace
Text
pid <- Parser Text
pubIdLiteral
Parser ()
whiteSpace
Text
sid <- Parser Text
systemLiteral
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Text -> ExternalID
Public Text
pid Text
sid)
encodingDecl :: Parser Text
encodingDecl :: Parser Text
encodingDecl = do
Text
_ <- forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try forall a b. (a -> b) -> a -> b
$ Parser ()
whiteSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Text -> Parser Text
text Text
"encoding"
()
_ <- Parser ()
eq
Parser Text
singleQuoted forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
doubleQuoted
where
singleQuoted :: Parser Text
singleQuoted = forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\'' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
encName forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\''
doubleQuoted :: Parser Text
doubleQuoted = forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\"' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
encName forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\"'
encName :: Parser Text
encName = do
Char
c <- forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy Char -> Bool
isEncStart
Text
cs <- (Char -> Bool) -> Parser Text
takeWhile0 Char -> Bool
isEnc
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Text -> Text
T.cons Char
c Text
cs)
isEncStart :: Char -> Bool
isEncStart Char
c | Char
c forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'Z' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'z' = Bool
True
| Bool
otherwise = Bool
False
isEnc :: Char -> Bool
isEnc Char
c | Char
c forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'Z' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'z' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'9' = Bool
True
| Char
c forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
'.',Char
'_',Char
'-'] = Bool
True
| Bool
otherwise = Bool
False