module Text.XML.WraXML.Document.Tagchup where
import qualified Text.XML.WraXML.Tree.Tagchup as TreeTagchup
import qualified Text.XML.WraXML.Document as XmlDoc
import qualified Text.HTML.Tagchup.Parser as TagParser
import qualified Text.HTML.Tagchup.Tag as Tag
import qualified Text.HTML.Tagchup.PositionTag as PosTag
import qualified Text.XML.Basic.Position as Position
import qualified Text.HTML.Basic.Tag as TagH
import qualified Text.HTML.Basic.Character as HtmlChar
import qualified Text.XML.Basic.Name.LowerCase as NameLC
import qualified Text.XML.Basic.Name as Name
import qualified Text.XML.Basic.ProcessingInstruction as PI
import Data.List.HT (viewL, )
import Data.Maybe (fromMaybe, )
import Control.Monad (guard, )
import Control.Monad.Trans.State (State, state, evalState, modify, gets, )
import qualified Data.Char as Char
type XmlDoc = XmlDoc.T Position.T
class TagParser.CharType char => CharSpace char where
isSpace :: char -> Bool
instance CharSpace Char where
isSpace :: Char -> Bool
isSpace = Char -> Bool
Char.isSpace
instance CharSpace HtmlChar.T where
isSpace :: T -> Bool
isSpace T
c =
case T
c of
HtmlChar.Unicode Char
chr -> Char -> Bool
Char.isSpace Char
chr
HtmlChar.EntityRef String
"nbsp" -> Bool
True
T
_ -> Bool
False
class StringSpace string where
isAllSpace :: string -> Bool
instance CharSpace char => StringSpace [char] where
isAllSpace :: [char] -> Bool
isAllSpace = forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all forall char. CharSpace char => char -> Bool
isSpace
dropSpace :: StringSpace string =>
[PosTag.T name string] -> [PosTag.T name string]
dropSpace :: forall string name.
StringSpace string =>
[T name string] -> [T name string]
dropSpace =
forall a. (a -> Bool) -> [a] -> [a]
dropWhile
(\T name string
tag ->
case forall name string. T name string -> T name string
PosTag.tag_ T name string
tag of
Tag.Text string
text -> forall string. StringSpace string => string -> Bool
isAllSpace string
text
T name string
_ -> Bool
False)
withoutLeadingSpace :: (StringSpace string) =>
([PosTag.T name string] -> (a, [PosTag.T name string])) ->
State [PosTag.T name string] a
withoutLeadingSpace :: forall string name a.
StringSpace string =>
([T name string] -> (a, [T name string]))
-> State [T name string] a
withoutLeadingSpace [T name string] -> (a, [T name string])
f =
forall (m :: * -> *) s. Monad m => (s -> s) -> StateT s m ()
modify forall string name.
StringSpace string =>
[T name string] -> [T name string]
dropSpace forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) s a. Monad m => (s -> (a, s)) -> StateT s m a
state [T name string] -> (a, [T name string])
f
toXmlDocument ::
(Name.Tag name, Name.Attribute name, StringSpace string) =>
[PosTag.T name string] -> XmlDoc name string
toXmlDocument :: forall name string.
(Tag name, Attribute name, StringSpace string) =>
[T name string] -> XmlDoc name string
toXmlDocument [T name string]
ts =
forall a b c. (a -> b -> c) -> b -> a -> c
flip forall s a. State s a -> s -> a
evalState [T name string]
ts forall a b. (a -> b) -> a -> b
$
do Maybe [T name string]
xml <- forall string name a.
StringSpace string =>
([T name string] -> (a, [T name string]))
-> State [T name string] a
withoutLeadingSpace forall a b. (a -> b) -> a -> b
$ \[T name string]
ts0 ->
forall a. a -> Maybe a -> a
fromMaybe (forall a. Maybe a
Nothing, [T name string]
ts0) forall a b. (a -> b) -> a -> b
$
do (T name string
t,[T name string]
ts1) <- forall a. [a] -> Maybe (a, [a])
viewL [T name string]
ts0
(Name name
name, PI.Known [T name string]
attrs) <- forall name string.
T name string -> Maybe (Name name, T name string)
Tag.maybeProcessing (forall name string. T name string -> T name string
PosTag.tag_ T name string
t)
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (forall name. C name => String -> name -> Bool
Name.match String
"xml" Name name
name)
forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. a -> Maybe a
Just [T name string]
attrs, [T name string]
ts1)
Maybe String
docType <- forall string name a.
StringSpace string =>
([T name string] -> (a, [T name string]))
-> State [T name string] a
withoutLeadingSpace forall a b. (a -> b) -> a -> b
$ \[T name string]
ts0 ->
forall a. a -> Maybe a -> a
fromMaybe (forall a. Maybe a
Nothing, [T name string]
ts0) forall a b. (a -> b) -> a -> b
$
do (T name string
t,[T name string]
ts1) <- forall a. [a] -> Maybe (a, [a])
viewL [T name string]
ts0
(Name name
name, String
dtd) <- forall name string. T name string -> Maybe (Name name, String)
Tag.maybeSpecial (forall name string. T name string -> T name string
PosTag.tag_ T name string
t)
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (forall name. C name => String -> name -> Bool
Name.match String
TagH.doctypeString Name name
name)
forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. a -> Maybe a
Just String
dtd, [T name string]
ts1)
forall (m :: * -> *) s a. Monad m => (s -> a) -> StateT s m a
gets (forall i name str.
Maybe [T name str]
-> Maybe String -> [T i name str] -> T i name str
XmlDoc.Cons Maybe [T name string]
xml Maybe String
docType forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall name string.
(Tag name, Attribute name) =>
[T name string] -> [T T name string]
TreeTagchup.toXmlTrees)
example :: IO ()
example :: IO ()
example =
forall a. Show a => a -> IO ()
print forall b c a. (b -> c) -> (a -> b) -> a -> c
.
(forall name string.
(Tag name, Attribute name, StringSpace string) =>
[T name string] -> XmlDoc name string
toXmlDocument :: [PosTag.T NameLC.T String] -> XmlDoc NameLC.T String) forall b c a. (b -> c) -> (a -> b) -> a -> c
.
forall source sink name.
(C source, StringType sink, Attribute name, Tag name) =>
source -> [T name sink]
TagParser.runSoupWithPositions
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< String -> IO String
readFile String
"/home/thielema/public_html/index.html"