module Text.XML.WraXML.Document.Tagchup where

import qualified Text.XML.WraXML.Tree.Tagchup as TreeTagchup
import qualified Text.XML.WraXML.Document as XmlDoc
-- import qualified Text.XML.WraXML.Tree     as XmlTree

import qualified Text.HTML.Tagchup.Parser      as TagParser
import qualified Text.HTML.Tagchup.Tag         as Tag
import qualified Text.HTML.Tagchup.PositionTag as PosTag
import qualified Text.XML.Basic.Position    as Position
import qualified Text.HTML.Basic.Tag as TagH
import qualified Text.HTML.Basic.Character    as HtmlChar

import qualified Text.XML.Basic.Name.LowerCase as NameLC
import qualified Text.XML.Basic.Name           as Name
import qualified Text.XML.Basic.ProcessingInstruction as PI

import Data.List.HT (viewL, )
import Data.Maybe (fromMaybe, )
import Control.Monad (guard, )

import Control.Monad.Trans.State (State, state, evalState, modify, gets, )

import qualified Data.Char as Char



type XmlDoc = XmlDoc.T Position.T


class TagParser.CharType char => CharSpace char where
   isSpace :: char -> Bool

instance CharSpace Char where
   isSpace :: Char -> Bool
isSpace = Char -> Bool
Char.isSpace

instance CharSpace HtmlChar.T where
   isSpace :: T -> Bool
isSpace T
c =
      case T
c of
         HtmlChar.Unicode Char
chr -> Char -> Bool
Char.isSpace Char
chr
         HtmlChar.EntityRef String
"nbsp" -> Bool
True
         T
_ -> Bool
False

class StringSpace string where
   isAllSpace :: string -> Bool

instance CharSpace char => StringSpace [char] where
   isAllSpace :: [char] -> Bool
isAllSpace = forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all forall char. CharSpace char => char -> Bool
isSpace


dropSpace :: StringSpace string =>
   [PosTag.T name string] -> [PosTag.T name string]
dropSpace :: forall string name.
StringSpace string =>
[T name string] -> [T name string]
dropSpace =
   forall a. (a -> Bool) -> [a] -> [a]
dropWhile
      (\T name string
tag ->
          case forall name string. T name string -> T name string
PosTag.tag_ T name string
tag of
             Tag.Text string
text -> forall string. StringSpace string => string -> Bool
isAllSpace string
text
             T name string
_ -> Bool
False)

withoutLeadingSpace :: (StringSpace string) =>
   ([PosTag.T name string] -> (a, [PosTag.T name string])) ->
   State [PosTag.T name string] a
withoutLeadingSpace :: forall string name a.
StringSpace string =>
([T name string] -> (a, [T name string]))
-> State [T name string] a
withoutLeadingSpace [T name string] -> (a, [T name string])
f =
   forall (m :: * -> *) s. Monad m => (s -> s) -> StateT s m ()
modify forall string name.
StringSpace string =>
[T name string] -> [T name string]
dropSpace forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) s a. Monad m => (s -> (a, s)) -> StateT s m a
state [T name string] -> (a, [T name string])
f

toXmlDocument ::
   (Name.Tag name, Name.Attribute name, StringSpace string) =>
   [PosTag.T name string] -> XmlDoc name string
toXmlDocument :: forall name string.
(Tag name, Attribute name, StringSpace string) =>
[T name string] -> XmlDoc name string
toXmlDocument [T name string]
ts =
   forall a b c. (a -> b -> c) -> b -> a -> c
flip forall s a. State s a -> s -> a
evalState [T name string]
ts forall a b. (a -> b) -> a -> b
$
   do Maybe [T name string]
xml <- forall string name a.
StringSpace string =>
([T name string] -> (a, [T name string]))
-> State [T name string] a
withoutLeadingSpace forall a b. (a -> b) -> a -> b
$ \[T name string]
ts0 ->
         forall a. a -> Maybe a -> a
fromMaybe (forall a. Maybe a
Nothing, [T name string]
ts0) forall a b. (a -> b) -> a -> b
$
         do (T name string
t,[T name string]
ts1) <- forall a. [a] -> Maybe (a, [a])
viewL [T name string]
ts0
            (Name name
name, PI.Known [T name string]
attrs) <- forall name string.
T name string -> Maybe (Name name, T name string)
Tag.maybeProcessing (forall name string. T name string -> T name string
PosTag.tag_ T name string
t)
            forall (f :: * -> *). Alternative f => Bool -> f ()
guard (forall name. C name => String -> name -> Bool
Name.match String
"xml" Name name
name)
            forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. a -> Maybe a
Just [T name string]
attrs, [T name string]
ts1)
      Maybe String
docType <- forall string name a.
StringSpace string =>
([T name string] -> (a, [T name string]))
-> State [T name string] a
withoutLeadingSpace forall a b. (a -> b) -> a -> b
$ \[T name string]
ts0 ->
         forall a. a -> Maybe a -> a
fromMaybe (forall a. Maybe a
Nothing, [T name string]
ts0) forall a b. (a -> b) -> a -> b
$
         do (T name string
t,[T name string]
ts1) <- forall a. [a] -> Maybe (a, [a])
viewL [T name string]
ts0
            (Name name
name, String
dtd) <- forall name string. T name string -> Maybe (Name name, String)
Tag.maybeSpecial (forall name string. T name string -> T name string
PosTag.tag_ T name string
t)
            forall (f :: * -> *). Alternative f => Bool -> f ()
guard (forall name. C name => String -> name -> Bool
Name.match String
TagH.doctypeString Name name
name)
            forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. a -> Maybe a
Just String
dtd, [T name string]
ts1)
      forall (m :: * -> *) s a. Monad m => (s -> a) -> StateT s m a
gets (forall i name str.
Maybe [T name str]
-> Maybe String -> [T i name str] -> T i name str
XmlDoc.Cons Maybe [T name string]
xml Maybe String
docType forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall name string.
(Tag name, Attribute name) =>
[T name string] -> [T T name string]
TreeTagchup.toXmlTrees)


example :: IO ()
example :: IO ()
example =
  forall a. Show a => a -> IO ()
print forall b c a. (b -> c) -> (a -> b) -> a -> c
.
  (forall name string.
(Tag name, Attribute name, StringSpace string) =>
[T name string] -> XmlDoc name string
toXmlDocument :: [PosTag.T NameLC.T String] -> XmlDoc NameLC.T String) forall b c a. (b -> c) -> (a -> b) -> a -> c
.
  forall source sink name.
(C source, StringType sink, Attribute name, Tag name) =>
source -> [T name sink]
TagParser.runSoupWithPositions
    forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< String -> IO String
readFile String
"/home/thielema/public_html/index.html"