module Text.XML.HXT.Arrow.Edit
( canonicalizeAllNodes
, canonicalizeForXPath
, canonicalizeContents
, collapseAllXText
, collapseXText
, xshowEscapeXml
, escapeXmlRefs
, escapeHtmlRefs
, haskellRepOfXmlDoc
, treeRepOfXmlDoc
, addHeadlineToXmlDoc
, indentDoc
, numberLinesInXmlDoc
, preventEmptyElements
, removeComment
, removeAllComment
, removeWhiteSpace
, removeAllWhiteSpace
, removeDocWhiteSpace
, transfCdata
, transfAllCdata
, transfCharRef
, transfAllCharRef
, substAllXHTMLEntityRefs
, substXHTMLEntityRef
, rememberDTDAttrl
, addDefaultDTDecl
, hasXmlPi
, addXmlPi
, addXmlPiEncoding
, addDoctypeDecl
, addXHtmlDoctypeStrict
, addXHtmlDoctypeTransitional
, addXHtmlDoctypeFrameset
)
where
import Control.Arrow
import Control.Arrow.ArrowList
import Control.Arrow.ArrowIf
import Control.Arrow.ArrowTree
import Control.Arrow.ListArrow
import Control.Arrow.NTreeEdit
import Data.Char.Properties.XMLCharProps ( isXmlSpaceChar )
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.DOM.Interface
import qualified Text.XML.HXT.DOM.XmlNode as XN
import qualified Text.XML.HXT.DOM.ShowXml as XS
import Text.XML.HXT.DOM.FormatXmlTree ( formatXmlTree )
import Text.XML.HXT.Parser.HtmlParsec ( emptyHtmlTags )
import Text.XML.HXT.Parser.XmlEntities ( xmlEntities )
import Text.XML.HXT.Parser.XhtmlEntities ( xhtmlEntities )
import Data.List ( isPrefixOf )
import qualified Data.Map as M
import Data.Maybe
canonicalizeTree' :: LA XmlTree XmlTree -> LA XmlTree XmlTree
canonicalizeTree' toBeRemoved
= processChildren
( (none `when` (isText <+> isXmlPi))
>>>
(deep isPi `when` isDTD)
)
>>>
canonicalizeNodes toBeRemoved
canonicalizeNodes :: LA XmlTree XmlTree -> LA XmlTree XmlTree
canonicalizeNodes toBeRemoved
= editNTreeA $
[ toBeRemoved :-> none
, ( isElem >>> getAttrl >>> getChildren >>> isCharRef )
:-> ( processAttrl
( processChildren transfCharRef
>>>
collapseXText'
)
>>>
( collapseXText'
`when`
(getChildren >>. has2XText)
)
)
, ( isElem >>> (getChildren >>. has2XText) )
:-> collapseXText'
, isCharRef :-> ( getCharRef
>>>
arr (\ i -> [toEnum i])
>>>
mkText
)
, isCdata :-> ( getCdata
>>>
mkText
)
]
canonicalizeAllNodes :: ArrowList a => a XmlTree XmlTree
canonicalizeAllNodes = fromLA $
canonicalizeTree' isCmt
canonicalizeForXPath :: ArrowList a => a XmlTree XmlTree
canonicalizeForXPath = fromLA $ canonicalizeTree' none
canonicalizeContents :: ArrowList a => a XmlTree XmlTree
canonicalizeContents = fromLA $
canonicalizeNodes none
has2XText :: XmlTrees -> XmlTrees
has2XText ts0@(t1 : ts1@(t2 : ts2))
| XN.isText t1 = if XN.isText t2
then ts0
else has2XText ts2
| otherwise = has2XText ts1
has2XText _ = []
collapseXText' :: LA XmlTree XmlTree
collapseXText'
= replaceChildren ( listA getChildren >>> arrL (foldr mergeText' []) )
where
mergeText' :: XmlTree -> XmlTrees -> XmlTrees
mergeText' t1 (t2 : ts2)
| XN.isText t1 && XN.isText t2
= let
s1 = fromJust . XN.getText $ t1
s2 = fromJust . XN.getText $ t2
t = XN.mkText (s1 ++ s2)
in
t : ts2
mergeText' t1 ts
= t1 : ts
collapseXText :: ArrowList a => a XmlTree XmlTree
collapseXText = fromLA collapseXText'
collapseAllXText :: ArrowList a => a XmlTree XmlTree
collapseAllXText = fromLA $ processBottomUp collapseXText'
xshowEscapeXml :: ArrowXml a => a n XmlTree -> a n String
xshowEscapeXml f = f >. (uncurry XS.xshow'' escapeXmlRefs)
type EntityRefTable = M.Map Int String
xmlEntityRefTable
, xhtmlEntityRefTable :: EntityRefTable
xmlEntityRefTable = buildEntityRefTable $ xmlEntities
xhtmlEntityRefTable = buildEntityRefTable $ xhtmlEntities
buildEntityRefTable :: [(String, Int)] -> EntityRefTable
buildEntityRefTable = M.fromList . map (\ (x,y) -> (y,x) )
type EntitySubstTable = M.Map String String
xhtmlEntitySubstTable :: EntitySubstTable
xhtmlEntitySubstTable = M.fromList . map (second $ (:[]) . toEnum) $ xhtmlEntities
substXHTMLEntityRef :: LA XmlTree XmlTree
substXHTMLEntityRef
= ( getEntityRef
>>>
arrL subst
>>>
mkText
)
`orElse` this
where
subst name
= maybe [] (:[]) $ M.lookup name xhtmlEntitySubstTable
substAllXHTMLEntityRefs :: ArrowXml a => a XmlTree XmlTree
substAllXHTMLEntityRefs
= fromLA $
processBottomUp substXHTMLEntityRef
escapeXmlRefs :: (Char -> String -> String, Char -> String -> String)
escapeXmlRefs = (cquote, aquote)
where
cquote c
| c `elem` "<&" = ('&' :)
. ((lookupRef c xmlEntityRefTable) ++)
. (';' :)
| otherwise = (c :)
aquote c
| c `elem` "<>\"\'&\n\r\t"
= ('&' :)
. ((lookupRef c xmlEntityRefTable) ++)
. (';' :)
| otherwise = (c :)
escapeHtmlRefs :: (Char -> String -> String, Char -> String -> String)
escapeHtmlRefs = (cquote, aquote)
where
cquote c
| isHtmlTextEsc c
= ('&' :)
. ((lookupRef c xhtmlEntityRefTable) ++)
. (';' :)
| otherwise = (c :)
aquote c
| isHtmlAttrEsc c
= ('&' :)
. ((lookupRef c xhtmlEntityRefTable) ++)
. (';' :)
| otherwise = (c :)
isHtmlTextEsc c = c >= toEnum(128) || ( c `elem` "<&" )
isHtmlAttrEsc c = c >= toEnum(128) || ( c `elem` "<>\"\'&\n\r\t" )
lookupRef :: Char -> EntityRefTable -> String
lookupRef c = fromMaybe ('#' : show (fromEnum c))
. M.lookup (fromEnum c)
preventEmptyElements :: ArrowList a => [String] -> Bool -> a XmlTree XmlTree
preventEmptyElements ns isHtml
= fromLA $
editNTreeA [ ( isElem
>>>
isNoneEmpty
>>>
neg getChildren
)
:-> replaceChildren (txt "")
]
where
isNoneEmpty
| not (null ns) = hasNameWith (localPart >>> (`elem` ns))
| isHtml = hasNameWith (localPart >>> (`notElem` emptyHtmlTags))
| otherwise = this
haskellRepOfXmlDoc :: ArrowList a => a XmlTree XmlTree
haskellRepOfXmlDoc
= fromLA $
root [getAttrl] [show ^>> mkText]
numberLinesInXmlDoc :: ArrowList a => a XmlTree XmlTree
numberLinesInXmlDoc
= fromLA $
processChildren (changeText numberLines)
where
numberLines :: String -> String
numberLines str
= concat $
zipWith (\ n l -> lineNr n ++ l ++ "\n") [1..] (lines str)
where
lineNr :: Int -> String
lineNr n = (reverse (take 6 (reverse (show n) ++ replicate 6 ' '))) ++ " "
treeRepOfXmlDoc :: ArrowList a => a XmlTree XmlTree
treeRepOfXmlDoc
= fromLA $
root [getAttrl] [formatXmlTree ^>> mkText]
addHeadlineToXmlDoc :: ArrowXml a => a XmlTree XmlTree
addHeadlineToXmlDoc
= fromLA $ ( addTitle $< (getAttrValue a_source >>^ formatTitle) )
where
addTitle str
= replaceChildren ( txt str <+> getChildren <+> txt "\n" )
formatTitle str
= "\n" ++ headline ++ "\n" ++ underline ++ "\n\n"
where
headline = "content of: " ++ str
underline = map (const '=') headline
removeComment :: ArrowXml a => a XmlTree XmlTree
removeComment = none `when` isCmt
removeAllComment :: ArrowXml a => a XmlTree XmlTree
removeAllComment = fromLA $ editNTreeA [isCmt :-> none]
removeWhiteSpace :: ArrowXml a => a XmlTree XmlTree
removeWhiteSpace = fromLA $ none `when` isWhiteSpace
removeAllWhiteSpace :: ArrowXml a => a XmlTree XmlTree
removeAllWhiteSpace = fromLA $ editNTreeA [isWhiteSpace :-> none]
removeDocWhiteSpace :: ArrowXml a => a XmlTree XmlTree
removeDocWhiteSpace = fromLA $ removeRootWhiteSpace
removeRootWhiteSpace :: LA XmlTree XmlTree
removeRootWhiteSpace
= processChildren processRootElement
`when`
isRoot
where
processRootElement :: LA XmlTree XmlTree
processRootElement
= removeWhiteSpace >>> processChild
where
processChild
= choiceA [ isDTD
:-> removeAllWhiteSpace
, this
:-> replaceChildren ( getChildren
>>. indentTrees insertNothing False 1
)
]
indentDoc :: ArrowXml a => a XmlTree XmlTree
indentDoc = fromLA $
( ( isRoot `guards` indentRoot )
`orElse`
(root [] [this] >>> indentRoot >>> getChildren)
)
indentRoot :: LA XmlTree XmlTree
indentRoot = processChildren indentRootChildren
where
indentRootChildren
= removeText >>> indentChild >>> insertNL
where
removeText = none `when` isText
insertNL = this <+> txt "\n"
indentChild = ( replaceChildren
( getChildren
>>.
indentTrees (insertIndentation 2) False 1
)
`whenNot` isDTD
)
indentTrees :: (Int -> LA XmlTree XmlTree) -> Bool -> Int -> XmlTrees -> XmlTrees
indentTrees _ _ _ []
= []
indentTrees indentFilter preserveSpace level ts
= runLAs lsf ls
++
indentRest rs
where
runLAs f l
= runLA (constL l >>> f) undefined
(ls, rs)
= break XN.isElem ts
isSignificant :: Bool
isSignificant
= preserveSpace
||
(not . null . runLAs isSignificantPart) ls
isSignificantPart :: LA XmlTree XmlTree
isSignificantPart
= catA
[ isText `guards` neg isWhiteSpace
, isCdata
, isCharRef
, isEntityRef
]
lsf :: LA XmlTree XmlTree
lsf
| isSignificant
= this
| otherwise
= (none `when` isWhiteSpace)
>>>
(indentFilter level <+> this)
indentRest :: XmlTrees -> XmlTrees
indentRest []
| isSignificant
= []
| otherwise
= runLA (indentFilter (level 1)) undefined
indentRest (t':ts')
= runLA ( ( indentElem
>>>
lsf
)
`when` isElem
) t'
++
( if null ts'
then indentRest
else indentTrees indentFilter preserveSpace level
) ts'
where
indentElem
= replaceChildren ( getChildren
>>.
indentChildren
)
xmlSpaceAttrValue :: String
xmlSpaceAttrValue
= concat . runLA (getAttrValue "xml:space") $ t'
preserveSpace' :: Bool
preserveSpace'
= ( fromMaybe preserveSpace
.
lookup xmlSpaceAttrValue
) [ ("preserve", True)
, ("default", False)
]
indentChildren :: XmlTrees -> XmlTrees
indentChildren cs'
| all (maybe False (all isXmlSpaceChar) . XN.getText) cs'
= []
| otherwise
= indentTrees indentFilter preserveSpace' (level + 1) cs'
insertIndentation :: Int -> Int -> LA a XmlTree
insertIndentation indentWidth level
= txt ('\n' : replicate (level * indentWidth) ' ')
insertNothing :: Int -> LA a XmlTree
insertNothing _ = none
transfCdata :: ArrowXml a => a XmlTree XmlTree
transfCdata = fromLA $
(getCdata >>> mkText) `when` isCdata
transfAllCdata :: ArrowXml a => a XmlTree XmlTree
transfAllCdata = fromLA $ editNTreeA [isCdata :-> (getCdata >>> mkText)]
transfCharRef :: ArrowXml a => a XmlTree XmlTree
transfCharRef = fromLA $
( getCharRef >>> arr (\ i -> [toEnum i]) >>> mkText )
`when`
isCharRef
transfAllCharRef :: ArrowXml a => a XmlTree XmlTree
transfAllCharRef = fromLA $ editNTreeA [isCharRef :-> (getCharRef >>> arr (\ i -> [toEnum i]) >>> mkText)]
rememberDTDAttrl :: ArrowList a => a XmlTree XmlTree
rememberDTDAttrl
= fromLA $
( ( addDTDAttrl $< ( getChildren >>> isDTDDoctype >>> getDTDAttrl ) )
`orElse`
this
)
where
addDTDAttrl al
= seqA . map (uncurry addAttr) . map (first (dtdPrefix ++)) $ al
addDefaultDTDecl :: ArrowList a => a XmlTree XmlTree
addDefaultDTDecl
= fromLA $
( addDTD $< listA (getAttrl >>> (getName &&& xshow getChildren) >>> hasDtdPrefix) )
where
hasDtdPrefix
= isA (fst >>> (dtdPrefix `isPrefixOf`))
>>>
arr (first (drop (length dtdPrefix)))
addDTD []
= this
addDTD al
= replaceChildren
( mkDTDDoctype al none
<+>
txt "\n"
<+>
( getChildren >>> (none `when` isDTDDoctype) )
)
hasXmlPi :: ArrowXml a => a XmlTree XmlTree
hasXmlPi
= fromLA
( getChildren
>>>
isPi
>>>
hasName t_xml
)
addXmlPi :: ArrowXml a => a XmlTree XmlTree
addXmlPi
= fromLA
( insertChildrenAt 0 ( ( mkPi (mkName t_xml) none
>>>
addAttr a_version "1.0"
)
<+>
txt "\n"
)
`whenNot`
hasXmlPi
)
addXmlPiEncoding :: ArrowXml a => String -> a XmlTree XmlTree
addXmlPiEncoding enc
= fromLA $
processChildren ( addAttr a_encoding enc
`when`
( isPi >>> hasName t_xml )
)
addXHtmlDoctypeStrict
, addXHtmlDoctypeTransitional
, addXHtmlDoctypeFrameset :: ArrowXml a => a XmlTree XmlTree
addXHtmlDoctypeStrict
= addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
addXHtmlDoctypeTransitional
= addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
addXHtmlDoctypeFrameset
= addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"
addDoctypeDecl :: ArrowXml a => String -> String -> String -> a XmlTree XmlTree
addDoctypeDecl rootElem public system
= fromLA $
replaceChildren
( mkDTDDoctype ( ( if null public then id else ( (k_public, public) : ) )
.
( if null system then id else ( (k_system, system) : ) )
$ [ (a_name, rootElem) ]
) none
<+>
txt "\n"
<+>
getChildren
)