module Text.XML.HXT.Arrow.Edit
    ( canonicalizeAllNodes
    , canonicalizeForXPath
    , canonicalizeContents
    , collapseAllXText
    , collapseXText
    , xshowEscapeXml
    , escapeXmlRefs
    , escapeHtmlRefs
    , haskellRepOfXmlDoc
    , treeRepOfXmlDoc
    , addHeadlineToXmlDoc
    , indentDoc
    , numberLinesInXmlDoc
    , preventEmptyElements
    , removeComment
    , removeAllComment
    , removeWhiteSpace
    , removeAllWhiteSpace
    , removeDocWhiteSpace
    , transfCdata
    , transfAllCdata
    , transfCharRef
    , transfAllCharRef
    , substAllXHTMLEntityRefs
    , substXHTMLEntityRef
    , rememberDTDAttrl
    , addDefaultDTDecl
    , hasXmlPi
    , addXmlPi
    , addXmlPiEncoding
    , addDoctypeDecl
    , addXHtmlDoctypeStrict
    , addXHtmlDoctypeTransitional
    , addXHtmlDoctypeFrameset
    )
where
import           Control.Arrow
import           Control.Arrow.ArrowList
import           Control.Arrow.ArrowIf
import           Control.Arrow.ArrowTree
import           Control.Arrow.ListArrow
import           Control.Arrow.NTreeEdit
import           Data.Char.Properties.XMLCharProps      ( isXmlSpaceChar )
import           Text.XML.HXT.Arrow.XmlArrow
import           Text.XML.HXT.DOM.Interface
import qualified Text.XML.HXT.DOM.XmlNode               as XN
import qualified Text.XML.HXT.DOM.ShowXml               as XS
import           Text.XML.HXT.DOM.FormatXmlTree         ( formatXmlTree )
import           Text.XML.HXT.Parser.HtmlParsec         ( emptyHtmlTags )
import           Text.XML.HXT.Parser.XmlEntities        ( xmlEntities )
import           Text.XML.HXT.Parser.XhtmlEntities      ( xhtmlEntities )
import           Data.List                              ( isPrefixOf )
import qualified Data.Map                               as M
import           Data.Maybe
canonicalizeTree'       :: LA XmlTree XmlTree -> LA XmlTree XmlTree
canonicalizeTree' toBeRemoved
    = ( processChildren
        ( (none `when` (isText <+> isXmlPi))    
          >>>
          (deep isPi `when` isDTD)              
        )
        `when` isRoot
      )
      >>>
      canonicalizeNodes toBeRemoved
canonicalizeNodes       :: LA XmlTree XmlTree -> LA XmlTree XmlTree
canonicalizeNodes toBeRemoved
    = editNTreeA $
      [ toBeRemoved     :-> none
      , ( isElem >>> getAttrl >>> getChildren >>> isCharRef )   
                        :-> ( processAttrl
                              ( processChildren transfCharRef
                                >>>
                                collapseXText'                  
                              )
                              >>>
                              ( collapseXText'                  
                                `when`
                                (getChildren >>. has2XText)
                              )
                            )
      , ( isElem >>> (getChildren >>. has2XText) )
                        :-> collapseXText'                      
      , isCharRef       :-> ( getCharRef
                              >>>
                              arr (\ i -> [toEnum i])
                              >>>
                              mkText
                            )
      , isCdata         :-> ( getCdata
                              >>>
                              mkText
                            )
      ]
canonicalizeAllNodes    :: ArrowList a => a XmlTree XmlTree
canonicalizeAllNodes    = fromLA $
                          canonicalizeTree' isCmt                       
canonicalizeForXPath    :: ArrowList a => a XmlTree XmlTree
canonicalizeForXPath    = fromLA $ canonicalizeTree' none               
canonicalizeContents    :: ArrowList a => a XmlTree XmlTree
canonicalizeContents    = fromLA $
                          canonicalizeNodes none
has2XText               :: XmlTrees -> XmlTrees
has2XText ts0@(t1 : ts1@(t2 : ts2))
    | XN.isText t1      = if XN.isText t2
                          then ts0
                          else has2XText ts2
    | otherwise         = has2XText ts1
has2XText _             = []
collapseXText'          :: LA XmlTree XmlTree
collapseXText'
    = replaceChildren ( listA getChildren >>> arrL (foldr mergeText' []) )
    where
    mergeText'  :: XmlTree -> XmlTrees -> XmlTrees
    mergeText' t1 (t2 : ts2)
        | XN.isText t1 && XN.isText t2
            = let
              s1 = fromJust . XN.getText $ t1
              s2 = fromJust . XN.getText $ t2
              t  = XN.mkText (s1 ++ s2)
              in
              t : ts2
    mergeText' t1 ts
        = t1 : ts
collapseXText           :: ArrowList a => a XmlTree XmlTree
collapseXText           = fromLA collapseXText'
collapseAllXText        :: ArrowList a => a XmlTree XmlTree
collapseAllXText        = fromLA $ processBottomUp collapseXText'
xshowEscapeXml          :: ArrowXml a => a n XmlTree -> a n String
xshowEscapeXml f        = f >. (uncurry XS.xshow'' escapeXmlRefs)
type EntityRefTable     = M.Map Int String
xmlEntityRefTable
 , xhtmlEntityRefTable  :: EntityRefTable
xmlEntityRefTable       = buildEntityRefTable $ xmlEntities
xhtmlEntityRefTable     = buildEntityRefTable $ xhtmlEntities
buildEntityRefTable     :: [(String, Int)] -> EntityRefTable
buildEntityRefTable     = M.fromList . map (\ (x,y) -> (y,x) )
type EntitySubstTable   = M.Map String String
xhtmlEntitySubstTable   :: EntitySubstTable
xhtmlEntitySubstTable   = M.fromList . map (second $ (:[]) . toEnum) $ xhtmlEntities
substXHTMLEntityRef	:: LA XmlTree XmlTree
substXHTMLEntityRef
    = ( getEntityRef
        >>>
        arrL subst
        >>>
        mkText
      )
      `orElse` this
    where
      subst name
          = maybe [] (:[]) $ M.lookup name xhtmlEntitySubstTable
substAllXHTMLEntityRefs	:: ArrowXml a => a XmlTree XmlTree
substAllXHTMLEntityRefs
    = fromLA $
      processBottomUp substXHTMLEntityRef
escapeXmlRefs           :: (Char -> String -> String, Char -> String -> String)
escapeXmlRefs           = (cquote, aquote)
    where
    cquote c
        | c `elem` "<&" = ('&' :)
                          . ((lookupRef c xmlEntityRefTable) ++)
                          . (';' :)
        | otherwise     = (c :)
    aquote c
        | c `elem` "<>\"\'&\n\r\t"
                        = ('&' :)
                          . ((lookupRef c xmlEntityRefTable) ++)
                          . (';' :)
        | otherwise     = (c :)
escapeHtmlRefs          :: (Char -> String -> String, Char -> String -> String)
escapeHtmlRefs          = (cquote, aquote)
    where
    cquote c
        | isHtmlTextEsc c
                        = ('&' :)
                          . ((lookupRef c xhtmlEntityRefTable) ++)
                          . (';' :)
        | otherwise     = (c :)
    aquote c
        | isHtmlAttrEsc c
                        = ('&' :)
                          . ((lookupRef c xhtmlEntityRefTable) ++)
                          . (';' :)
        | otherwise     = (c :)
    isHtmlTextEsc c     = c >= toEnum(128) || ( c `elem` "<&" )
    isHtmlAttrEsc c     = c >= toEnum(128) || ( c `elem` "<>\"\'&\n\r\t" )
lookupRef               :: Char -> EntityRefTable -> String
lookupRef c             = fromMaybe ('#' : show (fromEnum c))
                          . M.lookup (fromEnum c)
preventEmptyElements    :: ArrowList a => [String] -> Bool -> a XmlTree XmlTree
preventEmptyElements ns isHtml
    = fromLA $
      editNTreeA [ ( isElem
                     >>>
                     isNoneEmpty
                     >>>
                     neg getChildren
                   )
                   :-> replaceChildren (txt "")
                 ]
    where
    isNoneEmpty
        | not (null ns) = hasNameWith (localPart >>> (`elem` ns))
        | isHtml        = hasNameWith (localPart >>> (`notElem` emptyHtmlTags))
        | otherwise     = this
haskellRepOfXmlDoc      :: ArrowList a => a XmlTree XmlTree
haskellRepOfXmlDoc
    = fromLA $
      root [getAttrl] [show ^>> mkText]
numberLinesInXmlDoc     :: ArrowList a => a XmlTree XmlTree
numberLinesInXmlDoc
    = fromLA $
      processChildren (changeText numberLines)
    where
    numberLines :: String -> String
    numberLines str
        = concat $
          zipWith (\ n l -> lineNr n ++ l ++ "\n") [1..] (lines str)
        where
        lineNr   :: Int -> String
        lineNr n = (reverse (take 6 (reverse (show n) ++ replicate 6 ' '))) ++ "  "
treeRepOfXmlDoc :: ArrowList a => a XmlTree XmlTree
treeRepOfXmlDoc
    = fromLA $
      root [getAttrl] [formatXmlTree ^>> mkText]
addHeadlineToXmlDoc     :: ArrowXml a => a XmlTree XmlTree
addHeadlineToXmlDoc
    = fromLA $ ( addTitle $< (getAttrValue a_source >>^ formatTitle) )
    where
    addTitle str
        = replaceChildren ( txt str <+> getChildren <+> txt "\n" )
    formatTitle str
        = "\n" ++ headline ++ "\n" ++ underline ++ "\n\n"
        where
        headline  = "content of: " ++ str
        underline = map (const '=') headline
removeComment           :: ArrowXml a => a XmlTree XmlTree
removeComment           = none `when` isCmt
removeAllComment        :: ArrowXml a => a XmlTree XmlTree
removeAllComment        = fromLA $ editNTreeA [isCmt :-> none]
removeWhiteSpace        :: ArrowXml a => a XmlTree XmlTree
removeWhiteSpace        = fromLA $ none `when` isWhiteSpace
removeAllWhiteSpace     :: ArrowXml a => a XmlTree XmlTree
removeAllWhiteSpace     = fromLA $ editNTreeA [isWhiteSpace :-> none]
                       
removeDocWhiteSpace     :: ArrowXml a => a XmlTree XmlTree
removeDocWhiteSpace     = fromLA $ removeRootWhiteSpace
removeRootWhiteSpace    :: LA XmlTree XmlTree
removeRootWhiteSpace
    =  processChildren processRootElement
       `when`
       isRoot
    where
    processRootElement  :: LA XmlTree XmlTree
    processRootElement
        = removeWhiteSpace >>> processChild
        where
        processChild
            = choiceA [ isDTD
                        :-> removeAllWhiteSpace                 
                      , this
                        :-> replaceChildren ( getChildren
                                              >>. indentTrees insertNothing False 1
                                            )
                      ]
indentDoc               :: ArrowXml a => a XmlTree XmlTree
indentDoc               = fromLA $
                          ( ( isRoot `guards` indentRoot )
                            `orElse`
                            (root [] [this] >>> indentRoot >>> getChildren)
                          )
indentRoot              :: LA XmlTree XmlTree
indentRoot              = processChildren indentRootChildren
    where
    indentRootChildren
        = removeText >>> indentChild >>> insertNL
        where
        removeText      = none `when` isText
        insertNL        = this <+> txt "\n"
        indentChild     = ( replaceChildren
                            ( getChildren
                              >>.
                              indentTrees (insertIndentation 2) False 1
                            )
                            `whenNot` isDTD
                          )
indentTrees     :: (Int -> LA XmlTree XmlTree) -> Bool -> Int -> XmlTrees -> XmlTrees
indentTrees _ _ _ []
    = []
indentTrees indentFilter preserveSpace level ts
    = runLAs lsf ls
      ++
      indentRest rs
      where
      runLAs f l
          = runLA (constL l >>> f) undefined
      (ls, rs)
          = break XN.isElem ts
      isSignificant     :: Bool
      isSignificant
          = preserveSpace
            ||
            (not . null . runLAs isSignificantPart) ls
      isSignificantPart :: LA XmlTree XmlTree
      isSignificantPart
          = catA
            [ isText `guards` neg isWhiteSpace
            , isCdata
            , isCharRef
            , isEntityRef
            ]
      lsf       :: LA XmlTree XmlTree
      lsf
          | isSignificant
              = this
          | otherwise
              = (none `when` isWhiteSpace)
                >>>
                (indentFilter level <+> this)
      indentRest        :: XmlTrees -> XmlTrees
      indentRest []
          | isSignificant
              = []
          | otherwise
              = runLA (indentFilter (level  1)) undefined
      indentRest (t':ts')
          = runLA ( ( indentElem
                      >>>
                      lsf
                    )
                    `when` isElem
                  ) t'
            ++
            ( if null ts'
              then indentRest
              else indentTrees indentFilter preserveSpace level
            ) ts'
          where
          indentElem
              = replaceChildren ( getChildren
                                  >>.
                                  indentChildren
                                )
          xmlSpaceAttrValue     :: String
          xmlSpaceAttrValue
              = concat . runLA (getAttrValue "xml:space") $ t'
          preserveSpace'        :: Bool
          preserveSpace'
              = ( fromMaybe preserveSpace
                  .
                  lookup xmlSpaceAttrValue
                ) [ ("preserve", True)
                  , ("default",  False)
                  ]
          indentChildren        :: XmlTrees -> XmlTrees
          indentChildren cs'
              | all (maybe False (all isXmlSpaceChar) . XN.getText) cs'
                  = []
              | otherwise
                  = indentTrees indentFilter preserveSpace' (level + 1) cs'
insertIndentation       :: Int -> Int -> LA a XmlTree
insertIndentation indentWidth level
    = txt ('\n' : replicate (level * indentWidth) ' ')
insertNothing           :: Int -> LA a XmlTree
insertNothing _         = none
transfCdata             :: ArrowXml a => a XmlTree XmlTree
transfCdata             = fromLA $
                          (getCdata >>> mkText) `when` isCdata
transfAllCdata          :: ArrowXml a => a XmlTree XmlTree
transfAllCdata          = fromLA $ editNTreeA [isCdata :-> (getCdata >>> mkText)]
transfCharRef           :: ArrowXml a => a XmlTree XmlTree
transfCharRef           = fromLA $
                          ( getCharRef >>> arr (\ i -> [toEnum i]) >>> mkText )
                          `when`
                          isCharRef
transfAllCharRef        :: ArrowXml a => a XmlTree XmlTree
transfAllCharRef        = fromLA $ editNTreeA [isCharRef :-> (getCharRef >>> arr (\ i -> [toEnum i]) >>> mkText)]
rememberDTDAttrl        :: ArrowList a => a XmlTree XmlTree
rememberDTDAttrl
    = fromLA $
      ( ( addDTDAttrl $< ( getChildren >>> isDTDDoctype >>> getDTDAttrl ) )
        `orElse`
        this
      )
    where
    addDTDAttrl al
        = seqA . map (uncurry addAttr) . map (first (dtdPrefix ++)) $ al
addDefaultDTDecl        :: ArrowList a => a XmlTree XmlTree
addDefaultDTDecl
    = fromLA $
      ( addDTD $< listA (getAttrl >>> (getName &&& xshow getChildren) >>> hasDtdPrefix) )
    where
    hasDtdPrefix
        = isA (fst >>> (dtdPrefix `isPrefixOf`))
          >>>
          arr (first (drop (length dtdPrefix)))
    addDTD []
        = this
    addDTD al
        = replaceChildren
          ( mkDTDDoctype al none
            <+>
            txt "\n"
            <+>
            ( getChildren >>> (none `when` isDTDDoctype) )      
          )
hasXmlPi                :: ArrowXml a => a XmlTree XmlTree
hasXmlPi
    = fromLA
      ( getChildren
        >>>
        isPi
        >>>
        hasName t_xml
      )
addXmlPi                :: ArrowXml a => a XmlTree XmlTree
addXmlPi
    = fromLA
      ( insertChildrenAt 0 ( ( mkPi (mkName t_xml) none
                               >>>
                               addAttr a_version "1.0"
                             )
                             <+>
                             txt "\n"
                           )
        `whenNot`
        hasXmlPi
      )
addXmlPiEncoding        :: ArrowXml a => String -> a XmlTree XmlTree
addXmlPiEncoding enc
    = fromLA $
      processChildren ( addAttr a_encoding enc
                        `when`
                        ( isPi >>> hasName t_xml )
                      )
addXHtmlDoctypeStrict
  , addXHtmlDoctypeTransitional
  , addXHtmlDoctypeFrameset     :: ArrowXml a => a XmlTree XmlTree
addXHtmlDoctypeStrict
    = addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
addXHtmlDoctypeTransitional
    = addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
addXHtmlDoctypeFrameset
    = addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"
addDoctypeDecl  :: ArrowXml a => String -> String -> String -> a XmlTree XmlTree
addDoctypeDecl rootElem public system
    = fromLA $
      replaceChildren
      ( mkDTDDoctype ( ( if null public then id else ( (k_public, public) : ) )
                       .
                       ( if null system then id else ( (k_system, system) : ) )
                       $  [ (a_name, rootElem) ]
                     ) none
        <+>
        txt "\n"
        <+>
        getChildren
      )