{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.Readers.Docx.Util
   Copyright   : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
                   2014-2024 John MacFarlane <jgm@berkeley.edu>,
                   2015 Nikolay Yakimov <root@livid.pp.ru>
   License     : GNU GPL, version 2 or above

   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
   Stability   : alpha
   Portability : portable

Docx reader utility functions.
-}
module Text.Pandoc.Readers.Docx.Util (
                                        NameSpaces
                                      , elemName
                                      , isElem
                                      , elemToNameSpaces
                                      , findChildByName
                                      , findChildrenByName
                                      , findElementByName
                                      , findAttrByName
                                      , extractChildren
                                      ) where

import qualified Data.Text as T
import Data.Text (Text)
import Text.Pandoc.XML.Light
import qualified Data.Map as M
import Data.List (partition)

type NameSpaces = M.Map Text Text

elemToNameSpaces :: Element -> NameSpaces
elemToNameSpaces :: Element -> NameSpaces
elemToNameSpaces = (Attr -> NameSpaces -> NameSpaces)
-> NameSpaces -> [Attr] -> NameSpaces
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\(Attr QName
qn Text
val) ->
                             case QName
qn of
                               QName Text
s Maybe Text
_ (Just Text
"xmlns") -> Text -> Text -> NameSpaces -> NameSpaces
forall k a. Ord k => k -> a -> Map k a -> Map k a
M.insert Text
s Text
val
                               QName
_ -> NameSpaces -> NameSpaces
forall a. a -> a
id) NameSpaces
forall a. Monoid a => a
mempty ([Attr] -> NameSpaces)
-> (Element -> [Attr]) -> Element -> NameSpaces
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Element -> [Attr]
elAttribs

elemName :: NameSpaces -> Text -> Text -> QName
elemName :: NameSpaces -> Text -> Text -> QName
elemName NameSpaces
ns Text
prefix Text
name =
  Text -> Maybe Text -> Maybe Text -> QName
QName Text
name (Text -> NameSpaces -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
prefix NameSpaces
ns)
             (if Text -> Bool
T.null Text
prefix then Maybe Text
forall a. Maybe a
Nothing else Text -> Maybe Text
forall a. a -> Maybe a
Just Text
prefix)

isElem :: NameSpaces -> Text -> Text -> Element -> Bool
isElem :: NameSpaces -> Text -> Text -> Element -> Bool
isElem NameSpaces
ns Text
prefix Text
name Element
element =
  let ns' :: NameSpaces
ns' = NameSpaces
ns NameSpaces -> NameSpaces -> NameSpaces
forall a. Semigroup a => a -> a -> a
<> Element -> NameSpaces
elemToNameSpaces Element
element
  in QName -> Text
qName (Element -> QName
elName Element
element) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
name Bool -> Bool -> Bool
&&
     QName -> Maybe Text
qURI (Element -> QName
elName Element
element) Maybe Text -> Maybe Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> NameSpaces -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
prefix NameSpaces
ns'

findChildByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
pref Text
name Element
el =
  let ns' :: NameSpaces
ns' = NameSpaces
ns NameSpaces -> NameSpaces -> NameSpaces
forall a. Semigroup a => a -> a -> a
<> Element -> NameSpaces
elemToNameSpaces Element
el
  in  QName -> Element -> Maybe Element
findChild (NameSpaces -> Text -> Text -> QName
elemName NameSpaces
ns' Text
pref Text
name) Element
el

findChildrenByName :: NameSpaces -> Text -> Text -> Element -> [Element]
findChildrenByName :: NameSpaces -> Text -> Text -> Element -> [Element]
findChildrenByName NameSpaces
ns Text
pref Text
name Element
el =
  let ns' :: NameSpaces
ns' = NameSpaces
ns NameSpaces -> NameSpaces -> NameSpaces
forall a. Semigroup a => a -> a -> a
<> Element -> NameSpaces
elemToNameSpaces Element
el
  in  QName -> Element -> [Element]
findChildren (NameSpaces -> Text -> Text -> QName
elemName NameSpaces
ns' Text
pref Text
name) Element
el

-- | Like 'findChildrenByName', but searches descendants.
findElementByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
findElementByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
findElementByName NameSpaces
ns Text
pref Text
name Element
el =
  let ns' :: NameSpaces
ns' = NameSpaces
ns NameSpaces -> NameSpaces -> NameSpaces
forall a. Semigroup a => a -> a -> a
<> Element -> NameSpaces
elemToNameSpaces Element
el
  in  QName -> Element -> Maybe Element
findElement (NameSpaces -> Text -> Text -> QName
elemName NameSpaces
ns' Text
pref Text
name) Element
el

findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text
findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text
findAttrByName NameSpaces
ns Text
pref Text
name Element
el =
  let ns' :: NameSpaces
ns' = NameSpaces
ns NameSpaces -> NameSpaces -> NameSpaces
forall a. Semigroup a => a -> a -> a
<> Element -> NameSpaces
elemToNameSpaces Element
el
  in  QName -> Element -> Maybe Text
findAttr (NameSpaces -> Text -> Text -> QName
elemName NameSpaces
ns' Text
pref Text
name) Element
el


-- | Removes child elements that satisfy a given condition.
-- Returns the modified element and the list of removed children.
extractChildren :: Element -> (Element -> Bool) -> Maybe (Element, [Element])
extractChildren :: Element -> (Element -> Bool) -> Maybe (Element, [Element])
extractChildren Element
el Element -> Bool
condition
  | [Element] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Element]
removedChildren = Maybe (Element, [Element])
forall a. Maybe a
Nothing  -- No children removed, return Nothing
  | Bool
otherwise = (Element, [Element]) -> Maybe (Element, [Element])
forall a. a -> Maybe a
Just (Element
modifiedElement, [Element]
removedChildren)  -- Children removed, return Just
  where
    -- Separate the children based on the condition
    ([Element]
removedChildren, [Element]
keptChildren) = (Element -> Bool) -> [Element] -> ([Element], [Element])
forall a. (a -> Bool) -> [a] -> ([a], [a])
partition Element -> Bool
condition ([Content] -> [Element]
onlyElems' ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
el)

    -- Helper function to filter only Element types from Content
    onlyElems' :: [Content] -> [Element]
    onlyElems' :: [Content] -> [Element]
onlyElems' = (Content -> [Element] -> [Element])
-> [Element] -> [Content] -> [Element]
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\Content
c [Element]
acc -> case Content
c of
                                   Elem Element
e -> Element
e Element -> [Element] -> [Element]
forall a. a -> [a] -> [a]
: [Element]
acc
                                   Content
_      -> [Element]
acc) []

    -- Reconstruct the element with the kept children
    modifiedElement :: Element
modifiedElement = Element
el { elContent = map Elem keptChildren }