module HaskellWorks.Data.Xml.Succinct.Index
( XmlIndex(..)
, XmlIndexAt(..)
)
where
import Control.Arrow
import Data.Monoid
import HaskellWorks.Data.Bits.BitWise
import HaskellWorks.Data.Drop
import HaskellWorks.Data.Positioning
import HaskellWorks.Data.RankSelect.Base.Rank0
import HaskellWorks.Data.RankSelect.Base.Rank1
import HaskellWorks.Data.RankSelect.Base.Select1
import HaskellWorks.Data.TreeCursor
import HaskellWorks.Data.Uncons
import HaskellWorks.Data.Xml.CharLike
import HaskellWorks.Data.Xml.Grammar
import HaskellWorks.Data.Xml.Succinct
import Prelude hiding (drop)
import qualified Data.Attoparsec.ByteString.Char8 as ABC
import qualified Data.ByteString as BS
import qualified Data.List as L
import qualified HaskellWorks.Data.BalancedParens as BP
data XmlIndex
= XmlIndexDocument [XmlIndex]
| XmlIndexElement String [XmlIndex]
| XmlIndexCData BS.ByteString
| XmlIndexComment BS.ByteString
| XmlIndexMeta String [XmlIndex]
| XmlIndexAttrList [XmlIndex]
| XmlIndexValue BS.ByteString
| XmlIndexAttrName BS.ByteString
| XmlIndexAttrValue BS.ByteString
| XmlIndexError String
deriving (Eq, Show)
data XmlIndexState
= InAttrList
| InElement
| Unknown
deriving (Eq, Show)
class XmlIndexAt a where
xmlIndexAt :: a -> XmlIndex
pos :: (Select1 v, Rank1 w) => XmlCursor t v w -> Position
pos c = lastPositionOf (select1 (interests c) (rank1 (balancedParens c) (cursorRank c)))
remText :: (Drop v, Select1 v1, Rank1 w) => XmlCursor v v1 w -> v
remText c = drop (toCount (pos c)) (cursorText c)
instance (BP.BalancedParens w, Rank0 w, Rank1 w, Select1 v, TestBit w) => XmlIndexAt (XmlCursor BS.ByteString v w) where
xmlIndexAt :: XmlCursor BS.ByteString v w -> XmlIndex
xmlIndexAt = getIndexAt Unknown
getIndexAt :: (BP.BalancedParens w, Rank0 w, Rank1 w, Select1 v, TestBit w) => XmlIndexState -> XmlCursor BS.ByteString v w -> XmlIndex
getIndexAt state k = case uncons remainder of
Just (!c, cs) | isElementStart c -> parseElem cs
Just (!c, _ ) | isSpace c -> XmlIndexAttrList $ mapValuesFrom InAttrList (firstChild k)
Just (!c, _ ) | isAttribute && isQuote c -> XmlIndexAttrValue remainder
Just _ | isAttribute -> XmlIndexAttrName remainder
Just _ -> XmlIndexValue remainder
Nothing -> XmlIndexError "End of data"
where remainder = remText k
mapValuesFrom s = L.unfoldr (fmap (getIndexAt s &&& nextSibling))
isAttribute = case state of
InAttrList -> True
InElement -> False
Unknown -> case remText <$> parent k >>= uncons of
Just (!c, _) | isSpace c -> True
_ -> False
parseElem bs =
case ABC.parse parseXmlElement bs of
ABC.Fail {} -> decodeErr "Unable to parse element name" bs
ABC.Partial _ -> decodeErr "Unexpected end of string" bs
ABC.Done i r -> case r of
XmlElementTypeCData -> XmlIndexCData i
XmlElementTypeComment -> XmlIndexComment i
XmlElementTypeMeta s -> XmlIndexMeta s (mapValuesFrom InElement $ firstChild k)
XmlElementTypeElement s -> XmlIndexElement s (mapValuesFrom InElement $ firstChild k)
XmlElementTypeDocument -> XmlIndexDocument (mapValuesFrom InElement (firstChild k) <> mapValuesFrom InElement (nextSibling k))
decodeErr :: String -> BS.ByteString -> XmlIndex
decodeErr reason bs =
XmlIndexError $ reason <>": " <> show (BS.take 20 bs) <> "...'"