module Text.SXML
(
readFile
,parseString
,parseText
,parseLazyText
,writeFile
,renderString
,renderText
,renderLazyText
) where
import Data.Monoid
import Data.Maybe
import Data.List
import Text.SXML.Internal
import Text.ParserCombinators.Poly.State
import Prelude hiding (readFile, writeFile)
import Control.Applicative hiding ((<|>),many)
import qualified System.IO as IO
import qualified Data.XML.Types as XT
import qualified Data.Text.Lazy as T
import qualified Data.Text.Lazy.IO as TIO
import qualified Data.Text.Lazy.Builder as TB
import qualified Data.Text as TS
import qualified Data.Map as M
type SXMLParser a = Parser [M.Map TS.Text (TS.Text, Maybe TS.Text)] Token a
readFile :: FilePath -> IO (Either String XT.Document)
readFile file = do
sxml <- IO.readFile file
return $ parseString sxml
parseText :: TS.Text -> Either String XT.Document
parseText = parseString . TS.unpack
parseLazyText :: T.Text -> Either String XT.Document
parseLazyText = parseString . T.unpack
parseString :: String -> Either String XT.Document
parseString txt = f3 $ runParser getSXML [M.empty] $ lexer txt
where f3 (x,_,_) = x
writeFile :: FilePath -> XT.Document -> IO ()
writeFile file doc = TIO.writeFile file (renderLazyText doc)
renderText :: XT.Document -> TS.Text
renderText = T.toStrict . renderLazyText
renderString :: XT.Document -> String
renderString = T.unpack . renderLazyText
renderLazyText :: XT.Document -> T.Text
renderLazyText = TB.toLazyText . builderDoc
where tagend = TB.singleton ')'
tagstart = TB.singleton '('
space = TB.singleton ' '
empty = TB.fromString ""
builderString txt =
let toSchemeStringBuilder txt = fold_mod ((<>) . (<> TB.fromString "\\\\")) splitDoubleQuotes (TS.split (=='\\') txt)
splitDoubleQuotes txt = fold_mod ((<>) . (<> TB.fromString "\\\"")) splitCarriageReturns (TS.split (=='"') txt)
splitCarriageReturns txt = fold_mod ((<>) . (<> TB.fromString "\\r")) splitLineFeeds (TS.split (=='\r') txt)
splitLineFeeds txt = fold_mod ((<>) . (<> TB.fromString "\\n")) splitFormFeeds (TS.split (=='\n') txt)
splitFormFeeds txt = fold_mod ((<>) . (<> TB.fromString "\\f")) splitBackspaces (TS.split (=='\f') txt)
splitBackspaces txt = fold_mod ((<>) . (<> TB.fromString "\\b")) splitTabulations (TS.split (=='\b') txt)
splitTabulations txt = fold_mod ((<>) . (<> TB.fromString "\\t")) splitVerticalTab (TS.split (=='\t') txt)
splitVerticalTab txt = fold_mod ((<>) . (<> TB.fromString "\\v")) TB.fromText (TS.split (=='\v') txt)
fold_mod f t l = foldr1 f (map t l)
stringsep = TB.singleton '"'
in stringsep <> toSchemeStringBuilder txt <> stringsep
nameToBuilder n = maybe empty ((<> TB.singleton ':') . TB.fromText) (XT.nameNamespace n) <> (TB.fromText . XT.nameLocalName $ n)
builderDoc doc = TB.fromString "(*TOP*" <> (builderPrologue . sortBy prologueSorter $ ((XT.prologueBefore . XT.documentPrologue $ doc) ++ (XT.prologueAfter . XT.documentPrologue $ doc) ++ XT.documentEpilogue doc)) <> (builderElem . XT.documentRoot $ doc) <> tagend
builderElem el = tagstart <> (nameToBuilder . XT.elementName $ el) <> space <> (builderAttrList . XT.elementAttributes $ el) <> (builderChildren . XT.elementNodes $ el) <> tagend
builderContent (XT.ContentText t) = builderString t
builderContent (XT.ContentEntity e) = TB.fromString "(*ENTITY* " <> TB.fromText e <> TB.fromString " \"\"" <> tagend
builderInstruction (XT.Instruction tgt dat) = TB.fromString "(*PI* " <> TB.fromText tgt <> space <> builderString dat <> tagend
builderComment ct = TB.fromString "(*COMMENT* " <> builderString ct <> tagend
builderAttrList [] = empty
builderAttrList al =
let helperAttr [] = tagend
helperAttr ((n,cs):ats) = tagstart <> nameToBuilder n <> helperAttrVal cs <> helperAttr ats
helperAttrVal = foldr ((<>) . (space <>) . builderContent) tagend
in TB.fromString "(@ " <> helperAttr al
builderChildren [] = empty
builderChildren (c:cs) =
let helperChild (XT.NodeComment cmt) = builderComment cmt
helperChild (XT.NodeInstruction instr) = builderInstruction instr
helperChild (XT.NodeContent cnt) = builderContent cnt
helperChild (XT.NodeElement elt) = builderElem elt
in space <> helperChild c <> builderChildren cs
builderPrologue [] = space
builderPrologue (e:es) =
let helperPrologue (XT.MiscInstruction instr) = builderInstruction instr
helperPrologue (XT.MiscComment cmt) = builderComment cmt
in space <> helperPrologue e <> builderPrologue es
prologueSorter (XT.MiscInstruction _) (XT.MiscComment _) = GT
prologueSorter (XT.MiscComment _) (XT.MiscInstruction _) = LT
prologueSorter _ _ = EQ
getSXML :: SXMLParser XT.Document
getSXML = do
lparen
root
pr <- prologue
rEl <- element
rparen
return $ XT.Document pr rEl []
prologue :: SXMLParser XT.Prologue
prologue = do
optional annotations
pis <- many prologueProcInstr
comments <- many prologueComment
return $ XT.Prologue (pis ++ comments) Nothing []
prologueComment = do
c <- commentHelper
return $ XT.MiscComment c
comment = do
c <- commentHelper
return $ XT.NodeComment c
commentHelper = parens (cmt >> strTok)
prologueProcInstr = do
inst <- prologueHelper
return $ XT.MiscInstruction inst
procInstr = do
inst <- prologueHelper
return $ XT.NodeInstruction inst
prologueHelper = parens $ do
pinst
target <- nodeTok
optional annotations
instr <- strTok
return $ XT.Instruction target instr
annotations = do
lparen
atl
namespaces
many annotation
rparen
return ()
annotation = do
lparen
name <- nodeTok
target <- nodeTok <|> strTok
rparen
return ()
namespaces = do
lparen
nsl
many namespace
rparen
return ()
namespace = do
lparen
nid <- nodeTok
uri <- strTok
oid <- optional nodeTok
rparen
stUpdate (\(cns:stns) -> (M.insert nid (uri, oid) cns : stns))
return ()
helperName :: TS.Text -> SXMLParser XT.Name
helperName qname =
let (prefv, uqnamev) = TS.breakOnEnd sep qname
(pref, uqname) = if TS.null prefv then
(TS.empty, uqnamev)
else
(TS.init prefv, uqnamev)
in do
nss <- stGet
if TS.null pref then
return $ XT.Name uqname Nothing Nothing
else
let lval = lookupS pref nss
in case lval of
Nothing -> return $ XT.Name uqname (Just pref) Nothing
Just (uri, _) -> return $ XT.Name uqname (Just uri) (Just pref)
element :: SXMLParser XT.Element
element = do
lparen
eName <- nodeTok
stUpdate (M.empty:)
attrs <- optional annotAttributes
let attrs' = fromMaybe [] attrs
children <- many childOfElement
rparen
ename <- helperName eName
stUpdate tail
return $ XT.Element ename attrs' children
childOfElement = oneOf [elemText, comment, procInstr, entity, elementChild]
elementChild = do
el <- element
return $ XT.NodeElement el
attribute = do
lparen
name <- nodeTok
val <- optional strTok
optional annotations
rparen
atname <- helperName name
case val of
Nothing -> return (atname, [XT.ContentText $ XT.nameLocalName atname])
Just v -> return (atname, [XT.ContentText v])
annotAttributes = do
lparen
atl
attrs <- many attribute
optional annotations
rparen
return attrs
entity = do
lparen
ent
pid <- strTok
sid <- strTok
rparen
return $ XT.NodeContent $ XT.ContentEntity pid
elemText = do
val <- strTok
return $ XT.NodeContent $ XT.ContentText val
root =
satisfy' cdr "DocRoot"
where cdr tok = case tok of
DocRoot -> True
_ -> False
cmt =
satisfy' cct "Comment"
where cct tok = case tok of
Comment -> True
_ -> False
ent =
satisfy' cet "Entity"
where cet tok = case tok of
Entity -> True
_ -> False
nsl =
satisfy' cns "NamespacesList"
where cns tok = case tok of
NamespacesList -> True
_ -> False
atl =
satisfy' cal "AttributesList"
where cal tok = case tok of
AttributesList -> True
_ -> False
pinst =
satisfy' cpi "PI"
where cpi tok = case tok of
PI -> True
_ -> False
lparen =
satisfy' opp "OpenParen"
where opp tok = case tok of
OpenParen -> True
_ -> False
rparen =
satisfy' clp "CloseParen"
where clp tok = case tok of
CloseParen -> True
_ -> False
strTok =
(\(Str s) -> s) <$> satisfy' cs "Str"
where cs tok = case tok of
Str _ -> True
_ -> False
nodeTok =
(\(NodeName s) -> s) <$> satisfy' cn "NodeName"
where cn tok = case tok of
NodeName _ -> True
_ -> False
satisfy' p attendu = do
x <- next
if p x
then return x
else case x of
LexError s -> failBad s
_ -> fail ("Error: " ++ attendu ++ " expected, was " ++ show x)
parens :: SXMLParser a -> SXMLParser a
parens = bracket lparen rparen
lookupS _ [] = Nothing
lookupS idx (m:ms) = case M.lookup idx m of
Nothing -> lookupS idx ms
Just uri -> Just uri
sep = TS.singleton ':'