module Text.Named.Enamex
(
parseForest
, parseEnamex
, showForest
, showEnamex
) where
import Control.Applicative
import Control.Monad ((<=<), when)
import Data.Monoid
import Data.Attoparsec.Text.Lazy
import Data.List (intersperse)
import Data.Function (on)
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
import qualified Data.Text.Lazy.Builder as L
import qualified Data.Named.Tree as Tr
pForest :: Parser (Tr.NeForest T.Text T.Text)
pForest = pTree `sepBy` (space *> skipSpace)
pTree :: Parser (Tr.NeTree T.Text T.Text)
pTree = pNode
<|> pLeaf
pLeaf :: Parser (Tr.NeTree T.Text T.Text)
pLeaf = Tr.Node <$> (Right <$> pWord) <*> pure []
pNode :: Parser (Tr.NeTree T.Text T.Text)
pNode = do
x <- pOpenTag
kids <- pForest
x' <- pCloseTag
when (x /= x') (fail "Tag start/end mismatch")
return $ Tr.Node (Left x) kids
pOpenTag :: Parser T.Text
pOpenTag = "<" .*> pWord <*. ">"
pCloseTag :: Parser T.Text
pCloseTag = "</" .*> pWord <*. ">"
pWord :: Parser T.Text
pWord =
unEscape <$> scan False special
where
special False c =
case c == ' ' || c == '<' || c == '>' of
True -> Nothing
False -> if c == '\\'
then Just True
else Just False
special True _ = Just False
unEscape :: T.Text -> T.Text
unEscape xs = x `T.append` case drop1 rest of
Just (y, ys) -> y `T.cons` unEscape ys
Nothing -> ""
where
drop1 = T.uncons <=< return . snd <=< T.uncons
(x, rest) = T.breakOn "\\" xs
escape :: T.Text -> T.Text
escape x = case T.uncons z of
Nothing -> y
Just (c, q) -> y
`T.append` ('\\'
`T.cons` (c
`T.cons` escape q))
where
(y, z) = T.break special x
special c = c == ' ' || c == '<' || c == '>' || c == '\\'
parseForest :: L.Text -> Tr.NeForest T.Text T.Text
parseForest = either error id . eitherResult . parse (pForest <* endOfInput)
parseEnamex :: L.Text -> [Tr.NeForest T.Text T.Text]
parseEnamex = map parseForest . L.lines
data Tag = Open | Close | Body
noSpace :: Tag -> Tag -> Bool
noSpace Open _ = True
noSpace Body Close = True
noSpace Close Close = True
noSpace _ _ = False
groupBy :: (a -> a -> Bool) -> [a] -> [[a]]
groupBy p (x : y : xs)
| p x y = join x $ groupBy p (y : xs)
| otherwise = [x] : groupBy p (y : xs)
where
join z (zs : zss) = (z : zs) : zss
join z [] = [[z]]
groupBy _ [x] = [[x]]
groupBy _ [] = []
buildForest :: Tr.NeForest t t -> [(t, Tag)]
buildForest = concat . map buildTree
buildTree :: Tr.NeTree t t -> [(t, Tag)]
buildTree (Tr.Node (Left x) ts) = (x, Open) : buildForest ts ++ [(x, Close)]
buildTree (Tr.Node (Right x) _) = [(x, Body)]
buildStream :: [(T.Text, Tag)] -> L.Builder
buildStream
= mconcat . intersperse " "
. map (mconcat . map buildTag)
. groupBy (noSpace `on` snd)
buildTag :: (T.Text, Tag) -> L.Builder
buildTag (x, tag) = case tag of
Open -> "<" `mappend` y `mappend` ">"
Close -> "</" `mappend` y `mappend` ">"
_ -> y
where
y = L.fromText (escape x)
showForest :: Tr.NeForest T.Text T.Text -> L.Text
showForest = L.toLazyText . buildStream . buildForest
showEnamex :: [Tr.NeForest T.Text T.Text] -> L.Text
showEnamex = L.toLazyText . mconcat . map (L.fromLazyText . showForest)