{-# LANGUAGE OverloadedStrings #-}
module Data.EDN.Parser (
parseMaybe, parseEither, parseBSL, parseBS, parseT, parseTL, parseS,
parseValue, parseTagged
) where
import Prelude ()
import Prelude.Compat hiding (String, takeWhile)
import Control.Applicative ((<|>))
import Control.Monad (void)
import Data.Attoparsec.ByteString.Char8 as A
import Data.Attoparsec.Combinator ()
import qualified Data.Attoparsec.Lazy as AL
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as BSL
import Data.ByteString.Search (replace)
import qualified Data.ByteString.UTF8 as UTF8
import Data.Functor (($>))
import Data.Maybe (fromJust)
import Data.Scientific as Sci
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE
import Data.EDN.Types (Tagged (..), TaggedValue,
Value (..), makeMap, makeSet,
makeVec)
import qualified Prelude as P
isSpaceOrComma :: Char -> Bool
isSpaceOrComma ' ' = True
isSpaceOrComma '\r' = True
isSpaceOrComma '\n' = True
isSpaceOrComma '\t' = True
isSpaceOrComma ',' = True
isSpaceOrComma _ = False
spaceOrComma :: Parser Char
spaceOrComma = satisfy isSpaceOrComma <?> "space/comma"
skipSoC :: Parser ()
skipSoC = skipWhile isSpaceOrComma
parseNil :: Parser Value
parseNil = do
skipSoC
string "nil" $> Nil
parseBool :: Parser Value
parseBool = do
skipSoC
choice [ string "true" *> pure (Boolean True)
, string "false" *> pure (Boolean False)
]
parseString :: Parser Value
parseString = do
skipSoC
void $ char '"'
x <- A.scan False $ \s c -> if s
then Just False
else if c == '"'
then Nothing
else Just (c == '\\')
void $ char '"'
let prepare = if '\\' `BS.elem` x
then rep "\\\"" "\""
. rep "\\\\" "\\"
. rep "\\n" "\n"
. rep "\\r" "\r"
. rep "\\t" "\t"
else id
return $! String $ TE.decodeUtf8 $ prepare x
where
rep f t s = BS.concat . BSL.toChunks
$! replace (BS.pack f) (BS.pack t) s
parseCharacter :: Parser Value
parseCharacter = do
skipSoC
char '\\' *> (simple <|> anyCharUtf8)
where
simple :: Parser Value
simple = do
x <- string "newline"
<|> string "return"
<|> string "space"
<|> string "tab"
<|> string "\\"
return . Character $! case x of
"newline" -> '\n'
"return" -> '\r'
"space" -> ' '
"tab" -> '\t'
"\\" -> '\\'
_ -> error ("EDN.parseCharacter: impossible - simple" ++ show x)
anyCharUtf8 :: Parser Value
anyCharUtf8 = do
bs <- scan BS.empty go
case UTF8.decode bs of
Just (c, _) -> return $! Character c
Nothing -> error $ "EDN.parseCharacter: bad utf8 data? " ++ show bs
go :: BS.ByteString -> Char -> Maybe BS.ByteString
go s c
| BS.null s = Just (BS.singleton c)
| otherwise = case UTF8.decode s of
Nothing -> Just (s `BS.snoc` c)
Just (uc, _) -> if uc == UTF8.replacement_char
then Just (s `BS.snoc` c)
else Nothing
parseSymbol :: Parser Value
parseSymbol = do
skipSoC
c <- satisfy (inClass "a-zA-Z.*/<>!?$%&=+_-")
(ns, val) <- withNS c <|> withoutNS c
return $! Symbol ns val
where
withNS c = do
ns <- takeWhile (inClass "a-zA-Z0-9#:.*<>!?$%&=+_-")
vc <- char '/' *> satisfy (inClass "a-zA-Z.*/<>!?$%&=+_-")
val <- takeWhile1 (inClass "a-zA-Z0-9#:.*<>!?$%&=+_-")
return (c `BS.cons` ns, vc `BS.cons` val)
withoutNS c = do
val <- takeWhile (inClass "a-zA-Z0-9#:.*<>!?$%&=+_-")
return ("", c `BS.cons` val)
parseKeyword :: Parser Value
parseKeyword = do
skipSoC
c <- char ':' *> satisfy (inClass "a-zA-Z.*/<>!?$%&=+_-")
x <- takeWhile (inClass "a-zA-Z0-9#:.*/<>!?$%&=+_-")
return $! Keyword (c `BS.cons` x)
parseNumber :: Parser Value
parseNumber = do
skipSoC
n <- A.scientific
return $!
if Sci.isInteger n
then Integer (fromIntegral (fromJust (Sci.toBoundedInteger n) :: P.Int))
else Floating (Sci.toRealFloat n)
parseColl :: Parser t1
-> Parser t2
-> Parser a
-> ([a] -> Value)
-> Parser Value
parseColl openingBr closingBr item construct = do
skipSoC
_ <- openingBr
A.skipSpace
vs <- item `sepBy` spaceOrComma
A.skipSpace
_<- closingBr
return $! construct vs
parseList :: Parser Value
parseList =
parseColl (char '(') (char ')') parseTagged List
parseVector :: Parser Value
parseVector =
parseColl (char '[') (char ']') parseTagged makeVec
parseSet :: Parser Value
parseSet =
parseColl (char '#' *> char '{') (char '}') parseTagged makeSet
parseMap :: Parser Value
parseMap =
parseColl (char '{') (char '}') parseAssoc makeMap
where
parseAssoc = do
key <- parseValue
val <- parseTagged
return (key, val)
skipComment :: Parser ()
skipComment = skipSoC *> char ';' *> skipWhile (/= '\n')
parseDiscard :: Parser ()
parseDiscard = do
skipSoC
void $ string "#_"
void parseValue
parseValue :: Parser Value
parseValue = do
skipSoC
skipMany skipComment
skipMany parseDiscard
parseSet <|> parseMap
<|> parseVector <|> parseList
<|> parseNil <|> parseBool
<|> parseNumber
<|> parseKeyword <|> parseSymbol
<|> parseCharacter
<|> parseString
parseTagged :: Parser TaggedValue
parseTagged = do
skipSoC
withNS <|> withoutNS <|> noTag
where
withNS = do
ns <- char '#' *> parseIdent
tag <- char '/' *> parseIdent
value <- parseValue
return $! Tagged value ns tag
withoutNS = do
tag <- char '#' *> parseIdent
value <- parseValue
return $! Tagged value "" tag
parseIdent = takeWhile1 (inClass "a-zA-Z0-9-")
noTag = do
value <- parseValue
return $! NoTag value
parseMaybe :: BSL.ByteString -> Maybe TaggedValue
parseMaybe = AL.maybeResult . parseBSL
parseEither :: BSL.ByteString -> Either P.String TaggedValue
parseEither = AL.eitherResult . parseBSL
parseBSL :: BSL.ByteString -> AL.Result TaggedValue
parseBSL = AL.parse parseTagged
{-# INLINE parseBSL #-}
parseBS :: BS.ByteString -> AL.Result TaggedValue
parseBS s = parseBSL . BSL.fromChunks $ [s]
{-# INLINE parseBS #-}
parseTL :: TL.Text -> AL.Result TaggedValue
parseTL = parseBSL . TLE.encodeUtf8
{-# INLINE parseTL #-}
parseT :: T.Text -> AL.Result TaggedValue
parseT = parseBS . TE.encodeUtf8
{-# INLINE parseT #-}
parseS :: P.String -> AL.Result TaggedValue
parseS = parseBSL . BSL.pack
{-# INLINE parseS #-}