{-# LANGUAGE OverloadedStrings #-} -- | Parse an UTF-8 encoded EDN string into a haskell representation of EDN objects. -- Use 'Data.EDN.decode' to get actual types. module Data.EDN.Parser ( -- * Data parsers parseMaybe, parseEither, parseBSL, parseBS, parseT, parseTL, parseS, -- * Attoparsec implementation parseValue, parseTagged ) where import Prelude () import Prelude.Compat hiding (String, takeWhile) import Control.Applicative ((<|>)) import Control.Monad (void) import Data.Attoparsec.ByteString.Char8 as A import Data.Attoparsec.Combinator () import qualified Data.Attoparsec.Lazy as AL import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Lazy.Char8 as BSL import Data.ByteString.Search (replace) import qualified Data.ByteString.UTF8 as UTF8 import Data.Functor (($>)) import Data.Maybe (fromJust) import Data.Scientific as Sci import qualified Data.Text as T import qualified Data.Text.Encoding as TE import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TLE import Data.EDN.Types (Tagged (..), TaggedValue, Value (..), makeMap, makeSet, makeVec) import qualified Prelude as P isSpaceOrComma :: Char -> Bool isSpaceOrComma ' ' = True isSpaceOrComma '\r' = True isSpaceOrComma '\n' = True isSpaceOrComma '\t' = True isSpaceOrComma ',' = True isSpaceOrComma _ = False spaceOrComma :: Parser Char spaceOrComma = satisfy isSpaceOrComma "space/comma" skipSoC :: Parser () skipSoC = skipWhile isSpaceOrComma parseNil :: Parser Value parseNil = do skipSoC string "nil" $> Nil parseBool :: Parser Value parseBool = do skipSoC choice [ string "true" *> pure (Boolean True) , string "false" *> pure (Boolean False) ] parseString :: Parser Value parseString = do skipSoC void $ char '"' x <- A.scan False $ \s c -> if s then Just False else if c == '"' then Nothing else Just (c == '\\') void $ char '"' let prepare = if '\\' `BS.elem` x then rep "\\\"" "\"" . rep "\\\\" "\\" . rep "\\n" "\n" . rep "\\r" "\r" . rep "\\t" "\t" else id return $! String $ TE.decodeUtf8 $ prepare x where rep f t s = BS.concat . BSL.toChunks $! replace (BS.pack f) (BS.pack t) s parseCharacter :: Parser Value parseCharacter = do skipSoC char '\\' *> (simple <|> anyCharUtf8) where simple :: Parser Value simple = do x <- string "newline" <|> string "return" <|> string "space" <|> string "tab" <|> string "\\" return . Character $! case x of "newline" -> '\n' "return" -> '\r' "space" -> ' ' "tab" -> '\t' "\\" -> '\\' _ -> error ("EDN.parseCharacter: impossible - simple" ++ show x) anyCharUtf8 :: Parser Value anyCharUtf8 = do bs <- scan BS.empty go case UTF8.decode bs of Just (c, _) -> return $! Character c Nothing -> error $ "EDN.parseCharacter: bad utf8 data? " ++ show bs go :: BS.ByteString -> Char -> Maybe BS.ByteString go s c | BS.null s = Just (BS.singleton c) | otherwise = case UTF8.decode s of Nothing -> Just (s `BS.snoc` c) Just (uc, _) -> if uc == UTF8.replacement_char then Just (s `BS.snoc` c) else Nothing parseSymbol :: Parser Value parseSymbol = do skipSoC c <- satisfy (inClass "a-zA-Z.*/<>!?$%&=+_-") (ns, val) <- withNS c <|> withoutNS c return $! Symbol ns val where withNS c = do ns <- takeWhile (inClass "a-zA-Z0-9#:.*<>!?$%&=+_-") vc <- char '/' *> satisfy (inClass "a-zA-Z.*/<>!?$%&=+_-") val <- takeWhile1 (inClass "a-zA-Z0-9#:.*<>!?$%&=+_-") return (c `BS.cons` ns, vc `BS.cons` val) withoutNS c = do val <- takeWhile (inClass "a-zA-Z0-9#:.*<>!?$%&=+_-") return ("", c `BS.cons` val) parseKeyword :: Parser Value parseKeyword = do skipSoC c <- char ':' *> satisfy (inClass "a-zA-Z.*/<>!?$%&=+_-") x <- takeWhile (inClass "a-zA-Z0-9#:.*/<>!?$%&=+_-") return $! Keyword (c `BS.cons` x) parseNumber :: Parser Value parseNumber = do skipSoC n <- A.scientific return $! if Sci.isInteger n then Integer (fromIntegral (fromJust (Sci.toBoundedInteger n) :: P.Int)) else Floating (Sci.toRealFloat n) parseColl :: Parser t1 -- opening bracket -> Parser t2 -- closing bracket -> Parser a -- item parser -> ([a] -> Value) -- Value constructor -> Parser Value parseColl openingBr closingBr item construct = do skipSoC _ <- openingBr A.skipSpace vs <- item `sepBy` spaceOrComma A.skipSpace _<- closingBr return $! construct vs parseList :: Parser Value parseList = parseColl (char '(') (char ')') parseTagged List parseVector :: Parser Value parseVector = parseColl (char '[') (char ']') parseTagged makeVec parseSet :: Parser Value parseSet = parseColl (char '#' *> char '{') (char '}') parseTagged makeSet parseMap :: Parser Value parseMap = parseColl (char '{') (char '}') parseAssoc makeMap where parseAssoc = do key <- parseValue val <- parseTagged return (key, val) skipComment :: Parser () skipComment = skipSoC *> char ';' *> skipWhile (/= '\n') parseDiscard :: Parser () parseDiscard = do skipSoC void $ string "#_" void parseValue -- | Parse a \"raw\" EDN value into a 'Value'. parseValue :: Parser Value parseValue = do skipSoC skipMany skipComment skipMany parseDiscard parseSet <|> parseMap <|> parseVector <|> parseList <|> parseNil <|> parseBool <|> parseNumber <|> parseKeyword <|> parseSymbol <|> parseCharacter <|> parseString -- | Parse a probably tagged EDN value into a 'TaggedValue'. parseTagged :: Parser TaggedValue parseTagged = do skipSoC withNS <|> withoutNS <|> noTag where withNS = do ns <- char '#' *> parseIdent tag <- char '/' *> parseIdent value <- parseValue return $! Tagged value ns tag withoutNS = do tag <- char '#' *> parseIdent value <- parseValue return $! Tagged value "" tag parseIdent = takeWhile1 (inClass "a-zA-Z0-9-") noTag = do value <- parseValue return $! NoTag value {- | Parse a lazy 'BSL.ByteString' into a 'TaggedValue'. If fails due to incomplete or invalid input, 'Nothing' is returned. -} parseMaybe :: BSL.ByteString -> Maybe TaggedValue parseMaybe = AL.maybeResult . parseBSL {- | Parse a lazy 'BSL.ByteString' into a 'TaggedValue'. If fails due to incomplete or invalid input, 'Left' is returned with the error message. -} parseEither :: BSL.ByteString -> Either P.String TaggedValue parseEither = AL.eitherResult . parseBSL -- | Parse a lazy 'BSL.ByteString'. parseBSL :: BSL.ByteString -> AL.Result TaggedValue parseBSL = AL.parse parseTagged {-# INLINE parseBSL #-} -- | Parse a strict 'BS.ByteString', but without continutations. parseBS :: BS.ByteString -> AL.Result TaggedValue parseBS s = parseBSL . BSL.fromChunks $ [s] {-# INLINE parseBS #-} -- | Parse a lazy 'TL.Text'. parseTL :: TL.Text -> AL.Result TaggedValue parseTL = parseBSL . TLE.encodeUtf8 {-# INLINE parseTL #-} -- | Parse a strict 'T.Text'. parseT :: T.Text -> AL.Result TaggedValue parseT = parseBS . TE.encodeUtf8 {-# INLINE parseT #-} -- | Parse a string AKA '[Char]'. Not really useful other than for debugging purposes. parseS :: P.String -> AL.Result TaggedValue parseS = parseBSL . BSL.pack {-# INLINE parseS #-}