{- | Parser of TOML language. Implemented with the help of @megaparsec@ package. -} module Toml.Parser ( ParseException (..) , parse , arrayP , boolP , dateTimeP , doubleP , integerP , keyP , keyValP , textP , tableHeaderP , tomlP ) where -- I hate default Prelude... Do I really need to import all this stuff manually?.. import Control.Applicative (Alternative (..)) import Control.Applicative.Combinators (between, count, manyTill, optional, sepEndBy, skipMany) import Control.Monad (void) import Data.Char (chr, digitToInt, isControl) import Data.Fixed (Pico) import Data.List (foldl') import Data.Semigroup ((<>)) import Data.Text (Text) import Data.Time (LocalTime (..), ZonedTime (..), fromGregorianValid, makeTimeOfDayValid, minutesToTimeZone) import Data.Void (Void) import Text.Megaparsec (Parsec, parseErrorPretty', try) import Text.Megaparsec.Char (alphaNumChar, anyChar, char, digitChar, eol, hexDigitChar, oneOf, satisfy, space, space1, string, tab) import Toml.PrefixTree (Key (..), Piece (..), fromList) import Toml.Type (AnyValue, DateTime (..), TOML (..), UValue (..), typeCheck) import qualified Control.Applicative.Combinators.NonEmpty as NC import qualified Data.HashMap.Lazy as HashMap import qualified Data.Text as Text import qualified Text.Megaparsec as Mega (parse) import qualified Text.Megaparsec.Char.Lexer as L ---------------------------------------------------------------------------- -- Library for parsing (boilerplate copy-pasted from megaparsec tutorial) ---------------------------------------------------------------------------- type Parser = Parsec Void Text -- space consumer sc :: Parser () sc = L.space space1 lineComment blockComment where lineComment = L.skipLineComment "#" blockComment = empty -- wrapper for consuming spaces after every lexeme (not before it!) lexeme :: Parser a -> Parser a lexeme = L.lexeme sc -- parser for "fixed" string text :: Text -> Parser Text text = L.symbol sc text_ :: Text -> Parser () text_ = void . text ---------------------------------------------------------------------------- -- TOML parser ---------------------------------------------------------------------------- -- Strings textP :: Parser Text textP = multilineBasicStringP <|> multilineLiteralStringP <|> literalStringP <|> basicStringP nonControlCharP :: Parser Text nonControlCharP = Text.singleton <$> satisfy (not . isControl) escapeSequenceP :: Parser Text escapeSequenceP = char '\\' >> anyChar >>= \case 'b' -> pure "\b" 't' -> pure "\t" 'n' -> pure "\n" 'f' -> pure "\f" 'r' -> pure "\r" '"' -> pure "\"" '\\' -> pure "\\" 'u' -> hexUnicodeP 4 'U' -> hexUnicodeP 8 c -> fail $ "Invalid escape sequence: " <> "\\" <> [c] where hexUnicodeP :: Int -> Parser Text hexUnicodeP n = count n hexDigitChar >>= \x -> case toUnicode $ hexToInt x of Just c -> pure (Text.singleton c) Nothing -> fail $ "Invalid unicode character: " <> "\\" <> (if n == 4 then "u" else "U") <> x where hexToInt :: String -> Int hexToInt xs = read $ "0x" ++ xs toUnicode :: Int -> Maybe Char toUnicode x -- Ranges from "The Unicode Standard". -- See definition D76 in Section 3.9, Unicode Encoding Forms. | x >= 0 && x <= 0xD7FF = Just (chr x) | x >= 0xE000 && x <= 0x10FFFF = Just (chr x) | otherwise = Nothing basicStringP :: Parser Text basicStringP = lexeme $ mconcat <$> (char '"' *> (escapeSequenceP <|> nonControlCharP) `manyTill` char '"') literalStringP :: Parser Text literalStringP = lexeme $ Text.pack <$> (char '\'' *> nonEolCharP `manyTill` char '\'') where nonEolCharP :: Parser Char nonEolCharP = satisfy (\c -> c /= '\n' && c /= '\r') multilineP :: Parser Text -> Parser Text -> Parser Text multilineP quotesP allowedCharP = lexeme $ fmap mconcat $ quotesP >> optional eol >> allowedCharP `manyTill` quotesP multilineBasicStringP :: Parser Text multilineBasicStringP = multilineP quotesP allowedCharP where quotesP = string "\"\"\"" allowedCharP :: Parser Text allowedCharP = lineEndingBackslashP <|> escapeSequenceP <|> nonControlCharP <|> eol lineEndingBackslashP :: Parser Text lineEndingBackslashP = Text.empty <$ try (char '\\' >> eol >> space) multilineLiteralStringP :: Parser Text multilineLiteralStringP = multilineP quotesP allowedCharP where quotesP = string "'''" allowedCharP :: Parser Text allowedCharP = nonControlCharP <|> eol <|> Text.singleton <$> tab -- Keys bareKeyP :: Parser Text bareKeyP = lexeme $ Text.pack <$> bareStrP where bareStrP :: Parser String bareStrP = some $ alphaNumChar <|> char '_' <|> char '-' -- adds " or ' to both sides quote :: Text -> Text -> Text quote q t = q <> t <> q keyComponentP :: Parser Piece keyComponentP = Piece <$> (bareKeyP <|> (quote "\"" <$> basicStringP) <|> (quote "'" <$> literalStringP)) keyP :: Parser Key keyP = Key <$> NC.sepBy1 keyComponentP (char '.') tableNameP :: Parser Key tableNameP = lexeme $ between (char '[') (char ']') keyP -- Values integerP :: Parser Integer integerP = lexeme $ binary <|> octal <|> hexadecimal <|> decimal where decimal = L.signed sc L.decimal binary = try (char '0' >> char 'b') >> mkNum 2 <$> (some binDigitChar) octal = try (char '0' >> char 'o') >> L.octal hexadecimal = try (char '0' >> char 'x') >> L.hexadecimal binDigitChar = oneOf ['0', '1'] mkNum b = foldl' (step b) 0 step b a c = a * b + fromIntegral (digitToInt c) doubleP :: Parser Double doubleP = lexeme $ L.signed sc (num <|> inf <|> nan) where num, inf, nan :: Parser Double num = L.float inf = 1 / 0 <$ string "inf" nan = 0 / 0 <$ string "nan" boolP :: Parser Bool boolP = False <$ text "false" <|> True <$ text "true" dateTimeP :: Parser DateTime dateTimeP = lexeme $ try hoursP <|> dayLocalZoned where -- dayLocalZoned can parse: only a local date, a local date with time, or -- a local date with a time and an offset dayLocalZoned :: Parser DateTime dayLocalZoned = do let makeLocal (Day day) (Hours hours) = Local $ LocalTime day hours makeLocal _ _ = error "Invalid arguments, unable to construct `Local`" makeZoned (Local localTime) mins = Zoned $ ZonedTime localTime (minutesToTimeZone mins) makeZoned _ _ = error "Invalid arguments, unable to construct `Zoned`" day <- try dayP maybeHours <- optional (try $ (char 'T' <|> char ' ') *> hoursP) case maybeHours of Nothing -> return day Just hours -> do maybeOffset <- optional (try timeOffsetP) case maybeOffset of Nothing -> return (makeLocal day hours) Just offset -> return (makeZoned (makeLocal day hours) offset) timeOffsetP :: Parser Int timeOffsetP = z <|> numOffset where z = pure 0 <* char 'Z' numOffset = do sign <- char '+' <|> char '-' hours <- int2DigitsP _ <- char ':' minutes <- int2DigitsP let totalMinutes = hours * 60 + minutes if sign == '+' then return totalMinutes else return (negate totalMinutes) hoursP :: Parser DateTime hoursP = do hours <- int2DigitsP _ <- char ':' minutes <- int2DigitsP _ <- char ':' seconds <- picoTruncated case makeTimeOfDayValid hours minutes seconds of Just time -> return (Hours time) Nothing -> fail $ "Invalid time of day: " <> show hours <> ":" <> show minutes <> ":" <> show seconds dayP :: Parser DateTime dayP = do year <- integer4DigitsP _ <- char '-' month <- int2DigitsP _ <- char '-' day <- int2DigitsP case fromGregorianValid year month day of Just date -> return (Day date) Nothing -> fail $ "Invalid date: " <> show year <> "-" <> show month <> "-" <> show day integer4DigitsP = (read :: String -> Integer) <$> count 4 digitChar int2DigitsP = (read :: String -> Int) <$> count 2 digitChar picoTruncated = do let rdPico = read :: String -> Pico int <- count 2 digitChar frc <- optional (char '.' >> take 12 <$> some digitChar) case frc of Nothing -> return (rdPico int) Just frc' -> return (rdPico $ int ++ "." ++ frc') arrayP :: Parser [UValue] arrayP = lexeme $ between (char '[' *> sc) (char ']') elements where elements :: Parser [UValue] elements = valueP `sepEndBy` spComma <* skipMany (text ",") spComma :: Parser () spComma = char ',' *> sc valueP :: Parser UValue valueP = UBool <$> boolP <|> UDate <$> dateTimeP <|> UDouble <$> try doubleP <|> UInteger <$> integerP <|> UText <$> textP <|> UArray <$> arrayP -- TOML keyValP :: Parser (Key, AnyValue) keyValP = do k <- keyP text_ "=" uval <- valueP case typeCheck uval of Left err -> fail $ show err Right v -> pure (k, v) tableHeaderP :: Parser (Key, TOML) tableHeaderP = do k <- tableNameP toml <- makeToml <$> many keyValP pure (k, toml) where makeToml :: [(Key, AnyValue)] -> TOML makeToml kv = TOML (HashMap.fromList kv) mempty tomlP :: Parser TOML tomlP = do sc kvs <- many keyValP tables <- many tableHeaderP pure TOML { tomlPairs = HashMap.fromList kvs , tomlTables = fromList tables } --------------------------------------------------------------------------- -- Exposed API ---------------------------------------------------------------------------- -- | Pretty parse exception for parsing toml. newtype ParseException = ParseException Text deriving (Show, Eq) -- | Parses 'Text' as 'TOML' object. parse :: Text -> Either ParseException TOML parse t = case Mega.parse tomlP "" t of Left err -> Left $ ParseException $ Text.pack $ parseErrorPretty' t err Right toml -> Right toml