{-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE TypeFamilies #-} module FieldParser where import Control.Category qualified as Cat import Control.Monad ((<=<)) import Data.Aeson qualified as Json import Data.Aeson.BetterErrors qualified as Json import Data.Aeson.Types qualified as Json import Data.Attoparsec.ByteString qualified as AttoBytes import Data.Attoparsec.Text qualified as Atto import Data.CaseInsensitive qualified as CaseInsensitive import Data.Error.Tree import Data.List.NonEmpty qualified as NonEmpty import Data.Map.Strict qualified as Map import Data.Scientific (Scientific) import Data.Scientific qualified as Scientific import Data.Semigroup.Foldable (Foldable1 (toNonEmpty)) import Data.Semigroupoid qualified as Semigroupoid import Data.Text qualified as Text import PossehlAnalyticsPrelude import Text.ParserCombinators.ReadPrec qualified as Read -- | Parser for a field. TODO: define what a field is -- -- If you want to build more complex parsers, use the 'attoparsecText' and 'attoparsecBytes' functions -- to build a parser from a bytestring. -- -- If you want to nest parsers, e.g. first want to decode to Text via utf-8 and then parse the Text, -- use the 'Semigroupoid'/'Category' instances to chain parsers. -- -- As a general rule, when you create an error message, try to include the value -- (or a shortened version of the value) that was not accepted. -- Otherwise the error will be hard to debug. -- TODO: Can we add some examples to each parser? newtype FieldParser' err from to = FieldParser (from -> Either err to) deriving stock (Functor) -- | An alias for 'FieldParser'' for the common case where @err@ = 'Error'. type FieldParser from to = FieldParser' Error from to -- | If the right parser fails, return its error, otherwise run the left parser. instance Semigroupoid (FieldParser' err) where o :: FieldParser' err middle to -> FieldParser' err from middle -> FieldParser' err from to o (FieldParser f) (FieldParser g) = FieldParser (f <=< g) -- | `id` is the parser that always succeeds. instance Cat.Category (FieldParser' err) where id :: FieldParser' err a a id = FieldParser pure (.) = Semigroupoid.o -- | You can map over both sides of a parser to change the types in a instance Profunctor (FieldParser' err) where dimap :: (from' -> from) -> (to -> to') -> FieldParser' err from to -> FieldParser' err from' to' dimap pre post (FieldParser parser) = FieldParser (fmap post . parser . pre) -- | Execute the field parser. runFieldParser :: FieldParser' err from to -> from -> Either err to runFieldParser (FieldParser fn) = fn -- | Change the type of the `err` in a field Parser.` mapError :: (err1 -> err2) -> FieldParser' err1 from to -> FieldParser' err2 from to mapError f (FieldParser original) = FieldParser $ \from -> original from & first f -- | Turn a @FieldParser Value@ directly into a valid 'parseJSON` implementation. -- -- If you want to parse any objects or lists, it’s better to use 'Json.toAesonParser' with 'jsonParser' instead, but for simple json scalars this one is better. toParseJSON :: FieldParser Json.Value a -> Json.Value -> Json.Parser a toParseJSON parser = Json.toAesonParser prettyError (jsonParser parser) -- | Turn a @FieldParser' ErrorTree Value@ directly into a valid 'parseJSON` implementation. -- -- If you want to parse any objects or lists, it’s better to use 'Json.toAesonParser' with 'jsonParser' instead, but for simple json scalars this one is better. toParseJSONErrorTree :: FieldParser' ErrorTree Json.Value a -> Json.Value -> Json.Parser a toParseJSONErrorTree parser = Json.toAesonParser prettyErrorTree (jsonParser parser) toReadPrec :: -- | ReadPrec to base this parser on (e.g. use @readPrec \@Text@@ to parse the same as Text) Read.ReadPrec from -> FieldParser from to -> Read.ReadPrec to toReadPrec innerReadPrec parser = do from :: from <- innerReadPrec case runFieldParser parser from of Left err -> fail (err & prettyError & textToString) Right a -> pure a -- | Turn a @FieldParser Value@ into an 'Json.ParseT` which parses this value. jsonParser :: Monad m => FieldParser' err Json.Value to -> Json.ParseT err m to jsonParser parser = ( Json.asValue >>= ( \from -> runFieldParser parser from & \case Right a -> pure a Left err -> Json.throwCustomError err ) ) -- TODO: provide nice shortened values in the error messages for these json parsers. -- | parse a json boolean from a 'Json.Value' jsonBool :: FieldParser Json.Value Bool jsonBool = FieldParser $ \case Json.Bool b -> Right b _ -> Left "Not a json boolean" -- | parse a json `null` from a 'Json.Value' jsonNull :: FieldParser Json.Value () jsonNull = FieldParser $ \case Json.Null -> Right () _ -> Left "Not a json `null`" -- | parse a json number from a 'Json.Value' jsonNumber :: FieldParser Json.Value Scientific jsonNumber = FieldParser $ \case Json.Number s -> Right s _ -> Left "Not a json number" -- | parse a json string from a 'Json.Value' jsonString :: FieldParser Json.Value Text jsonString = FieldParser $ \case Json.String s -> Right s _ -> Left "Not a json string" -- * Field parsers -- | Parse field as 'Text' utf8 :: FieldParser ByteString Text utf8 = FieldParser $ \bytes -> case bytesToTextUtf8 bytes of Left _err -> Left $ "Not a valid UTF-8 string" Right a -> Right a -- | A decimal number with an optional `+` or `-` sign character. signedDecimal :: FieldParser Text Integer signedDecimal = attoparsecText (\t -> [fmt|Not a signed decimal number: "{t}"|]) -- the decimal is okay, since Integer has no maximum length -- we don’t have to care about memory, since the input text would already not fit into memory (Atto.signed (Atto.decimal @Integer)) -- | A decimal natural number; does not allow for a @+@-sign. decimalNatural :: FieldParser Text Natural decimalNatural = attoparsecText (\t -> [fmt|Not a natural number: "{t}"|]) -- the decimal is okay, since Natural has no maximum length -- we don’t have to care about memory, since the input text would already not fit into memory (Atto.decimal @Integer) Cat.>>> integralToNatural @Integer -- | A signed, decimal, natural number. -- -- e.g. @12345@, @0@, or @+12@, but not @-12@. signedDecimalNatural :: FieldParser Text Natural signedDecimalNatural = attoparsecText (\t -> [fmt|Not a signed natural number: "{t}"|]) -- the decimal is okay, since Natural has no maximum length -- we don’t have to care about memory, since the input text would already not fit into memory (Atto.signed (Atto.decimal @Integer)) Cat.>>> integralToNatural @Integer -- | Parse any integral into a natural number, fails if the integral is negative. integralToNatural :: Integral i => FieldParser i Natural integralToNatural = FieldParser (\i -> i & intToNatural & annotate [fmt|Number must be 0 or positive, but was negative: {toInteger i}|]) -- | Parse any integral to an 'Integer'. This can never fail, but is here to mirror 'integralToNatural'. integralToInteger :: Integral i => FieldParser' err i Integer integralToInteger = lmap (fromIntegral @_ @Integer) Cat.id -- | An arbitrary-precision number in scientific notation. scientific :: FieldParser Text Scientific scientific = attoparsecText (\t -> [fmt|Not a scientific number: "{t}"|]) Atto.scientific -- | Parse a scientific into a bounded integral type. -- -- Scientific can be *very* big, (think @1e10000@) so this function makes sure we -- * don’t wrap around the bound -- * don’t fill up all our memory by e.g. parsing into @Integer@ or @Natural@. -- -- So if you want to go to @Natural@, you have to first set an intermediate type with a bound you want to accept -- (e.g. 64 bits via @Int@) and then go from that to the unbounded type (e.g. via 'integralToNatural'). -- -- @err@ is added as context around the bounded error. boundedScientificIntegral :: forall i. (Integral i, Bounded i) => Error -> FieldParser Scientific i boundedScientificIntegral err = FieldParser $ \s -> case Scientific.toBoundedInteger s of Nothing -> Left $ (err & errorContext [fmt|Must be between {iMinBound} and {iMaxBound}|]) Just i -> Right i where iMinBound = toInteger (minBound :: i) iMaxBound = toInteger (maxBound :: i) -- | Parse a scientific into a bounded floating point type. -- -- Scientific can be *very* big, (think @1e10000@) so this function makes sure we -- * don’t wrap around the bound -- * don’t fill up all our memory -- * Fit into the available floating point representation space boundedScientificRealFloat :: RealFloat d => FieldParser Scientific d boundedScientificRealFloat = FieldParser $ \s -> Scientific.toBoundedRealFloat s & first ( \zeroOrInf -> ( if | 0 == zeroOrInf -> [fmt|Number {show s} is too small to fit into floating point.|] | isInfinite zeroOrInf -> [fmt|Number {show s} is too big to fit into floating point.|] | otherwise -> [fmt|Number {show s} did not fit into floating point, but we don’t know why (BUG).|] ) ) -- | Parse an integer into a bounded integral type. -- -- @err@ is added as context around the bounded error. bounded :: forall i. (Integral i, Bounded i) => Text -> FieldParser Integer i bounded err = FieldParser $ \num -> case num & fromIntegerBounded of Nothing -> Left $ (errorContext err [fmt|Must be between {iMinBound} and {iMaxBound}, but was: {num & toInteger}|]) Just i -> Right i where -- from Scientific.toBoundedInteger fromIntegerBounded :: Integer -> Maybe i fromIntegerBounded i | i < iMinBound || i > iMaxBound = Nothing | otherwise = Just $ fromInteger i iMinBound = toInteger (minBound :: i) iMaxBound = toInteger (maxBound :: i) -- | Example of how to create a more “complicated” parser that checks whether a value -- is between two other values. clamped :: (Ord a, Show a) => -- | lower boundary (inclusive) a -> -- | upper boundary (exclusive) a -> FieldParser a a clamped lower upperExcl = FieldParser $ \a -> if a >= lower && a < upperExcl then Right a else Left $ [fmt|Value not between {lower & show} (inclusive) and {upperExcl & show} (exclusive), was: {show a}|] -- | @oneOf prettyFrom oneOfMap@ -- -- Takes a @oneOf@, which is a list of possibilities that this parser accepts. -- The comparison is done with '(==)', and then the according 'to' value is returned. -- -- In case of an error @prettyFrom@ is used to pretty-print the available choices and actual input. -- -- If you want to match on an 'Enum'-like type, -- you should probably use 'invertPretty' or 'invertPrettyCaseInsensitive' instead, -- which allows for exhaustiveness checks. oneOf :: Ord from => (from -> Text) -> [(from, to)] -> FieldParser from to oneOf errDisplay m = -- This doesn’t strictly need an 'Ord' instance, it can also use `findMaybe` with `==` instead of going through a map. oneOfMap errDisplay (Map.fromList m) -- | 'oneOf', but takes a map directly. -- -- | @oneOfMap prettyFrom oneOfMap@ -- -- Takes a @oneOfMap@, which is a map of possibilities that this parser accepts. -- -- In case of an error @prettyFrom@ is used to pretty-print the available choices and actual input. oneOfMap :: (Ord from) => (from -> Text) -> Map from to -> FieldParser from to oneOfMap errDisplay m = FieldParser $ \from -> m & Map.lookup from & \case Nothing -> do let prettyFrom f = [fmt|"{f & errDisplay}"|] Left $ [fmt|Not one of: {m & Map.keys <&> prettyFrom & Text.intercalate ", "}, was {from & prettyFrom}|] Just to -> Right to -- | Parse into an enum from a textual description of the fields. -- -- The given function is inverted with 'inverseMap' and then used as the parsing function. textEnum :: (Bounded to, Enum to) => (to -> Text) -> FieldParser Text to textEnum displayEnum = oneOfMap id (inverseMap displayEnum) -- | Try to run the first parser, or if it fails run the second one; return an Either. either :: FieldParser from to1 -> FieldParser from to2 -> FieldParser' ErrorTree from (Either to1 to2) either first' second' = FieldParser $ \from -> case runFieldParser first' from of Left err -> case runFieldParser second' from of Left err2 -> Left $ errorTree "Neither the left nor the right parser succeeded" $ err :| [err2] Right a -> Right $ Right a Right a -> Right $ Left a -- | Try to run the first parser, or if it fails run the next one; They have to return the same value. or :: NonEmpty (FieldParser from to) -> FieldParser' ErrorTree from to or parsers = FieldParser $ \from -> parsers & toNonEmpty & traverse ( \p -> runFieldParser p from -- we want to shortcut on the first successful parser, using Left & flipEither ) & \case Left a -> Right $ a Right errs -> Left $ errorTree "Neither of these parsers succeeded" errs where flipEither :: Either a b -> Either b a flipEither (Left err) = Right err flipEither (Right a) = Left a -- | Given a pretty printing function, it will create a parser -- that uses the inverse function to parse the field. -- -- The pretty printing function must create a different output for different inputs! -- Otherwise which value is returned is undefined. invertPretty :: (Bounded to, Enum to) => (to -> Text) -> FieldParser Text to invertPretty prettyFn = oneOfMap id (inverseMap prettyFn) -- | Given a pretty printing function, it will create a parser -- that uses the inverse function to parse the field. -- The parsed text is compared case-insensitively. -- -- The pretty printing function must create a different output for different inputs! -- This also means two outputs should not match if compared case-insensitively. -- Otherwise which value is returned is undefined. invertPrettyCaseInsensitive :: (Bounded to, Enum to) => (to -> Text) -> FieldParser Text to invertPrettyCaseInsensitive prettyFn = oneOfMap CaseInsensitive.original (inverseMap (\t -> prettyFn t & CaseInsensitive.mk)) & lmap CaseInsensitive.mk -- | 'oneOf' but only one value possible exactly :: Eq from => (from -> Text) -> from -> FieldParser from from exactly errDisplay from = FieldParser $ \from' -> if from == from' then Right from' else Left $ [fmt|Field has to be exactly {errDisplay from}, was: {errDisplay from'}|] -- | Takes a parser and lifts it to parse every element in a list. multiple :: -- \| @toLevelError@: A descriptive message about the context of how these multiple elements should be parsed -- -- e.g. @"Must be a |-separated list of (e.g. |), but some elements could not be parsed"@. -- It is used as the root of the 'ErrorTree'. Text -> -- | For each sub-parser that failed, @displayValOnErr@ is prefixed to its error. -- It receives the index (starting from 1) of the element that failed, -- so you can display it in the element’s error message. -- You can decide yourself whether you want to print the full value, part of the value, or only the index. (Natural -> from -> Text) -> -- | Parser for each element FieldParser from to -> FieldParser' ErrorTree [from] [to] multiple topLevelErr displayValOnErr inner = FieldParser $ \ta -> ta & toList & indexed & traverse run & \case Success b -> Right b Failure errs -> Left $ errorTree (newError topLevelErr) errs where indexed = zip [1 :: Natural ..] run (index, a) = case runFieldParser inner a of -- TODO: It would probably be nice to display the actual value that could not be parsed here! Left err -> Failure (singleton $ errorContext (displayValOnErr index a) err) Right res -> Success res nonEmpty :: err -> FieldParser' err [from] (NonEmpty from) nonEmpty msg = FieldParser $ \from -> do case from & NonEmpty.nonEmpty of Nothing -> Left msg Just ne -> Right ne -- | Wrap a FieldParser with some descriptions for generating better error messages. data FieldParserDesc' err from to = FieldParserDesc { -- | Symbolic description of a parser, e.g. “hh:mm” for a timestamp symbolicDesc :: Text, -- | Actual parser fieldParser :: FieldParser' err from to } type FieldParserDesc from to = FieldParserDesc' Error from to -- | Splits the input string into multiple elements based on the given separator string. -- -- Each element is then passed to the provided @innerParser@. -- -- This returns a descriptive tree of errors containing the errors of each sub-parser that failed. separatedBy :: -- | Separator Text -> -- | For each sub-parser that failed, @displayValOnErr@ is prefixed to its error. -- It receives the index (starting from 1) of the element that failed, -- so you can display it in the element’s error message. -- You can decide yourself whether you want to print the full value, part of the value, or only the index. (Natural -> Text -> Text) -> FieldParserDesc Text to -> FieldParser' ErrorTree Text [to] separatedBy separator displayValOnErr innerParser = ( Cat.id & rmap (Text.splitOn separator) & rmap ( \case -- splitOn for "" results in [""] but we want to accept a empty list [""] -> [] xs -> xs ) & mapError singleError ) Cat.>>> ( multiple ( let d = innerParser.symbolicDesc in [fmt|Must be a {separator}-separated list of {d} (e.g. "{d}{separator}{d}"), but some elements could not be parsed|] ) displayValOnErr innerParser.fieldParser ) -- | Ignore whitespace around a text. -- -- Shows how to use the profunctor instance to do pure transformations (that cannot throw any errors). -- -- Alternatively this could be implemented in the FieldParser pipeline like -- @ -- ignore = FieldParser $ \t -> Right (Text.strip t) -- @ ignoreSurroundingWhitespace :: FieldParser Text a -> FieldParser Text a ignoreSurroundingWhitespace = lmap Text.strip -- | Given an error message and an attoparsec parser, -- “clamp” the parser that it expects to match until the end of the string, -- then run the parser and put the given error message on error. -- -- This function works on "Data.Attoparsec.Text" parsers. attoparsecText :: -- | Error message to use if the parser fails (the attoparsec message is discarded) (Text -> Error) -> -- | Parser to use. Should not check for `endOfInput` itself. Atto.Parser a -> FieldParser Text a attoparsecText err parser = let parseAll = Atto.parseOnly (parser <* Atto.endOfInput) in FieldParser $ \text -> case parseAll text of Left _attoErr -> Left (err text) Right a -> Right a -- | Given an error message and an attoparsec parser, -- “clamp” the parser that it expects to match until the end of the string, -- then run the parser and put the given error message on error. -- -- This function works on "Data.Attoparsec.ByteString" parsers. attoparsecBytes :: -- | Error message to use if the parser fails (the attoparsec message is discarded) Error -> -- | Parser to use. Should not check for `endOfInput` itself. AttoBytes.Parser a -> FieldParser ByteString a attoparsecBytes err parser = let parseAll = AttoBytes.parseOnly (parser <* AttoBytes.endOfInput) in FieldParser $ \bytes -> case parseAll bytes of Left _attoErr -> Left err Right a -> Right a