{-| Module : Z.Data.Parser.Numeric Description : Textual numeric parsers. Copyright : (c) Dong Han, 2017-2019 License : BSD Maintainer : winterland1989@gmail.com Stability : experimental Portability : non-portable Textual numeric parsers. -} module Z.Data.Parser.Numeric ( -- * Decimal uint, int, integer , uint_, int_ -- * Hex , hex, hex', hex_ -- * Fractional , rational , float, double , scientific , scientifically -- * Stricter fractional(rfc8259) , rational' , float', double' , scientific' , scientifically' -- * Misc , hexLoop , decLoop , decLoopIntegerFast , isHexDigit , isDigit , floatToScientific , doubleToScientific ) where import Control.Applicative import Control.Monad import Data.Bits import Data.Int import qualified Data.Scientific as Sci import Data.Word import qualified Z.Data.Builder.Numeric as B import Z.Data.Builder.Numeric (pattern PLUS, pattern MINUS, pattern ZERO, pattern DOT, pattern LITTLE_E, pattern BIG_E) import Z.Data.Parser.Base (Parser, ()) import qualified Z.Data.Parser.Base as P import qualified Z.Data.Vector.Base as V import qualified Z.Data.Vector.Extra as V #define WORD64_SAFE_DIGITS_LEN 19 #define INT64_SAFE_DIGITS_LEN 18 -- | Parse and decode an unsigned hex number, fail in case of overflow. The hex digits -- @\'a\'@ through @\'f\'@ may be upper or lower case. -- -- This parser does not accept a leading @\"0x\"@ string, and consider -- sign bit part of the binary hex nibbles, e.g. -- -- >>> parse' hex "FF" == Right (-1 :: Int8) -- >>> parse' hex "7F" == Right (127 :: Int8) -- >>> parse' hex "7Ft" == Right (127 :: Int8) -- >>> parse' hex "7FF" == Left ["Z.Data.Parser.Numeric.hex","hex numeric number overflow"] -- hex :: forall a.(Integral a, FiniteBits a) => Parser a {-# INLINE hex #-} hex = "Z.Data.Parser.Numeric.hex" do bs <- P.takeWhile1 isHexDigit if V.length bs <= finiteBitSize (undefined :: a) `unsafeShiftR` 2 then return (hexLoop 0 bs) else P.fail' "hex numeric number overflow" -- | Same with 'hex', but only take as many as (bit_size/4) bytes. -- -- >>> parse' hex "FF" == Right (-1 :: Int8) -- >>> parse' hex "7F" == Right (127 :: Int8) -- >>> parse' hex "7Ft" == Right (127 :: Int8) -- >>> parse' hex "7FF" == Right (127 :: Int8) hex' :: forall a.(Integral a, FiniteBits a) => Parser a {-# INLINE hex' #-} hex' = "Z.Data.Parser.Numeric.hex'" do hexLoop 0 <$> P.takeN isHexDigit (finiteBitSize (undefined :: a) `unsafeShiftR` 2) where -- | Same with 'hex', but silently cast in case of overflow. -- -- >>> parse' hex "FF" == Right (-1 :: Int8) -- >>> parse' hex "7F" == Right (127 :: Int8) -- >>> parse' hex "7Ft" == Right (127 :: Int8) -- >>> parse' hex "7FF" == Right (-1 :: Int8) hex_ :: (Integral a, Bits a) => Parser a {-# INLINE hex_ #-} hex_ = "Z.Data.Parser.Numeric.hex_" hexLoop 0 <$> P.takeWhile1 isHexDigit -- | decode hex digits sequence within an array. hexLoop :: (Integral a, Bits a) => a -- ^ accumulator, usually start from 0 -> V.Bytes -> a {-# INLINE hexLoop #-} hexLoop = V.foldl' step where step a w = a `unsafeShiftL` 4 + fromIntegral (w2iHex w) w2iHex w | w <= 57 = w - 48 | w <= 70 = w - 55 | w <= 102 = w - 87 -- | A fast digit predicate. isHexDigit :: Word8 -> Bool {-# INLINE isHexDigit #-} isHexDigit w = w - 48 <= 9 || w - 65 <= 5 || w - 97 <= 5 -- | Same with 'uint', but sliently cast in case of overflow. uint_ :: forall a. (Integral a, Bounded a) => Parser a {-# INLINE uint_ #-} uint_ = "Z.Data.Parser.Numeric.uint_" decLoop 0 <$> P.takeWhile1 isDigit -- | Parse and decode an unsigned decimal number. -- -- Will fail in case of overflow. uint :: forall a. (Integral a, Bounded a) => Parser a {-# INLINE uint #-} uint = "Z.Data.Parser.Numeric.uint" do bs <- P.takeWhile1 isDigit if V.length bs <= WORD64_SAFE_DIGITS_LEN then do let w64 = decLoop @Word64 0 bs if w64 <= fromIntegral (maxBound :: a) then return (fromIntegral w64) else P.fail' "decimal numeric value overflow" else do let w64 = decLoop @Integer 0 bs if w64 <= fromIntegral (maxBound :: a) then return (fromIntegral w64) else P.fail' "decimal numeric value overflow" -- | Decode digits sequence within an array. -- -- This function may overflow if result can't fit into type. decLoop :: Integral a => a -- ^ accumulator, usually start from 0 -> V.Bytes -> a {-# INLINE decLoop #-} decLoop = V.foldl' step where step a w = a * 10 + fromIntegral (w - 48) -- | decode digits sequence within an array. -- -- A fast version to decode 'Integer' using machine word as much as possible. decLoopIntegerFast :: V.Bytes -> Integer {-# INLINE decLoopIntegerFast #-} decLoopIntegerFast bs | V.length bs <= WORD64_SAFE_DIGITS_LEN = fromIntegral (decLoop @Word64 0 bs) | otherwise = decLoop @Integer 0 bs -- | A fast digit predicate. isDigit :: Word8 -> Bool isDigit w = w - 48 <= 9 {-# INLINE isDigit #-} -- | Parse a decimal number with an optional leading @\'+\'@ or @\'-\'@ sign -- character. -- -- This parser will fail if overflow happens. int :: forall a. (Integral a, Bounded a) => Parser a {-# INLINE int #-} int = "Z.Data.Parser.Numeric.int" do w <- P.peek if w == MINUS then P.skipWord8 *> loopNe else if w == PLUS then P.skipWord8 *> loop else loop where loop = do bs <- P.takeWhile1 isDigit if V.length bs <= WORD64_SAFE_DIGITS_LEN then do let w64 = decLoop @Word64 0 bs if w64 <= fromIntegral (maxBound :: a) then return (fromIntegral w64) else P.fail' "decimal numeric value overflow" else do let w64 = decLoop @Integer 0 bs if w64 <= fromIntegral (maxBound :: a) then return (fromIntegral w64) else P.fail' "decimal numeric value overflow" loopNe = do bs <- P.takeWhile1 isDigit if V.length bs <= INT64_SAFE_DIGITS_LEN then do let i64 = negate (decLoop @Int64 0 bs) if i64 >= fromIntegral (minBound :: a) then return (fromIntegral i64) else P.fail' "decimal numeric value overflow" else do let i64 = negate (decLoop @Integer 0 bs) if i64 >= fromIntegral (minBound :: a) then return (fromIntegral i64) else P.fail' "decimal numeric value overflow" -- | Same with 'int', but sliently cast if overflow happens. int_ :: (Integral a, Bounded a) => Parser a {-# INLINE int_ #-} int_ = "Z.Data.Parser.Numeric.int_" do w <- P.peek if w == MINUS then P.skipWord8 *> (negate <$> loop) else if w == PLUS then P.skipWord8 *> loop else loop where loop = decLoop 0 <$> P.takeWhile1 isDigit -- | Parser specifically optimized for 'Integer'. -- integer :: Parser Integer {-# INLINE integer #-} integer = "Z.Data.Parser.Numeric.integer" do w <- P.peek if w == MINUS then P.skipWord8 *> (negate <$> integer') else if w == PLUS then P.skipWord8 *> integer' else integer' where -- strip integer's message integer' = decLoopIntegerFast <$> P.takeWhile1 isDigit -- | Parse a rational number. -- -- The syntax accepted by this parser is the same as for 'double'. -- -- /Note/: this parser is not safe for use with inputs from untrusted -- sources. An input with a suitably large exponent such as -- @"1e1000000000"@ will cause a huge 'Integer' to be allocated, -- resulting in what is effectively a denial-of-service attack. -- -- In most cases, it is better to use 'double' or 'scientific' -- instead. -- rational :: (Fractional a) => Parser a {-# INLINE rational #-} rational = "Z.Data.Parser.Numeric.rational" scientificallyInternal realToFrac -- | Parse a rational number and round to 'Double'. -- -- This parser accepts an optional leading sign character, followed by -- at least one decimal digit. The syntax similar to that accepted by -- the 'read' function, with the exception that a trailing @\'.\'@ or -- @\'e\'@ /not/ followed by a number is not consumed. -- -- Examples with behaviour identical to 'read': -- -- >parse' double "3" == ("", Right 3.0) -- >parse' double "3.1" == ("", Right 3.1) -- >parse' double "3e4" == ("", Right 30000.0) -- >parse' double "3.1e4" == ("", Right 31000.0) -- -- >parse' double ".3" == (".3", Left ParserError) -- >parse' double "e3" == ("e3", Left ParserError) -- -- Examples of differences from 'read': -- -- >parse' double "3.foo" == (".foo", Right 3.0) -- >parse' double "3e" == ("e", Right 3.0) -- >parse' double "-3e" == ("e", Right -3.0) -- -- This function does not accept string representations of \"NaN\" or -- \"Infinity\". -- double :: Parser Double {-# INLINE double #-} double = "Z.Data.Parser.Numeric.double" scientificallyInternal Sci.toRealFloat -- | Parse a rational number and round to 'Float'. -- -- Single precision version of 'double'. float :: Parser Float {-# INLINE float #-} float = "Z.Data.Parser.Numeric.float" scientificallyInternal Sci.toRealFloat -- | Parse a scientific number. -- -- The syntax accepted by this parser is the same as for 'double'. -- scientific :: Parser Sci.Scientific {-# INLINE scientific #-} scientific = "Z.Data.Parser.Numeric.scientific" scientificallyInternal id -- | Parse a scientific number and convert to result using a user supply function. -- -- The syntax accepted by this parser is the same as for 'double'. scientifically :: (Sci.Scientific -> a) -> Parser a {-# INLINE scientifically #-} scientifically h = "Z.Data.Parser.Numeric.scientifically" scientificallyInternal h -- | Strip message version. scientificallyInternal :: (Sci.Scientific -> a) -> Parser a {-# INLINE scientificallyInternal #-} scientificallyInternal h = do !sign <- P.peek when (sign == PLUS || sign == MINUS) (P.skipWord8) !intPart <- P.takeWhile1 isDigit -- backtrack here is neccessary to avoid eating extra dot or e -- attoparsec is doing it wrong here: https://github.com/bos/attoparsec/issues/112 !sci <- (do -- during number parsing we want to use machine word as much as possible -- so as long as range permit, we use Word64 instead of final Integer !fracPart <- P.word8 DOT *> P.takeWhile1 isDigit let !ilen = V.length intPart !flen = V.length fracPart !base = if ilen + flen <= WORD64_SAFE_DIGITS_LEN then fromIntegral (decLoop @Word64 (decLoop @Word64 0 intPart) fracPart) else let i = decLoopIntegerFast intPart f = decLoopIntegerFast fracPart in i * 10 ^ flen + f parseE base flen) <|> (parseE (decLoopIntegerFast intPart) 0) pure $! if sign /= MINUS then h sci else h (negate sci) where {-# INLINE parseE #-} parseE c e = (do _ <- P.satisfy (\w -> w == LITTLE_E || w == BIG_E) Sci.scientific c . subtract e <$> int) <|> pure (Sci.scientific c (negate e)) -------------------------------------------------------------------------------- -- | Parse a rational number. -- -- The syntax accepted by this parser is the same as for 'double''. -- -- /Note/: this parser is not safe for use with inputs from untrusted -- sources. An input with a suitably large exponent such as -- @"1e1000000000"@ will cause a huge 'Integer' to be allocated, -- resulting in what is effectively a denial-of-service attack. -- -- In most cases, it is better to use 'double'' or 'scientific'' -- instead. -- rational' :: (Fractional a) => Parser a {-# INLINE rational' #-} rational' = "Z.Data.Parser.Numeric.rational'" scientificallyInternal' realToFrac -- | More strict number parsing(rfc8259). -- -- 'scientific' support parse @2314.@ and @21321exyz@ without eating extra dot or @e@ via -- backtrack, this is not allowed in some strict grammer such as JSON, so we make an -- non-backtrack strict number parser separately using LL(1) lookahead. This parser also -- agree with 'read' on extra dot or e handling: -- -- >parse' double "3.foo" == Left ParseError -- >parse' double "3e" == Left ParseError -- -- Leading zeros or @+@ sign is also not allowed: -- -- >parse' double "+3.14" == Left ParseError -- >parse' double "0014" == Left ParseError -- -- If you have a similar grammer, you can use this parser to save considerable time. -- -- @ -- number = [ minus ] int [ frac ] [ exp ] -- decimal-point = %x2E ; . -- digit1-9 = %x31-39 ; 1-9 -- e = %x65 / %x45 ; e E -- exp = e [ minus / plus ] 1*DIGIT -- frac = decimal-point 1*DIGIT -- @ -- -- This function does not accept string representations of \"NaN\" or -- \"Infinity\". -- reference: https://tools.ietf.org/html/rfc8259#section-6 double' :: Parser Double {-# INLINE double' #-} double' = "Z.Data.Parser.Numeric.double'" scientificallyInternal' Sci.toRealFloat -- | Parse a rational number and round to 'Float' using stricter grammer. -- -- Single precision version of 'double''. float' :: Parser Float {-# INLINE float' #-} float' = "Z.Data.Parser.Numeric.float'" scientificallyInternal' Sci.toRealFloat -- | Parse a scientific number. -- -- The syntax accepted by this parser is the same as for 'double''. scientific' :: Parser Sci.Scientific {-# INLINE scientific' #-} scientific' = "Z.Data.Parser.Numeric.scientific'" scientificallyInternal' id -- | Parse a scientific number and convert to result using a user supply function. -- -- The syntax accepted by this parser is the same as for 'double''. scientifically' :: (Sci.Scientific -> a) -> P.Parser a {-# INLINE scientifically' #-} scientifically' h = "Z.Data.Parser.Numeric.scientifically'" scientificallyInternal' h -- | Strip message version of scientifically'. scientificallyInternal' :: (Sci.Scientific -> a) -> P.Parser a {-# INLINE scientificallyInternal' #-} scientificallyInternal' h = do !sign <- P.peek when (sign == MINUS) (P.skipWord8) -- no leading plus is allowed !intPart <- P.takeWhile1 isDigit when (V.length intPart > 1 && V.head intPart == ZERO) (P.fail' "leading zeros are not allowed") mdot <- P.peekMaybe !sci <- case mdot of Just DOT -> do !fracPart <- P.skipWord8 *> P.takeWhile1 isDigit -- during number parsing we want to use machine word as much as possible -- so as long as range permit, we use Word64 instead of final Integer let !ilen = V.length intPart !flen = V.length fracPart !base = if ilen + flen <= WORD64_SAFE_DIGITS_LEN then fromIntegral (decLoop @Word64 (decLoop @Word64 0 intPart) fracPart) else let i = decLoopIntegerFast intPart f = decLoopIntegerFast fracPart in i * 10 ^ flen + f parseE base flen _ -> parseE (decLoopIntegerFast intPart) 0 pure $! if sign /= MINUS then h sci else h (negate sci) where {-# INLINE parseE #-} parseE !c !e = do me <- P.peekMaybe e' <- case me of Just ec | ec == LITTLE_E || ec == BIG_E -> P.skipWord8 *> int _ -> pure 0 pure $! Sci.scientific c (e' - e) -------------------------------------------------------------------------------- floatToScientific :: Float -> Sci.Scientific {-# INLINE floatToScientific #-} floatToScientific rf | rf < 0 = -(fromFloatingDigits (B.grisu3_sp (-rf))) | rf == 0 = 0 | otherwise = fromFloatingDigits (B.grisu3_sp rf) doubleToScientific :: Double -> Sci.Scientific {-# INLINE doubleToScientific #-} doubleToScientific rf | rf < 0 = -(fromFloatingDigits (B.grisu3 (-rf))) | rf == 0 = 0 | otherwise = fromFloatingDigits (B.grisu3 rf) fromFloatingDigits :: ([Int], Int) -> Sci.Scientific {-# INLINE fromFloatingDigits #-} fromFloatingDigits (digits, e) = go digits 0 0 where -- There's no way a float or double has more digits a 'Int64' can't handle go :: [Int] -> Int64 -> Int -> Sci.Scientific go [] !c !n = Sci.scientific (fromIntegral c) (e - n) go (d:ds) !c !n = go ds (c * 10 + fromIntegral d) (n + 1)