{-| Module : Z.Data.Parser.Numeric Description : Textual numeric parsers. Copyright : (c) Dong Han, 2017-2019 License : BSD Maintainer : winterland1989@gmail.com Stability : experimental Portability : non-portable Textual numeric parsers. -} module Z.Data.Parser.Numeric ( -- * Decimal uint, int, integer , uint_, int_, digit -- * Hex , hex, hex', hex_ -- * Fractional , rational , float, double , scientific , scientifically -- * Stricter fractional(rfc8259) , rational' , float', double' , scientific' , scientifically' -- * Misc , w2iHex, w2iDec , hexLoop , decLoop , decLoopIntegerFast , sciToDouble ) where import Control.Applicative import Control.Monad import Data.Bits import Data.Int import qualified Data.Scientific as Sci import Data.Word import GHC.Exts import GHC.Num import GHC.Float (expt) import Z.Data.ASCII import Z.Data.Parser.Base (Parser, ()) import qualified Z.Data.Parser.Base as P import qualified Z.Data.Vector.Base as V import qualified Z.Data.Vector.Extra as V import Z.Foreign import System.IO.Unsafe #define WORD64_SAFE_DIGITS_LEN 19 #define INT64_SAFE_DIGITS_LEN 18 -- | Parse and decode an unsigned hex number, fail if input length is larger than (bit_size/4). The hex digits -- @\'a\'@ through @\'f\'@ may be upper or lower case. -- -- This parser does not accept a leading @\"0x\"@ string, and consider -- sign bit part of the binary hex nibbles, e.g. -- -- >>> parse' hex "FF" == Right (-1 :: Int8) -- >>> parse' hex "7F" == Right (127 :: Int8) -- >>> parse' hex "7Ft" == Right (127 :: Int8) -- >>> parse' hex "7FF" == Left ["Z.Data.Parser.Numeric.hex","hex numeric number overflow"] -- hex :: forall a.(Integral a, FiniteBits a) => Parser a {-# INLINABLE hex #-} {-# SPECIALIZE INLINE hex :: Parser Int #-} {-# SPECIALIZE INLINE hex :: Parser Int8 #-} {-# SPECIALIZE INLINE hex :: Parser Int16 #-} {-# SPECIALIZE INLINE hex :: Parser Int32 #-} {-# SPECIALIZE INLINE hex :: Parser Int64 #-} {-# SPECIALIZE INLINE hex :: Parser Word #-} {-# SPECIALIZE INLINE hex :: Parser Word8 #-} {-# SPECIALIZE INLINE hex :: Parser Word16 #-} {-# SPECIALIZE INLINE hex :: Parser Word32 #-} {-# SPECIALIZE INLINE hex :: Parser Word64 #-} hex = "Z.Data.Parser.Numeric.hex" do bs <- P.takeWhile1 isHexDigit if V.length bs <= finiteBitSize (undefined :: a) `unsafeShiftR` 2 then return $! hexLoop 0 bs else P.fail' "hex numeric number overflow" -- | Same with 'hex', but only take as many as (bit_size/4) bytes. -- -- >>> parse' hex "FF" == Right (-1 :: Int8) -- >>> parse' hex "7F" == Right (127 :: Int8) -- >>> parse' hex "7Ft" == Right (127 :: Int8) -- >>> parse' hex "7FF" == Right (127 :: Int8) hex' :: forall a.(Integral a, FiniteBits a) => Parser a {-# INLINABLE hex' #-} {-# SPECIALIZE INLINE hex' :: Parser Int #-} {-# SPECIALIZE INLINE hex' :: Parser Int8 #-} {-# SPECIALIZE INLINE hex' :: Parser Int16 #-} {-# SPECIALIZE INLINE hex' :: Parser Int32 #-} {-# SPECIALIZE INLINE hex' :: Parser Int64 #-} {-# SPECIALIZE INLINE hex' :: Parser Word #-} {-# SPECIALIZE INLINE hex' :: Parser Word8 #-} {-# SPECIALIZE INLINE hex' :: Parser Word16 #-} {-# SPECIALIZE INLINE hex' :: Parser Word32 #-} {-# SPECIALIZE INLINE hex' :: Parser Word64 #-} hex' = "Z.Data.Parser.Numeric.hex'" do hexLoop 0 <$> P.takeN isHexDigit (finiteBitSize (undefined :: a) `unsafeShiftR` 2) where -- | Same with 'hex', but silently cast in case of overflow. -- -- >>> parse' hex "FF" == Right (-1 :: Int8) -- >>> parse' hex "7F" == Right (127 :: Int8) -- >>> parse' hex "7Ft" == Right (127 :: Int8) -- >>> parse' hex "7FF" == Right (-1 :: Int8) hex_ :: (Integral a, Bits a) => Parser a {-# INLINABLE hex_ #-} {-# SPECIALIZE INLINE hex_ :: Parser Int #-} {-# SPECIALIZE INLINE hex_ :: Parser Int8 #-} {-# SPECIALIZE INLINE hex_ :: Parser Int16 #-} {-# SPECIALIZE INLINE hex_ :: Parser Int32 #-} {-# SPECIALIZE INLINE hex_ :: Parser Int64 #-} {-# SPECIALIZE INLINE hex_ :: Parser Word #-} {-# SPECIALIZE INLINE hex_ :: Parser Word8 #-} {-# SPECIALIZE INLINE hex_ :: Parser Word16 #-} {-# SPECIALIZE INLINE hex_ :: Parser Word32 #-} {-# SPECIALIZE INLINE hex_ :: Parser Word64 #-} hex_ = "Z.Data.Parser.Numeric.hex_" hexLoop 0 <$> P.takeWhile1 isHexDigit -- | decode hex digits sequence within an array. hexLoop :: forall a. (Integral a, Bits a) => a -- ^ accumulator, usually start from 0 -> V.Bytes -> a {-# INLINE hexLoop #-} hexLoop = V.foldl' step where step a w = a `unsafeShiftL` 4 + fromIntegral (w2iHex w :: a) -- | Convert A ASCII hex digit to 'Int' value. w2iHex :: Integral a => Word8 -> a {-# INLINE w2iHex #-} w2iHex w | w <= 57 = fromIntegral w - 48 | w <= 70 = fromIntegral w - 55 | otherwise = fromIntegral w - 87 -- | Same with 'uint', but sliently cast in case of overflow. uint_ :: forall a. (Integral a, Bounded a) => Parser a {-# INLINABLE uint_ #-} {-# SPECIALIZE INLINE uint_ :: Parser Int #-} {-# SPECIALIZE INLINE uint_ :: Parser Int8 #-} {-# SPECIALIZE INLINE uint_ :: Parser Int16 #-} {-# SPECIALIZE INLINE uint_ :: Parser Int32 #-} {-# SPECIALIZE INLINE uint_ :: Parser Int64 #-} {-# SPECIALIZE INLINE uint_ :: Parser Word #-} {-# SPECIALIZE INLINE uint_ :: Parser Word8 #-} {-# SPECIALIZE INLINE uint_ :: Parser Word16 #-} {-# SPECIALIZE INLINE uint_ :: Parser Word32 #-} {-# SPECIALIZE INLINE uint_ :: Parser Word64 #-} uint_ = "Z.Data.Parser.Numeric.uint_" decLoop 0 <$> P.takeWhile1 isDigit -- | Parse and decode an unsigned decimal number. -- -- Will fail in case of overflow. uint :: forall a. (Integral a, Bounded a) => Parser a {-# INLINABLE uint #-} {-# SPECIALIZE INLINE uint :: Parser Int #-} {-# SPECIALIZE INLINE uint :: Parser Int8 #-} {-# SPECIALIZE INLINE uint :: Parser Int16 #-} {-# SPECIALIZE INLINE uint :: Parser Int32 #-} {-# SPECIALIZE INLINE uint :: Parser Int64 #-} {-# SPECIALIZE INLINE uint :: Parser Word #-} {-# SPECIALIZE INLINE uint :: Parser Word8 #-} {-# SPECIALIZE INLINE uint :: Parser Word16 #-} {-# SPECIALIZE INLINE uint :: Parser Word32 #-} {-# SPECIALIZE INLINE uint :: Parser Word64 #-} uint = "Z.Data.Parser.Numeric.uint" do bs <- P.takeWhile1 isDigit if V.length bs <= WORD64_SAFE_DIGITS_LEN then do let w64 = decLoop @Word64 0 bs if w64 <= fromIntegral (maxBound :: a) then return $! fromIntegral w64 else P.fail' "decimal numeric value overflow" else do let w64 = decLoop @Integer 0 bs if w64 <= fromIntegral (maxBound :: a) then return $! fromIntegral w64 else P.fail' "decimal numeric value overflow" -- | Decode digits sequence within an array. -- -- This function may overflow if result can't fit into type. decLoop :: Integral a => a -- ^ accumulator, usually start from 0 -> V.Bytes -> a {-# INLINE decLoop #-} decLoop = V.foldl' step where step a w = a * 10 + w2iDec w -- | Convert A ASCII decimal digit to 'Int' value. -- w2iDec :: Integral a => Word8 -> a {-# INLINE w2iDec #-} w2iDec w = fromIntegral w - 48 -- | decode digits sequence within an array. -- -- A fast version to decode 'Integer' using machine word as much as possible. decLoopIntegerFast :: V.Bytes -> Integer {-# INLINE decLoopIntegerFast #-} decLoopIntegerFast bs | V.length bs <= WORD64_SAFE_DIGITS_LEN = fromIntegral (decLoop @Word64 0 bs) | otherwise = decLoop @Integer 0 bs -- | Take a single decimal digit and return as 'Int'. -- digit :: Parser Int {-# INLINE digit #-} digit = do d <- P.satisfy isDigit return $! w2iDec d -- | Parse a decimal number with an optional leading @\'+\'@ or @\'-\'@ sign -- character. -- -- This parser will fail if overflow happens. int :: forall a. (Integral a, Bounded a) => Parser a {-# INLINABLE int #-} {-# SPECIALIZE INLINE int :: Parser Int #-} {-# SPECIALIZE INLINE int :: Parser Int8 #-} {-# SPECIALIZE INLINE int :: Parser Int16 #-} {-# SPECIALIZE INLINE int :: Parser Int32 #-} {-# SPECIALIZE INLINE int :: Parser Int64 #-} {-# SPECIALIZE INLINE int :: Parser Word #-} {-# SPECIALIZE INLINE int :: Parser Word8 #-} {-# SPECIALIZE INLINE int :: Parser Word16 #-} {-# SPECIALIZE INLINE int :: Parser Word32 #-} {-# SPECIALIZE INLINE int :: Parser Word64 #-} int = "Z.Data.Parser.Numeric.int" do w <- P.peek if w == MINUS then P.skipWord8 *> loopNe else if w == PLUS then P.skipWord8 *> loop else loop where loop = do bs <- P.takeWhile1 isDigit if V.length bs <= WORD64_SAFE_DIGITS_LEN then do let w64 = decLoop @Word64 0 bs if w64 <= fromIntegral (maxBound :: a) then return $! fromIntegral w64 else P.fail' "decimal numeric value overflow" else do let w64 = decLoop @Integer 0 bs if w64 <= fromIntegral (maxBound :: a) then return $! fromIntegral w64 else P.fail' "decimal numeric value overflow" loopNe = do bs <- P.takeWhile1 isDigit if V.length bs <= INT64_SAFE_DIGITS_LEN then do let i64 = negate (decLoop @Int64 0 bs) if i64 >= fromIntegral (minBound :: a) then return $! fromIntegral i64 else P.fail' "decimal numeric value overflow" else do let i64 = negate (decLoop @Integer 0 bs) if i64 >= fromIntegral (minBound :: a) then return $! fromIntegral i64 else P.fail' "decimal numeric value overflow" -- | Same with 'int', but sliently cast if overflow happens. int_ :: (Integral a, Bounded a) => Parser a {-# INLINABLE int_ #-} {-# SPECIALIZE INLINE int_ :: Parser Int #-} {-# SPECIALIZE INLINE int_ :: Parser Int8 #-} {-# SPECIALIZE INLINE int_ :: Parser Int16 #-} {-# SPECIALIZE INLINE int_ :: Parser Int32 #-} {-# SPECIALIZE INLINE int_ :: Parser Int64 #-} {-# SPECIALIZE INLINE int_ :: Parser Word #-} {-# SPECIALIZE INLINE int_ :: Parser Word8 #-} {-# SPECIALIZE INLINE int_ :: Parser Word16 #-} {-# SPECIALIZE INLINE int_ :: Parser Word32 #-} {-# SPECIALIZE INLINE int_ :: Parser Word64 #-} int_ = "Z.Data.Parser.Numeric.int_" do w <- P.peek if w == MINUS then P.skipWord8 *> (negate <$> loop) else if w == PLUS then P.skipWord8 *> loop else loop where loop = decLoop 0 <$> P.takeWhile1 isDigit -- | Parser specifically optimized for 'Integer'. -- integer :: Parser Integer {-# INLINABLE integer #-} integer = "Z.Data.Parser.Numeric.integer" do w <- P.peek if w == MINUS then P.skipWord8 *> (negate <$> integer') else if w == PLUS then P.skipWord8 *> integer' else integer' where -- strip integer's message integer' = decLoopIntegerFast <$> P.takeWhile1 isDigit -- | Parse a rational number. -- -- The syntax accepted by this parser is the same as for 'double'. -- -- /Note/: this parser is not safe for use with inputs from untrusted -- sources. An input with a suitably large exponent such as -- @"1e1000000000"@ will cause a huge 'Integer' to be allocated, -- resulting in what is effectively a denial-of-service attack. -- -- In most cases, it is better to use 'double' or 'scientific' -- instead. -- rational :: (Fractional a) => Parser a {-# INLINABLE rational #-} rational = "Z.Data.Parser.Numeric.rational" scientificallyInternal realToFrac -- | Parse a rational number and round to 'Double'. -- -- This parser accepts an optional leading sign character, followed by -- at least one decimal digit. The syntax similar to that accepted by -- the 'read' function, with the exception that a trailing @\'.\'@ or -- @\'e\'@ /not/ followed by a number is not consumed. -- -- Examples with behaviour identical to 'read': -- -- >parse' double "3" == ("", Right 3.0) -- >parse' double "3.1" == ("", Right 3.1) -- >parse' double "3e4" == ("", Right 30000.0) -- >parse' double "3.1e4" == ("", Right 31000.0) -- -- >parse' double ".3" == (".3", Left ParserError) -- >parse' double "e3" == ("e3", Left ParserError) -- -- Examples of differences from 'read': -- -- >parse' double "3.foo" == (".foo", Right 3.0) -- >parse' double "3e" == ("e", Right 3.0) -- >parse' double "-3e" == ("e", Right -3.0) -- -- This function does not accept string representations of \"NaN\" or -- \"Infinity\". -- double :: Parser Double {-# INLINABLE double #-} double = "Z.Data.Parser.Numeric.double" scientificallyInternal sciToDouble -- | Parse a rational number and round to 'Float'. -- -- Single precision version of 'double'. float :: Parser Float {-# INLINABLE float #-} float = "Z.Data.Parser.Numeric.float" scientificallyInternal Sci.toRealFloat -- | Parse a scientific number. -- -- The syntax accepted by this parser is the same as for 'double'. -- scientific :: Parser Sci.Scientific {-# INLINABLE scientific #-} scientific = "Z.Data.Parser.Numeric.scientific" scientificallyInternal id -- | Parse a scientific number and convert to result using a user supply function. -- -- The syntax accepted by this parser is the same as for 'double'. scientifically :: (Sci.Scientific -> a) -> Parser a {-# INLINABLE scientifically #-} scientifically h = "Z.Data.Parser.Numeric.scientifically" scientificallyInternal h -- | Strip message version. scientificallyInternal :: (Sci.Scientific -> a) -> Parser a {-# INLINE scientificallyInternal #-} scientificallyInternal h = do !sign <- P.peek when (sign == PLUS || sign == MINUS) (P.skipWord8) !intPart <- P.takeWhile1 isDigit -- backtrack here is neccessary to avoid eating extra dot or e -- attoparsec is doing it wrong here: https://github.com/bos/attoparsec/issues/112 !sci <- (do -- during number parsing we want to use machine word as much as possible -- so as long as range permit, we use Word64 instead of final Integer !fracPart <- P.word8 DOT *> P.takeWhile1 isDigit let !ilen = V.length intPart !flen = V.length fracPart !base = if ilen + flen <= WORD64_SAFE_DIGITS_LEN then fromIntegral (decLoop @Word64 (decLoop @Word64 0 intPart) fracPart) else let i = decLoopIntegerFast intPart f = decLoopIntegerFast fracPart in i * (expt 10 flen) + f parseE base flen) <|> (parseE (decLoopIntegerFast intPart) 0) -- intentionally lazy return here, we have done the grammar check, and h could potentially be very expensive, e.g. sciToDouble -- retained references are sign and sci, which are already in NF pure (if sign /= MINUS then h sci else h (negate sci)) where parseE c e = (do _ <- P.satisfy (\w -> w == LETTER_e || w == LETTER_E) e' <- int pure $! Sci.scientific c (e' - e)) <|> (pure $! Sci.scientific c (negate e)) -------------------------------------------------------------------------------- -- | Parse a rational number. -- -- The syntax accepted by this parser is the same as for 'double''. -- -- /Note/: this parser is not safe for use with inputs from untrusted -- sources. An input with a suitably large exponent such as -- @"1e1000000000"@ will cause a huge 'Integer' to be allocated, -- resulting in what is effectively a denial-of-service attack. -- -- In most cases, it is better to use 'double'' or 'scientific'' -- instead. -- rational' :: (Fractional a) => Parser a {-# INLINABLE rational' #-} rational' = "Z.Data.Parser.Numeric.rational'" scientificallyInternal' realToFrac -- | More strict number parsing(rfc8259). -- -- 'scientific' support parse @2314.@ and @21321exyz@ without eating extra dot or @e@ via -- backtrack, this is not allowed in some strict grammer such as JSON, so we make an -- non-backtrack strict number parser separately using LL(1) lookahead. This parser also -- agree with 'read' on extra dot or e handling: -- -- >parse' double "3.foo" == Left ParseError -- >parse' double "3e" == Left ParseError -- -- Leading zeros or @+@ sign is also not allowed: -- -- >parse' double "+3.14" == Left ParseError -- >parse' double "0014" == Left ParseError -- -- If you have a similar grammer, you can use this parser to save considerable time. -- -- @ -- number = [ minus ] int [ frac ] [ exp ] -- decimal-point = %x2E ; . -- digit1-9 = %x31-39 ; 1-9 -- e = %x65 / %x45 ; e E -- exp = e [ minus / plus ] 1*DIGIT -- frac = decimal-point 1*DIGIT -- @ -- -- This function does not accept string representations of \"NaN\" or -- \"Infinity\". -- reference: https://tools.ietf.org/html/rfc8259#section-6 double' :: Parser Double {-# INLINABLE double' #-} double' = "Z.Data.Parser.Numeric.double'" scientificallyInternal' sciToDouble #define FASTFLOAT_SMALLEST_POWER -325 #define FASTFLOAT_LARGEST_POWER 308 -- | Faster scientific to double conversion using . -- -- See @cbits/compute_float_64.c@. sciToDouble :: Sci.Scientific -> Double {-# INLINABLE sciToDouble #-} sciToDouble sci = case c of (IS i#) | (e >= FASTFLOAT_SMALLEST_POWER && e <= FASTFLOAT_LARGEST_POWER) -> unsafeDupablePerformIO $ do let i = (I# i#) s = if i >= 0 then 0 else 1 i' = fromIntegral $ if i >= 0 then i else (0-i) (success, r) <- allocPrimUnsafe @Word8 (compute_float_64 (fromIntegral e) i' s) if success == 0 then return $! Sci.toRealFloat sci else return $! r _ -> Sci.toRealFloat sci where e = Sci.base10Exponent sci c = Sci.coefficient sci -- | Parse a rational number and round to 'Float' using stricter grammer. -- -- Single precision version of 'double''. float' :: Parser Float {-# INLINABLE float' #-} float' = "Z.Data.Parser.Numeric.float'" scientificallyInternal' Sci.toRealFloat -- | Parse a scientific number. -- -- The syntax accepted by this parser is the same as for 'double''. scientific' :: Parser Sci.Scientific {-# INLINABLE scientific' #-} scientific' = "Z.Data.Parser.Numeric.scientific'" scientificallyInternal' id -- | Parse a scientific number and convert to result using a user supply function. -- -- The syntax accepted by this parser is the same as for 'double''. scientifically' :: (Sci.Scientific -> a) -> P.Parser a {-# INLINABLE scientifically' #-} scientifically' h = "Z.Data.Parser.Numeric.scientifically'" scientificallyInternal' h -- | Strip message version of scientifically'. scientificallyInternal' :: (Sci.Scientific -> a) -> P.Parser a {-# INLINE scientificallyInternal' #-} scientificallyInternal' h = do !sign <- P.peek when (sign == MINUS) (P.skipWord8) -- no leading plus is allowed !intPart <- P.takeWhile1 isDigit when (V.length intPart > 1 && V.head intPart == DIGIT_0) (P.fail' "leading zeros are not allowed") mdot <- P.peekMaybe !sci <- case mdot of Just DOT -> do !fracPart <- P.skipWord8 *> P.takeWhile1 isDigit -- during number parsing we want to use machine word as much as possible -- so as long as range permit, we use Word64 instead of final Integer let !ilen = V.length intPart !flen = V.length fracPart !base = if ilen + flen <= WORD64_SAFE_DIGITS_LEN then fromIntegral (decLoop @Word64 (decLoop @Word64 0 intPart) fracPart) else let i = decLoopIntegerFast intPart f = decLoopIntegerFast fracPart in i * (expt 10 flen) + f parseE base flen _ -> parseE (decLoopIntegerFast intPart) 0 -- intentionally lazy return here, we have done the grammar check, and h could potentially be very expensive, e.g. sciToDouble -- retained references are sign and sci, which are already in NF pure (if sign /= MINUS then h sci else h (negate sci)) where parseE !c !e = do me <- P.peekMaybe e' <- case me of Just ec | ec == LETTER_e || ec == LETTER_E -> P.skipWord8 *> int _ -> pure 0 pure $! Sci.scientific c (e' - e) foreign import ccall unsafe compute_float_64 :: Int64 -- ^ power of 10 -> Word64 -- ^ base -> Word8 -- ^ negative -> MBA# Word8 -- ^ success? -> IO Double -- ^ result