{-# LANGUAGE BangPatterns #-} {-# LANGUAGE BinaryLiterals #-} {-# LANGUAGE CPP #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE GADTSyntax #-} {-# LANGUAGE KindSignatures #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE MagicHash #-} {-# LANGUAGE MultiWayIf #-} {-# LANGUAGE PolyKinds #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE UnboxedSums #-} {-# LANGUAGE UnboxedTuples #-} {- | Parse input as though it were text encoded by ISO 8859-1 (Latin-1). All byte sequences are valid text under ISO 8859-1. -} module Data.Bytes.Parser.Latin ( -- * Matching -- ** Required char , char2 , char3 , char4 , char5 , char6 , char7 , char8 , char9 , char10 , char11 , char12 -- ** Try , trySatisfy , trySatisfyThen -- * One Character , any , opt , opt# -- * Many Characters , takeTrailedBy -- * Lookahead , peek , peek' -- * Skip , skipDigits , skipDigits1 , skipChar , skipChar1 , skipTrailedBy , skipUntil , skipWhile -- * End of Input , endOfInput , isEndOfInput -- * Numbers -- ** Decimal -- *** Unsigned , decWord , decWord8 , decWord16 , decWord32 , decWord64 -- *** Signed , decUnsignedInt , decUnsignedInt# , decSignedInt , decStandardInt , decTrailingInt , decTrailingInt# , decSignedInteger , decUnsignedInteger , decTrailingInteger -- ** Hexadecimal -- *** Variable Length , hexWord8 , hexWord16 , hexWord32 -- *** Fixed Length , hexFixedWord8 , hexFixedWord16 , hexFixedWord32 , hexFixedWord64 , hexFixedWord128 , hexFixedWord256 -- *** Digit , hexNibbleLower , tryHexNibbleLower , hexNibble , tryHexNibble ) where import Prelude hiding (any, fail, length, takeWhile) import Data.Bits ((.|.)) import Data.Bytes.Parser (bindFromLiftedToInt, endOfInput, isEndOfInput) import Data.Bytes.Parser.Internal (InternalStep (..), Parser (..), Result (..), Result#, ST#, boxBytes, indexLatinCharArray, uneffectful, uneffectful#, unfailing, upcastUnitSuccess) import Data.Bytes.Parser.Unsafe (cursor, expose, unconsume) import Data.Bytes.Types (Bytes (..)) import Data.Char (ord) import Data.Kind (Type) import Data.WideWord (Word128 (Word128), Word256 (Word256)) import Data.Word (Word8) import GHC.Exts (Char (C#), Char#, Int (I#), Int#, RuntimeRep, TYPE, Word#, gtWord#, indexCharArray#, int2Word#, ltWord#, notI#, or#, (+#), (-#)) import GHC.Word (Word (W#), Word16 (W16#), Word32 (W32#), Word64 (W64#), Word8 (W8#)) import qualified Data.Bytes as Bytes import qualified Data.Primitive as PM import qualified GHC.Exts as Exts {- | Runs the predicate on the next character in the input. If the predicate is matched, this consumes the character. Otherwise, the character is not consumed. This returns @False@ if the end of the input has been reached. This never fails. -} trySatisfy :: (Char -> Bool) -> Parser e s Bool trySatisfy f = uneffectful $ \chunk -> case length chunk of 0 -> Success False (offset chunk) (length chunk) _ -> case f (indexLatinCharArray (array chunk) (offset chunk)) of True -> Success True (offset chunk + 1) (length chunk - 1) False -> Success False (offset chunk) (length chunk) {- | Runs the function on the next character in the input. If the function returns @Just@, this consumes the character and then runs the parser on the remaining input. If the function returns @Nothing@, this does not consume the tested character, and it runs the default parser on the input (which includes the tested character). If there is no input remaining, this also runs the default parser. This combinator never fails. -} trySatisfyThen :: forall (r :: RuntimeRep) (e :: Type) (s :: Type) (a :: TYPE r). -- | Default parser. Runs on @Nothing@ or end of input. Parser e s a -> -- | Parser-selecting predicate (Char -> Maybe (Parser e s a)) -> Parser e s a {-# INLINE trySatisfyThen #-} trySatisfyThen (Parser g) f = Parser ( \input@(# arr, off0, len0 #) s0 -> case len0 of 0# -> g input s0 _ -> case f (C# (indexCharArray# arr off0)) of Nothing -> g input s0 Just (Parser p) -> p (# arr, off0 +# 1#, len0 -# 1# #) s0 ) {- | Consume the next character, failing if it does not match the expected value or if there is no more input. -} char :: e -> Char -> Parser e s () {-# INLINE char #-} char e !c = uneffectful $ \chunk -> if length chunk > 0 then if indexLatinCharArray (array chunk) (offset chunk) == c then Success () (offset chunk + 1) (length chunk - 1) else Failure e else Failure e {- | Consume the next two characters, failing if they do not match the expected values. > char2 e a b === char e a *> char e b -} char2 :: e -> Char -> Char -> Parser e s () {-# INLINE char2 #-} char2 e !c0 !c1 = uneffectful $ \chunk -> if | length chunk > 1 , indexLatinCharArray (array chunk) (offset chunk) == c0 , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 -> Success () (offset chunk + 2) (length chunk - 2) | otherwise -> Failure e {- | Consume three characters, failing if they do not match the expected values. > char3 e a b c === char e a *> char e b *> char e c -} char3 :: e -> Char -> Char -> Char -> Parser e s () {-# INLINE char3 #-} char3 e !c0 !c1 !c2 = uneffectful $ \chunk -> if | length chunk > 2 , indexLatinCharArray (array chunk) (offset chunk) == c0 , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 -> Success () (offset chunk + 3) (length chunk - 3) | otherwise -> Failure e {- | Consume four characters, failing if they do not match the expected values. > char4 e a b c d === char e a *> char e b *> char e c *> char e d -} char4 :: e -> Char -> Char -> Char -> Char -> Parser e s () {-# INLINE char4 #-} char4 e !c0 !c1 !c2 !c3 = uneffectful $ \chunk -> if | length chunk > 3 , indexLatinCharArray (array chunk) (offset chunk) == c0 , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 , indexLatinCharArray (array chunk) (offset chunk + 3) == c3 -> Success () (offset chunk + 4) (length chunk - 4) | otherwise -> Failure e {- | Consume five characters, failing if they do not match the expected values. -} char5 :: e -> Char -> Char -> Char -> Char -> Char -> Parser e s () {-# INLINE char5 #-} char5 e !c0 !c1 !c2 !c3 !c4 = uneffectful $ \chunk -> if | length chunk > 4 , indexLatinCharArray (array chunk) (offset chunk) == c0 , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 , indexLatinCharArray (array chunk) (offset chunk + 3) == c3 , indexLatinCharArray (array chunk) (offset chunk + 4) == c4 -> Success () (offset chunk + 5) (length chunk - 5) | otherwise -> Failure e {- | Consume six characters, failing if they do not match the expected values. -} char6 :: e -> Char -> Char -> Char -> Char -> Char -> Char -> Parser e s () {-# INLINE char6 #-} char6 e !c0 !c1 !c2 !c3 !c4 !c5 = uneffectful $ \chunk -> if | length chunk > 5 , indexLatinCharArray (array chunk) (offset chunk) == c0 , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 , indexLatinCharArray (array chunk) (offset chunk + 3) == c3 , indexLatinCharArray (array chunk) (offset chunk + 4) == c4 , indexLatinCharArray (array chunk) (offset chunk + 5) == c5 -> Success () (offset chunk + 6) (length chunk - 6) | otherwise -> Failure e {- | Consume seven characters, failing if they do not match the expected values. -} char7 :: e -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Parser e s () {-# INLINE char7 #-} char7 e !c0 !c1 !c2 !c3 !c4 !c5 !c6 = uneffectful $ \chunk -> if | length chunk > 6 , indexLatinCharArray (array chunk) (offset chunk) == c0 , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 , indexLatinCharArray (array chunk) (offset chunk + 3) == c3 , indexLatinCharArray (array chunk) (offset chunk + 4) == c4 , indexLatinCharArray (array chunk) (offset chunk + 5) == c5 , indexLatinCharArray (array chunk) (offset chunk + 6) == c6 -> Success () (offset chunk + 7) (length chunk - 7) | otherwise -> Failure e {- | Consume eight characters, failing if they do not match the expected values. -} char8 :: e -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Parser e s () {-# INLINE char8 #-} char8 e !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 = uneffectful $ \chunk -> if | length chunk > 7 , indexLatinCharArray (array chunk) (offset chunk) == c0 , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 , indexLatinCharArray (array chunk) (offset chunk + 3) == c3 , indexLatinCharArray (array chunk) (offset chunk + 4) == c4 , indexLatinCharArray (array chunk) (offset chunk + 5) == c5 , indexLatinCharArray (array chunk) (offset chunk + 6) == c6 , indexLatinCharArray (array chunk) (offset chunk + 7) == c7 -> Success () (offset chunk + 8) (length chunk - 8) | otherwise -> Failure e {- | Consume nine characters, failing if they do not match the expected values. -} char9 :: e -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Parser e s () {-# INLINE char9 #-} char9 e !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 = uneffectful $ \chunk -> if | length chunk > 8 , indexLatinCharArray (array chunk) (offset chunk) == c0 , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 , indexLatinCharArray (array chunk) (offset chunk + 3) == c3 , indexLatinCharArray (array chunk) (offset chunk + 4) == c4 , indexLatinCharArray (array chunk) (offset chunk + 5) == c5 , indexLatinCharArray (array chunk) (offset chunk + 6) == c6 , indexLatinCharArray (array chunk) (offset chunk + 7) == c7 , indexLatinCharArray (array chunk) (offset chunk + 8) == c8 -> Success () (offset chunk + 9) (length chunk - 9) | otherwise -> Failure e {- | Consume ten characters, failing if they do not match the expected values. -} char10 :: e -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Parser e s () {-# INLINE char10 #-} char10 e !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 = uneffectful $ \chunk -> if | length chunk > 9 , indexLatinCharArray (array chunk) (offset chunk) == c0 , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 , indexLatinCharArray (array chunk) (offset chunk + 3) == c3 , indexLatinCharArray (array chunk) (offset chunk + 4) == c4 , indexLatinCharArray (array chunk) (offset chunk + 5) == c5 , indexLatinCharArray (array chunk) (offset chunk + 6) == c6 , indexLatinCharArray (array chunk) (offset chunk + 7) == c7 , indexLatinCharArray (array chunk) (offset chunk + 8) == c8 , indexLatinCharArray (array chunk) (offset chunk + 9) == c9 -> Success () (offset chunk + 10) (length chunk - 10) | otherwise -> Failure e {- | Consume eleven characters, failing if they do not match the expected values. -} char11 :: e -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Parser e s () {-# INLINE char11 #-} char11 e !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 !c10 = uneffectful $ \chunk -> if | length chunk > 10 , indexLatinCharArray (array chunk) (offset chunk) == c0 , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 , indexLatinCharArray (array chunk) (offset chunk + 3) == c3 , indexLatinCharArray (array chunk) (offset chunk + 4) == c4 , indexLatinCharArray (array chunk) (offset chunk + 5) == c5 , indexLatinCharArray (array chunk) (offset chunk + 6) == c6 , indexLatinCharArray (array chunk) (offset chunk + 7) == c7 , indexLatinCharArray (array chunk) (offset chunk + 8) == c8 , indexLatinCharArray (array chunk) (offset chunk + 9) == c9 , indexLatinCharArray (array chunk) (offset chunk + 10) == c10 -> Success () (offset chunk + 11) (length chunk - 11) | otherwise -> Failure e {- | Consume twelve characters, failing if they do not match the expected values. -} char12 :: e -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Parser e s () {-# INLINE char12 #-} char12 e !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 !c10 !c11 = uneffectful $ \chunk -> if | length chunk > 11 , indexLatinCharArray (array chunk) (offset chunk) == c0 , indexLatinCharArray (array chunk) (offset chunk + 1) == c1 , indexLatinCharArray (array chunk) (offset chunk + 2) == c2 , indexLatinCharArray (array chunk) (offset chunk + 3) == c3 , indexLatinCharArray (array chunk) (offset chunk + 4) == c4 , indexLatinCharArray (array chunk) (offset chunk + 5) == c5 , indexLatinCharArray (array chunk) (offset chunk + 6) == c6 , indexLatinCharArray (array chunk) (offset chunk + 7) == c7 , indexLatinCharArray (array chunk) (offset chunk + 8) == c8 , indexLatinCharArray (array chunk) (offset chunk + 9) == c9 , indexLatinCharArray (array chunk) (offset chunk + 10) == c10 , indexLatinCharArray (array chunk) (offset chunk + 11) == c11 -> Success () (offset chunk + 12) (length chunk - 12) | otherwise -> Failure e -- | Consumes and returns the next character in the input. any :: e -> Parser e s Char {-# INLINE any #-} any e = uneffectful $ \chunk -> if length chunk > 0 then let c = indexLatinCharArray (array chunk) (offset chunk) in Success c (offset chunk + 1) (length chunk - 1) else Failure e {- | Consume a character from the input or return @Nothing@ if end of the stream has been reached. Since ISO 8859-1 maps every bytes to a character, this parser never fails. -} opt :: Parser e s (Maybe Char) {-# INLINE opt #-} opt = uneffectful $ \chunk -> case length chunk of 0 -> Success Nothing (offset chunk) (length chunk) _ -> Success (Just (indexLatinCharArray (array chunk) (offset chunk))) (offset chunk + 1) (length chunk - 1) -- | Variant of @opt@ with unboxed result. opt# :: Parser e s (# (# #) | Char# #) {-# INLINE opt# #-} opt# = Parser ( \(# arr, off, len #) s0 -> case len of 0# -> (# s0, (# | (# (# (# #) | #), off, len #) #) #) _ -> (# s0, (# | (# (# | indexCharArray# arr off #), off +# 1#, len -# 1# #) #) #) ) skipDigitsAsciiLoop :: Bytes -> -- Chunk (# Int#, Int# #) skipDigitsAsciiLoop !c = if length c > 0 then let w = indexLatinCharArray (array c) (offset c) in if w >= '0' && w <= '9' then skipDigitsAsciiLoop (Bytes.unsafeDrop 1 c) else (# unI (offset c), unI (length c) #) else (# unI (offset c), unI (length c) #) skipDigitsAscii1LoopStart :: e -> Bytes -> -- chunk Result# e () skipDigitsAscii1LoopStart e !c = if length c > 0 then let w = indexLatinCharArray (array c) (offset c) in if w >= '0' && w <= '9' then upcastUnitSuccess (skipDigitsAsciiLoop (Bytes.unsafeDrop 1 c)) else (# e | #) else (# e | #) {- | Variant of 'skipDigits' that requires at least one digit to be present. -} skipDigits1 :: e -> Parser e s () {-# INLINE skipDigits1 #-} skipDigits1 e = uneffectful# $ \c -> skipDigitsAscii1LoopStart e c {- | Skip the characters @0-9@ until a non-digit is encountered. This parser does not fail. -} skipDigits :: Parser e s () skipDigits = uneffectful# $ \c -> upcastUnitSuccess (skipDigitsAsciiLoop c) unI :: Int -> Int# {-# INLINE unI #-} unI (I# w) = w {- | Skip the character any number of times. This succeeds even if the character was not present. -} skipChar :: Char -> Parser e s () {-# INLINE skipChar #-} skipChar !w = uneffectful# $ \c -> upcastUnitSuccess (skipLoop w c) {- | Skip the character any number of times. It must occur at least once or else this will fail. -} skipChar1 :: e -> Char -> Parser e s () {-# INLINE skipChar1 #-} skipChar1 e !w = uneffectful# $ \c -> skipLoop1Start e w c skipLoop :: Char -> -- byte to match Bytes -> -- Chunk (# Int#, Int# #) skipLoop !w !c = if length c > 0 then if indexLatinCharArray (array c) (offset c) == w then skipLoop w (Bytes.unsafeDrop 1 c) else (# unI (offset c), unI (length c) #) else (# unI (offset c), unI (length c) #) skipLoop1Start :: e -> Char -> -- byte to match Bytes -> -- chunk Result# e () skipLoop1Start e !w !chunk0 = if length chunk0 > 0 then if indexLatinCharArray (array chunk0) (offset chunk0) == w then upcastUnitSuccess (skipLoop w (Bytes.unsafeDrop 1 chunk0)) else (# e | #) else (# e | #) {- | Parse a decimal-encoded 8-bit word. If the number is larger than 255, this parser fails. -} decWord8 :: e -> Parser e s Word8 decWord8 e = Parser ( \chunk0 s0 -> case decSmallWordStart e 256 (boxBytes chunk0) s0 of (# s1, r #) -> (# s1, upcastWord8Result r #) ) {- | Parse a hexadecimal-encoded 8-bit word. If the number is larger than 255, this parser fails. This allows leading zeroes and is insensitive to case. For example, @00A@, @0a@ and @A@ would all be accepted as the same number. -} hexWord8 :: e -> Parser e s Word8 hexWord8 e = Parser ( \chunk0 s0 -> case hexSmallWordStart e 256 (boxBytes chunk0) s0 of (# s1, r #) -> (# s1, upcastWord8Result r #) ) {- | Parse a hexadecimal-encoded 16-bit word. If the number is larger than 65535, this parser fails. This allows leading zeroes and is insensitive to case. For example, @0100a@ and @100A@ would both be accepted as the same number. -} hexWord16 :: e -> Parser e s Word16 hexWord16 e = Parser ( \chunk0 s0 -> case hexSmallWordStart e 65536 (boxBytes chunk0) s0 of (# s1, r #) -> (# s1, upcastWord16Result r #) ) hexWord32 :: e -> Parser e s Word32 hexWord32 e = Parser ( \chunk0 s0 -> case hexSmallWordStart e 4294967296 (boxBytes chunk0) s0 of (# s1, r #) -> (# s1, upcastWord32Result r #) ) {- | Parse a decimal-encoded 16-bit word. If the number is larger than 65535, this parser fails. -} decWord16 :: e -> Parser e s Word16 decWord16 e = Parser ( \chunk0 s0 -> case decSmallWordStart e 65536 (boxBytes chunk0) s0 of (# s1, r #) -> (# s1, upcastWord16Result r #) ) {- | Parse a decimal-encoded 32-bit word. If the number is larger than 4294967295, this parser fails. -} decWord32 :: e -> Parser e s Word32 -- This will not work on 32-bit platforms. decWord32 e = Parser ( \chunk0 s0 -> case decSmallWordStart e 4294967296 (boxBytes chunk0) s0 of (# s1, r #) -> (# s1, upcastWord32Result r #) ) {- | Parse a decimal-encoded number. If the number is too large to be represented by a machine word, this fails with the provided error message. This accepts any number of leading zeroes. -} decWord :: e -> Parser e s Word decWord e = Parser ( \chunk0 s0 -> case decWordStart e (boxBytes chunk0) s0 of (# s1, r #) -> (# s1, upcastWordResult r #) ) {- | Parse a decimal-encoded unsigned number. If the number is too large to be represented by a 64-bit word, this fails with the provided error message. This accepts any number of leading zeroes. -} decWord64 :: e -> Parser e s Word64 decWord64 e = Parser ( \chunk0 s0 -> case decWordStart e (boxBytes chunk0) s0 of (# s1, r #) -> (# s1, upcastWord64Result r #) ) hexSmallWordStart :: e -> -- Error message Word -> -- Upper Bound Bytes -> -- Chunk ST# s (Result# e Word#) hexSmallWordStart e !limit !chunk0 s0 = if length chunk0 > 0 then case oneHexMaybe (PM.indexByteArray (array chunk0) (offset chunk0)) of Nothing -> (# s0, (# e | #) #) Just w -> (# s0, hexSmallWordMore e w limit (Bytes.unsafeDrop 1 chunk0) #) else (# s0, (# e | #) #) decSmallWordStart :: e -> -- Error message Word -> -- Upper Bound Bytes -> -- Chunk ST# s (Result# e Word#) decSmallWordStart e !limit !chunk0 s0 = if length chunk0 > 0 then let !w = fromIntegral @Word8 @Word (PM.indexByteArray (array chunk0) (offset chunk0)) - 48 in if w < 10 then (# s0, decSmallWordMore e w limit (Bytes.unsafeDrop 1 chunk0) #) else (# s0, (# e | #) #) else (# s0, (# e | #) #) -- This will not inline since it is recursive, but worker -- wrapper will still happen. decWordMore :: e -> -- Error message Word -> -- Accumulator Bytes -> -- Chunk Result# e Word# decWordMore e !acc !chunk0 = case len of 0 -> (# | (# unW (fromIntegral acc), unI (offset chunk0), 0# #) #) _ -> let !w = fromIntegral @Word8 @Word (PM.indexByteArray (array chunk0) (offset chunk0)) - 48 in if w < 10 then let (overflow, acc') = unsignedPushBase10 acc w in if overflow then (# e | #) else decWordMore e acc' (Bytes.unsafeDrop 1 chunk0) else (# | (# unW (fromIntegral acc), unI (offset chunk0), len# #) #) where !len@(I# len#) = length chunk0 upcastWordResult :: Result# e Word# -> Result# e Word {-# INLINE upcastWordResult #-} upcastWordResult (# e | #) = (# e | #) upcastWordResult (# | (# a, b, c #) #) = (# | (# W# a, b, c #) #) {- FOURMOLU_DISABLE -} -- This only works on 64-bit platforms. upcastWord64Result :: Result# e Word# -> Result# e Word64 {-# inline upcastWord64Result #-} upcastWord64Result (# e | #) = (# e | #) upcastWord64Result (# | (# a, b, c #) #) = (# | (# W64# ( #if MIN_VERSION_base(4,17,0) Exts.wordToWord64# a #else a #endif ), b, c #) #) {- FOURMOLU_ENABLE -} hexSmallWordMore :: e -> -- Error message Word -> -- Accumulator Word -> -- Upper Bound Bytes -> -- Chunk Result# e Word# hexSmallWordMore e !acc !limit !chunk0 = if length chunk0 > 0 then case oneHexMaybe (PM.indexByteArray (array chunk0) (offset chunk0)) of Nothing -> (# | (# unW acc, unI (offset chunk0), unI (length chunk0) #) #) Just w -> let w' = acc * 16 + w in if w' < limit then hexSmallWordMore e w' limit (Bytes.unsafeDrop 1 chunk0) else (# e | #) else (# | (# unW acc, unI (offset chunk0), 0# #) #) decSmallWordMore :: e -> -- Error message Word -> -- Accumulator Word -> -- Upper Bound Bytes -> -- Chunk Result# e Word# decSmallWordMore e !acc !limit !chunk0 = if length chunk0 > 0 then let !w = fromIntegral @Word8 @Word (PM.indexByteArray (array chunk0) (offset chunk0)) - 48 in if w < 10 then let w' = acc * 10 + w in if w' < limit then decSmallWordMore e w' limit (Bytes.unsafeDrop 1 chunk0) else (# e | #) else (# | (# unW acc, unI (offset chunk0), unI (length chunk0) #) #) else (# | (# unW acc, unI (offset chunk0), 0# #) #) unW :: Word -> Word# {-# INLINE unW #-} unW (W# w) = w decWordStart :: e -> -- Error message Bytes -> -- Chunk ST# s (Result# e Word#) decWordStart e !chunk0 s0 = if length chunk0 > 0 then let !w = fromIntegral @Word8 @Word (PM.indexByteArray (array chunk0) (offset chunk0)) - 48 in if w < 10 then (# s0, decWordMore e w (Bytes.unsafeDrop 1 chunk0) #) else (# s0, (# e | #) #) else (# s0, (# e | #) #) {- FOURMOLU_DISABLE -} -- Precondition: the word is small enough upcastWord16Result :: Result# e Word# -> Result# e Word16 {-# inline upcastWord16Result #-} upcastWord16Result (# e | #) = (# e | #) upcastWord16Result (# | (# a, b, c #) #) = (# | (# W16# ( #if MIN_VERSION_base(4,16,0) Exts.wordToWord16# #endif a), b, c #) #) -- Precondition: the word is small enough upcastWord32Result :: Result# e Word# -> Result# e Word32 {-# inline upcastWord32Result #-} upcastWord32Result (# e | #) = (# e | #) upcastWord32Result (# | (# a, b, c #) #) = (# | (# W32# ( #if MIN_VERSION_base(4,16,0) Exts.wordToWord32# #endif a), b, c #) #) -- Precondition: the word is small enough upcastWord8Result :: Result# e Word# -> Result# e Word8 {-# inline upcastWord8Result #-} upcastWord8Result (# e | #) = (# e | #) upcastWord8Result (# | (# a, b, c #) #) = (# | (# W8# ( #if MIN_VERSION_base(4,16,0) Exts.wordToWord8# #endif a), b, c #) #) {- FOURMOLU_ENABLE -} {- | Parse a decimal-encoded number. If the number is too large to be represented by a machine integer, this fails with the provided error message. This rejects input with that is preceeded by plus or minus. Consequently, it does not parse negative numbers. Use 'decStandardInt' or 'decSignedInt' for that purpose. On a 64-bit platform 'decWord' will successfully parse 9223372036854775808 (i.e. @2 ^ 63@), but 'decUnsignedInt' will fail. This parser allows leading zeroes. -} decUnsignedInt :: e -> Parser e s Int decUnsignedInt e = Parser ( \chunk0 s0 -> case decPosIntStart e (boxBytes chunk0) s0 of (# s1, r #) -> (# s1, upcastIntResult r #) ) -- | Variant of 'decUnsignedInt' with an unboxed result. decUnsignedInt# :: e -> Parser e s Int# decUnsignedInt# e = Parser (\chunk0 s0 -> decPosIntStart e (boxBytes chunk0) s0) {- | Parse a decimal-encoded number. If the number is too large to be represented by a machine integer, this fails with the provided error message. This allows the number to optionally be prefixed by plus or minus. If the sign prefix is not present, the number is interpreted as positive. This allows leading zeroes. -} decSignedInt :: e -> Parser e s Int decSignedInt e = Parser ( \chunk0 s0 -> case runParser (decSignedInt# e) chunk0 s0 of (# s1, r #) -> (# s1, upcastIntResult r #) ) {- | Variant of 'decUnsignedInt' that lets the caller supply a leading digit. This is useful when parsing formats like JSON where integers with leading zeroes are considered invalid. The calling context must consume the first digit before calling this parser. Results are always positive numbers. -} decTrailingInt :: -- | Error message e -> -- | Leading digit, should be between @0@ and @9@. Int -> Parser e s Int decTrailingInt e (I# w) = Parser ( \chunk0 s0 -> case runParser (decTrailingInt# e w) chunk0 s0 of (# s1, r #) -> (# s1, upcastIntResult r #) ) decTrailingInt# :: e -> -- Error message Int# -> -- Leading digit, should be between @0@ and @9@. Parser e s Int# decTrailingInt# e !w = Parser (\chunk0 s0 -> (# s0, decPosIntMore e (W# (int2Word# w)) maxIntAsWord (boxBytes chunk0) #)) maxIntAsWord :: Word maxIntAsWord = fromIntegral (maxBound :: Int) {- | Parse a decimal-encoded number. If the number is too large to be represented by a machine integer, this fails with the provided error message. This allows the number to optionally be prefixed by minus. If the minus prefix is not present, the number is interpreted as positive. The disallows a leading plus sign. For example, 'decStandardInt' rejects @+42@, but 'decSignedInt' allows it. -} decStandardInt :: e -> Parser e s Int decStandardInt e = Parser ( \chunk0 s0 -> case runParser (decStandardInt# e) chunk0 s0 of (# s1, r #) -> (# s1, upcastIntResult r #) ) decSignedInt# :: e -> Parser e s Int# {-# NOINLINE decSignedInt# #-} decSignedInt# e = any e `bindFromLiftedToInt` \c -> case c of '+' -> Parser -- plus sign (\chunk0 s0 -> decPosIntStart e (boxBytes chunk0) s0) '-' -> Parser -- minus sign (\chunk0 s0 -> decNegIntStart e (boxBytes chunk0) s0) _ -> Parser -- no sign, there should be a digit here ( \chunk0 s0 -> let !w = char2Word c - 48 in if w < 10 then (# s0, decPosIntMore e w maxIntAsWord (boxBytes chunk0) #) else (# s0, (# e | #) #) ) -- This is the same as decSignedInt except that we disallow -- a leading plus sign. decStandardInt# :: e -> Parser e s Int# {-# NOINLINE decStandardInt# #-} decStandardInt# e = any e `bindFromLiftedToInt` \c -> case c of '-' -> Parser -- minus sign (\chunk0 s0 -> decNegIntStart e (boxBytes chunk0) s0) _ -> Parser -- no sign, there should be a digit here ( \chunk0 s0 -> let !w = char2Word c - 48 in if w < 10 then (# s0, decPosIntMore e w maxIntAsWord (boxBytes chunk0) #) else (# s0, (# e | #) #) ) {- | Variant of 'decUnsignedInteger' that lets the caller supply a leading digit. This is useful when parsing formats like JSON where integers with leading zeroes are considered invalid. The calling context must consume the first digit before calling this parser. Results are always positive numbers. -} decTrailingInteger :: -- | Leading digit, should be between @0@ and @9@. Int -> Parser e s Integer decTrailingInteger (I# w) = Parser (\chunk0 s0 -> (# s0, (# | decIntegerChunks (I# w) 10 0 (boxBytes chunk0) #) #)) {- | Parse a decimal-encoded positive integer of arbitrary size. This rejects input that begins with a plus or minus sign. -} decUnsignedInteger :: e -> Parser e s Integer decUnsignedInteger e = Parser (\chunk0 s0 -> decUnsignedIntegerStart e (boxBytes chunk0) s0) {- | Parse a decimal-encoded integer of arbitrary size. This accepts input that begins with a plus or minus sign. Input without a sign prefix is interpreted as positive. -} decSignedInteger :: e -> Parser e s Integer {-# NOINLINE decSignedInteger #-} decSignedInteger e = any e >>= \c -> case c of '+' -> do decUnsignedInteger e '-' -> do x <- decUnsignedInteger e pure $! negate x _ -> Parser -- no sign, there should be a digit here ( \chunk0 s0 -> let !w = char2Word c - 48 in if w < 10 then let !r = decIntegerChunks (fromIntegral @Word @Int w) 10 0 (boxBytes chunk0) in (# s0, (# | r #) #) else (# s0, (# e | #) #) ) decPosIntStart :: e -> -- Error message Bytes -> -- Chunk ST# s (Result# e Int#) decPosIntStart e !chunk0 s0 = if length chunk0 > 0 then let !w = fromIntegral @Word8 @Word (PM.indexByteArray (array chunk0) (offset chunk0)) - 48 in if w < 10 then (# s0, decPosIntMore e w maxIntAsWord (Bytes.unsafeDrop 1 chunk0) #) else (# s0, (# e | #) #) else (# s0, (# e | #) #) decNegIntStart :: e -> -- Error message Bytes -> -- Chunk ST# s (Result# e Int#) decNegIntStart e !chunk0 s0 = if length chunk0 > 0 then let !w = fromIntegral @Word8 @Word (PM.indexByteArray (array chunk0) (offset chunk0)) - 48 in if w < 10 then case decPosIntMore e w (maxIntAsWord + 1) (Bytes.unsafeDrop 1 chunk0) of (# | (# x, y, z #) #) -> (# s0, (# | (# (notI# x +# 1#), y, z #) #) #) (# err | #) -> (# s0, (# err | #) #) else (# s0, (# e | #) #) else (# s0, (# e | #) #) decUnsignedIntegerStart :: e -> Bytes -> ST# s (Result# e Integer) decUnsignedIntegerStart e !chunk0 s0 = if length chunk0 > 0 then let !w = (PM.indexByteArray (array chunk0) (offset chunk0)) - 48 in if w < (10 :: Word8) then let !r = decIntegerChunks (fromIntegral @Word8 @Int w) 10 0 (Bytes.unsafeDrop 1 chunk0) in (# s0, (# | r #) #) else (# s0, (# e | #) #) else (# s0, (# e | #) #) -- This will not inline since it is recursive, but worker -- wrapper will still happen. Fails if the accumulator -- exceeds the upper bound. decPosIntMore :: e -> -- Error message Word -> -- Accumulator, precondition: less than or equal to bound Word -> -- Inclusive Upper Bound, either (2^63 - 1) or 2^63 Bytes -> -- Chunk Result# e Int# decPosIntMore e !acc !upper !chunk0 = if len > 0 then let !w = fromIntegral @Word8 @Word (PM.indexByteArray (array chunk0) (offset chunk0)) - 48 in if w < 10 then let (overflow, acc') = positivePushBase10 acc w upper in if overflow then (# e | #) else decPosIntMore e acc' upper (Bytes.unsafeDrop 1 chunk0) else (# | (# unI (fromIntegral acc), unI (offset chunk0), len# #) #) else (# | (# unI (fromIntegral acc), unI (offset chunk0), 0# #) #) where !len@(I# len#) = length chunk0 -- This will not inline since it is recursive, but worker -- wrapper will still happen. When the accumulator -- exceeds the size of a machine integer, this pushes the -- accumulated machine int and the shift amount onto the -- stack. -- We are intentionally lazy in the accumulator. There is -- no need to force this on every iteration. We do however, -- force it preemptively every time it changes. -- Because of how we track overflow, we are able to use the -- same function for both positive and negative numbers. decIntegerChunks :: Int -> -- Chunk accumulator (e.g. 236) Int -> -- Chunk base-ten bound (e.g. 1000) Integer -> -- Accumulator Bytes -> -- Chunk (# Integer, Int#, Int# #) decIntegerChunks !nAcc !eAcc acc !chunk0 = if len > 0 then let !w = fromIntegral @Word8 @Word (PM.indexByteArray (array chunk0) (offset chunk0)) - 48 in if w < 10 then let !eAcc' = eAcc * 10 in if eAcc' >= eAcc then decIntegerChunks (nAcc * 10 + fromIntegral @Word @Int w) eAcc' acc (Bytes.unsafeDrop 1 chunk0) else -- In this case, notice that we deliberately -- unconsume the digit that would have caused -- an overflow. let !r = (acc * fromIntegral @Int @Integer eAcc) + (fromIntegral @Int @Integer nAcc) in decIntegerChunks 0 1 r chunk0 else let !r = (acc * fromIntegral @Int @Integer eAcc) + (fromIntegral @Int @Integer nAcc) in (# r, unI (offset chunk0), len# #) else let !r = (acc * fromIntegral @Int @Integer eAcc) + (fromIntegral @Int @Integer nAcc) in (# r, unI (offset chunk0), 0# #) where !len@(I# len#) = length chunk0 upcastIntResult :: Result# e Int# -> Result# e Int upcastIntResult (# e | #) = (# e | #) upcastIntResult (# | (# a, b, c #) #) = (# | (# I# a, b, c #) #) char2Word :: Char -> Word char2Word = fromIntegral . ord {- | Take characters until the specified character is encountered. Consumes the matched character as well. Fails if the character is not present. Visually, the cursor advancement and resulting @Bytes@ for @takeTrailedBy \'D\'@ look like this: > A B C D E F | input > |->->->-| | cursor > {\-*-*-\} | result bytes -} takeTrailedBy :: e -> Char -> Parser e s Bytes takeTrailedBy e !w = do !start <- cursor skipTrailedBy e w !end <- cursor !arr <- expose pure (Bytes arr start (end - (start + 1))) {- | Skip all characters until the terminator is encountered and then consume the matching character as well. Visually, @skipTrailedBy \'C\'@ advances the cursor like this: > A Z B Y C X C W > |->->->->-| This fails if it reaches the end of input without encountering the character. -} skipTrailedBy :: e -> Char -> Parser e s () skipTrailedBy e !w = uneffectful# $ \c -> skipUntilConsumeLoop e w c {- | Skip all characters until the terminator is encountered. This does not consume the terminator. Visually, @skipUntil \'C\'@ advances the cursor like this: > A Z B Y C X C W > |->->->-| This succeeds if it reaches the end of the input without encountering the terminator. It never fails. -} skipUntil :: Char -> Parser e s () skipUntil !w = uneffectful# $ \c -> skipUntilLoop w c skipUntilLoop :: Char -> -- byte to match Bytes -> -- Chunk Result# e () skipUntilLoop !w !c = case length c of 0 -> (# | (# (), unI (offset c), 0# #) #) _ -> if indexLatinCharArray (array c) (offset c) /= w then skipUntilLoop w (Bytes.unsafeDrop 1 c) else (# | (# (), unI (offset c), unI (length c) #) #) skipUntilConsumeLoop :: e -> -- Error message Char -> -- byte to match Bytes -> -- Chunk Result# e () skipUntilConsumeLoop e !w !c = case length c of 0 -> (# e | #) _ -> if indexLatinCharArray (array c) (offset c) /= w then skipUntilConsumeLoop e w (Bytes.unsafeDrop 1 c) else (# | (# (), unI (offset c + 1), unI (length c - 1) #) #) {- FOURMOLU_DISABLE -} -- | Parse exactly eight ASCII-encoded characters, interpreting them as the -- hexadecimal encoding of a 32-bit number. Note that this rejects a sequence -- such as @BC5A9@, requiring @000BC5A9@ instead. This is insensitive to case. hexFixedWord32 :: e -> Parser e s Word32 {-# inline hexFixedWord32 #-} hexFixedWord32 e = Parser (\x s0 -> case runParser (hexFixedWord32# e) x s0 of (# s1, r #) -> case r of (# err | #) -> (# s1, (# err | #) #) (# | (# a, b, c #) #) -> (# s1, (# | (# W32# ( #if MIN_VERSION_base(4,16,0) Exts.wordToWord32# #endif a), b, c #) #) #) ) {- FOURMOLU_ENABLE -} hexFixedWord32# :: e -> Parser e s Word# {-# NOINLINE hexFixedWord32# #-} hexFixedWord32# e = uneffectfulWord# $ \chunk -> if length chunk >= 8 then let !w0@(W# n0) = oneHex $ PM.indexByteArray (array chunk) (offset chunk) !w1@(W# n1) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 1) !w2@(W# n2) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 2) !w3@(W# n3) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 3) !w4@(W# n4) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 4) !w5@(W# n5) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 5) !w6@(W# n6) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 6) !w7@(W# n7) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 7) in if | w0 .|. w1 .|. w2 .|. w3 .|. w4 .|. w5 .|. w6 .|. w7 /= maxBound -> (# | (# (n0 `Exts.timesWord#` 268435456##) `Exts.plusWord#` (n1 `Exts.timesWord#` 16777216##) `Exts.plusWord#` (n2 `Exts.timesWord#` 1048576##) `Exts.plusWord#` (n3 `Exts.timesWord#` 65536##) `Exts.plusWord#` (n4 `Exts.timesWord#` 4096##) `Exts.plusWord#` (n5 `Exts.timesWord#` 256##) `Exts.plusWord#` (n6 `Exts.timesWord#` 16##) `Exts.plusWord#` n7 , unI (offset chunk) +# 8# , unI (length chunk) -# 8# #) #) | otherwise -> (# e | #) else (# e | #) {- FOURMOLU_DISABLE -} -- | Parse exactly 16 ASCII-encoded characters, interpreting them as the -- hexadecimal encoding of a 64-bit number. Note that this rejects a sequence -- such as @BC5A9@, requiring @00000000000BC5A9@ instead. This is insensitive -- to case. hexFixedWord64 :: e -> Parser e s Word64 {-# inline hexFixedWord64 #-} hexFixedWord64 e = Parser (\x s0 -> case runParser (hexFixedWord64# e) x s0 of (# s1, r #) -> case r of (# err | #) -> (# s1, (# err | #) #) (# | (# a, b, c #) #) -> (# s1, (# | (# W64# ( #if MIN_VERSION_base(4,17,0) Exts.wordToWord64# a #else a #endif ), b, c #) #) #) ) {- FOURMOLU_ENABLE -} hexFixedWord128 :: e -> Parser e s Word128 hexFixedWord128 e = Word128 <$> hexFixedWord64 e <*> hexFixedWord64 e hexFixedWord256 :: e -> Parser e s Word256 hexFixedWord256 e = Word256 <$> hexFixedWord64 e <*> hexFixedWord64 e <*> hexFixedWord64 e <*> hexFixedWord64 e hexFixedWord64# :: e -> Parser e s Word# {-# NOINLINE hexFixedWord64# #-} hexFixedWord64# e = uneffectfulWord# $ \chunk -> if length chunk >= 16 then let go !off !len !acc = case len of 0 -> case acc of W# r -> (# | (# r , unI off , unI (length chunk) -# 16# #) #) _ -> case oneHexMaybe (PM.indexByteArray (array chunk) off) of Nothing -> (# e | #) Just w -> go (off + 1) (len - 1) ((acc * 16) + w) in go (offset chunk) (16 :: Int) (0 :: Word) else (# e | #) {- FOURMOLU_DISABLE -} -- | Parse exactly four ASCII-encoded characters, interpreting -- them as the hexadecimal encoding of a 16-bit number. Note that -- this rejects a sequence such as @5A9@, requiring @05A9@ instead. -- This is insensitive to case. This is particularly useful when -- parsing escape sequences in C or JSON, which allow encoding -- characters in the Basic Multilingual Plane as @\\uhhhh@. hexFixedWord16 :: e -> Parser e s Word16 {-# inline hexFixedWord16 #-} hexFixedWord16 e = Parser (\x s0 -> case runParser (hexFixedWord16# e) x s0 of (# s1, r #) -> case r of (# err | #) -> (# s1, (# err | #) #) (# | (# a, b, c #) #) -> (# s1, (# | (# W16# ( #if MIN_VERSION_base(4,16,0) Exts.wordToWord16# #endif a), b, c #) #) #) ) hexFixedWord16# :: e -> Parser e s Word# {-# noinline hexFixedWord16# #-} hexFixedWord16# e = uneffectfulWord# $ \chunk -> if length chunk >= 4 then let !w0@(W# n0) = oneHex $ PM.indexByteArray (array chunk) (offset chunk) !w1@(W# n1) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 1) !w2@(W# n2) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 2) !w3@(W# n3) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 3) in if | w0 .|. w1 .|. w2 .|. w3 /= maxBound -> (# | (# (n0 `Exts.timesWord#` 4096##) `Exts.plusWord#` (n1 `Exts.timesWord#` 256##) `Exts.plusWord#` (n2 `Exts.timesWord#` 16##) `Exts.plusWord#` n3 , unI (offset chunk) +# 4# , unI (length chunk) -# 4# #) #) | otherwise -> (# e | #) else (# e | #) -- | Parse exactly two ASCII-encoded characters, interpretting -- them as the hexadecimal encoding of a 8-bit number. Note that -- this rejects a sequence such as @A@, requiring @0A@ instead. -- This is insensitive to case. hexFixedWord8 :: e -> Parser e s Word8 {-# inline hexFixedWord8 #-} hexFixedWord8 e = Parser (\x s0 -> case runParser (hexFixedWord8# e) x s0 of (# s1, r #) -> case r of (# err | #) -> (# s1, (# err | #) #) (# | (# a, b, c #) #) -> (# s1, (# | (# W8# ( #if MIN_VERSION_base(4,16,0) Exts.wordToWord8# #endif a), b, c #) #) #) ) {- FOURMOLU_ENABLE -} hexFixedWord8# :: e -> Parser e s Word# {-# NOINLINE hexFixedWord8# #-} hexFixedWord8# e = uneffectfulWord# $ \chunk -> if length chunk >= 2 then let !w0@(W# n0) = oneHex $ PM.indexByteArray (array chunk) (offset chunk) !w1@(W# n1) = oneHex $ PM.indexByteArray (array chunk) (offset chunk + 1) in if | w0 .|. w1 /= maxBound -> (# | (# (n0 `Exts.timesWord#` 16##) `Exts.plusWord#` n1 , unI (offset chunk) +# 2# , unI (length chunk) -# 2# #) #) | otherwise -> (# e | #) else (# e | #) {- | Consume a single character that is the lowercase hexadecimal encoding of a 4-bit word. Fails if the character is not in the class @[a-f0-9]@. -} hexNibbleLower :: e -> Parser e s Word hexNibbleLower e = uneffectful $ \chunk -> case length chunk of 0 -> Failure e _ -> let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8 in if | w >= 48 && w < 58 -> Success (fromIntegral w - 48) (offset chunk + 1) (length chunk - 1) | w >= 97 && w < 103 -> Success (fromIntegral w - 87) (offset chunk + 1) (length chunk - 1) | otherwise -> Failure e {- | Consume a single character that is the case-insensitive hexadecimal encoding of a 4-bit word. Fails if the character is not in the class @[a-fA-F0-9]@. -} hexNibble :: e -> Parser e s Word hexNibble e = uneffectful $ \chunk -> case length chunk of 0 -> Failure e _ -> let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8 in if | w >= 48 && w < 58 -> Success (fromIntegral w - 48) (offset chunk + 1) (length chunk - 1) | w >= 65 && w < 71 -> Success (fromIntegral w - 55) (offset chunk + 1) (length chunk - 1) | w >= 97 && w < 103 -> Success (fromIntegral w - 87) (offset chunk + 1) (length chunk - 1) | otherwise -> Failure e {- | Consume a single character that is the lowercase hexadecimal encoding of a 4-bit word. Returns @Nothing@ without consuming the character if it is not in the class @[a-f0-9]@. The parser never fails. -} tryHexNibbleLower :: Parser e s (Maybe Word) tryHexNibbleLower = unfailing $ \chunk -> case length chunk of 0 -> InternalStep Nothing (offset chunk) (length chunk) _ -> let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8 in if | w >= 48 && w < 58 -> InternalStep (Just (fromIntegral w - 48)) (offset chunk + 1) (length chunk - 1) | w >= 97 && w < 103 -> InternalStep (Just (fromIntegral w - 87)) (offset chunk + 1) (length chunk - 1) | otherwise -> InternalStep Nothing (offset chunk) (length chunk) {- | Consume a single character that is the case-insensitive hexadecimal encoding of a 4-bit word. Returns @Nothing@ without consuming the character if it is not in the class @[a-fA-F0-9]@. This parser never fails. -} tryHexNibble :: Parser e s (Maybe Word) tryHexNibble = unfailing $ \chunk -> case length chunk of 0 -> InternalStep Nothing (offset chunk) (length chunk) _ -> let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8 in if | w >= 48 && w < 58 -> InternalStep (Just (fromIntegral w - 48)) (offset chunk + 1) (length chunk - 1) | w >= 65 && w < 71 -> InternalStep (Just (fromIntegral w - 55)) (offset chunk + 1) (length chunk - 1) | w >= 97 && w < 103 -> InternalStep (Just (fromIntegral w - 87)) (offset chunk + 1) (length chunk - 1) | otherwise -> InternalStep Nothing (offset chunk) (length chunk) -- Returns the maximum machine word if the argument is not -- the ASCII encoding of a hexadecimal digit. oneHex :: Word8 -> Word {-# INLINE oneHex #-} oneHex w | w >= 48 && w < 58 = (fromIntegral w - 48) | w >= 65 && w < 71 = (fromIntegral w - 55) | w >= 97 && w < 103 = (fromIntegral w - 87) | otherwise = maxBound oneHexMaybe :: Word8 -> Maybe Word {-# INLINE oneHexMaybe #-} oneHexMaybe w | w >= 48 && w < 58 = Just (fromIntegral w - 48) | w >= 65 && w < 71 = Just (fromIntegral w - 55) | w >= 97 && w < 103 = Just (fromIntegral w - 87) | otherwise = Nothing uneffectfulWord# :: (Bytes -> Result# e Word#) -> Parser e s Word# {-# INLINE uneffectfulWord# #-} uneffectfulWord# f = Parser (\b s0 -> (# s0, (f (boxBytes b)) #)) -- Precondition: the arguments are non-negative. Boolean is -- true when overflow happens. Performs: a * 10 + b -- Postcondition: when overflow is false, the resulting -- word is less than or equal to the upper bound positivePushBase10 :: Word -> Word -> Word -> (Bool, Word) {-# INLINE positivePushBase10 #-} positivePushBase10 (W# a) (W# b) (W# upper) = let !(# ca, r0 #) = Exts.timesWord2# a 10## !r1 = Exts.plusWord# r0 b !cb = int2Word# (gtWord# r1 upper) !cc = int2Word# (ltWord# r1 0##) !c = ca `or#` cb `or#` cc in (case c of 0## -> False; _ -> True, W# r1) unsignedPushBase10 :: Word -> Word -> (Bool, Word) {-# INLINE unsignedPushBase10 #-} unsignedPushBase10 (W# a) (W# b) = let !(# ca, r0 #) = Exts.timesWord2# a 10## !r1 = Exts.plusWord# r0 b !cb = int2Word# (ltWord# r1 r0) !c = ca `or#` cb in (case c of 0## -> False; _ -> True, W# r1) -- | Skip while the predicate is matched. This is always inlined. skipWhile :: (Char -> Bool) -> Parser e s () {-# INLINE skipWhile #-} skipWhile f = go where go = isEndOfInput >>= \case True -> pure () False -> do w <- anyUnsafe if f w then go else unconsume 1 -- Interpret the next byte as an Latin1-encoded character. -- Does not check to see if any characters are left. This -- is not exported. anyUnsafe :: Parser e s Char {-# INLINE anyUnsafe #-} anyUnsafe = uneffectful $ \chunk -> let w = indexCharArray (array chunk) (offset chunk) :: Char in Success w (offset chunk + 1) (length chunk - 1) -- Reads one byte and interprets it as Latin1-encoded character. indexCharArray :: PM.ByteArray -> Int -> Char {-# INLINE indexCharArray #-} indexCharArray (PM.ByteArray x) (I# i) = C# (indexCharArray# x i) {- | Match any character, to perform lookahead. Returns 'Nothing' if end of input has been reached. Does not consume any input. /Note/: Because this parser does not fail, do not use it with combinators such as 'many', because such as 'many', because such parsers loop until a failure occurs. Careless use will thus result in an infinite loop. -} peek :: Parser e s (Maybe Char) {-# INLINE peek #-} peek = uneffectful $ \(Bytes arr off len) -> let v = if len > 0 then Just (indexCharArray arr off) else Nothing in Success v off len {- | Match any byte, to perform lookahead. Does not consume any input, but will fail if end of input has been reached. -} peek' :: e -> Parser e s Char {-# INLINE peek' #-} peek' e = uneffectful $ \(Bytes arr off len) -> if len > 0 then Success (indexCharArray arr off) off len else Failure e