{-# language BangPatterns #-} {-# language BinaryLiterals #-} {-# language DataKinds #-} {-# language DeriveFunctor #-} {-# language DerivingStrategies #-} {-# language GADTSyntax #-} {-# language KindSignatures #-} {-# language LambdaCase #-} {-# language MagicHash #-} {-# language MultiWayIf #-} {-# language PolyKinds #-} {-# language RankNTypes #-} {-# language ScopedTypeVariables #-} {-# language StandaloneDeriving #-} {-# language TypeApplications #-} {-# language UnboxedSums #-} {-# language UnboxedTuples #-} -- | Parse token sequences. module Data.Parser ( Parser , Result(..) , Slice(..) , parseSmallArray , parseSmallArrayEffectfully -- * Primitives , any , opt , peek , token , effect , fail , trySatisfy -- * Control Flow , foldSepBy1 , sepBy1 , skipWhile ) where import Prelude hiding (length,any,fail) import Data.Bool (bool) import Data.Primitive (SmallArray(..)) import Data.Bytes.Parser (Result(..),Slice(..)) import Data.Parser.Unsafe (Parser(..)) import GHC.Exts (TYPE,Int(I#),Int#) import GHC.ST (ST(ST),runST) import qualified Data.Primitive as PM import qualified GHC.Exts as Exts type Result# e (a :: TYPE r) = (# e | (# a, Int#, Int# #) #) -- ints are offset and length -- | Consumes and returns the next token from the input. -- Fails if no tokens are left. any :: e -> Parser a e s a {-# inline any #-} any e = uneffectful $ \array off len -> case len of 0 -> Failure e _ -> let w = PM.indexSmallArray array off in Success (Slice (off + 1) (len - 1) w) -- | Consume a token from the input or return @Nothing@ if -- end of the stream has been reached. This parser never fails. opt :: Parser a e s (Maybe a) {-# inline opt #-} opt = uneffectful $ \array off len -> case len of 0 -> Success (Slice off 0 Nothing) _ -> let w = PM.indexSmallArray array off in Success (Slice (off + 1) (len - 1) (Just w)) -- | Looks at the next token from the input. If the token matches -- the predicate, consume the token and return @True@. Otherwise, -- do not consume the token and return @False@. If no tokens -- remain in the input, return @False@. This parser never fails. trySatisfy :: (a -> Bool) -> Parser a e s Bool {-# inline trySatisfy #-} trySatisfy p = uneffectful $ \array off len -> case len of 0 -> Success (Slice off 0 False) _ -> let w = PM.indexSmallArray array off in case p w of True -> Success (Slice (off + 1) (len - 1) True) False -> Success (Slice off len False) -- | Lift an effect into a parser. effect :: ST s b -> Parser a e s b {-# inline effect #-} effect (ST f) = Parser (\(# _, off, len #) s0 -> case f s0 of (# s1, b #) -> (# s1, (# | (# b, off, len #) #) #) ) -- | Consumes and returns the next token from the input. -- Fails if no tokens are left. fail :: e -> Parser a e s b {-# inline fail #-} fail e = Parser (\_ s0 -> (# s0, (# e | #) #) ) -- | Consumes the next token from the input. Fails if it -- is not equal to the expected value. token :: Eq a => e -- ^ Error message -> a -- ^ Expected value of next token -> Parser a e s () {-# inline token #-} token e a = do b <- any e bool (fail e) (pure ()) (a == b) -- | Returns the next token from the input without consuming -- it. Fails if no tokens are left. peek :: e -> Parser a e s a {-# inline peek #-} peek e = uneffectful $ \array off len -> if len > 0 then let w = PM.indexSmallArray array off in Success (Slice off len w) else Failure e -- | Fold over the tokens, repeatedly running @step@ followed -- by @separator@ until @separator@ returns 'False'. This is -- strict in the accumulator and always runs @step@ at least -- once. There is no backtracking; any failure causes the whole -- combinator to fail. foldSepBy1 :: Parser a e s Bool -- ^ Separator -> (b -> Parser a e s b) -- ^ Step -> b -- ^ Initial value -> Parser a e s b {-# inline foldSepBy1 #-} foldSepBy1 sep f b0 = f b0 >>= go where go !b = sep >>= \case True -> f b >>= go False -> pure b -- | Skip tokens for which the predicate is true. skipWhile :: (a -> Bool) -- ^ Predicate -> Parser a e s () {-# inline skipWhile #-} skipWhile f = go where go = opt >>= \case Nothing -> pure () Just t -> case f t of True -> go False -> internalUnconsume 1 -- | Fold over the tokens, repeatedly running @step@ followed -- by @separator@ until @separator@ returns 'False'. The results -- of @step@ are discarded, but in conjunction with @effect@, -- this can be used to populate an array or a builder. This -- always runs @step@ at least once. -- -- > sepBy1 sep step === step *> (sep >>= bool (pure ()) (step *> (sep >>= bool (pure ()) (...)))) sepBy1 :: Parser a e s Bool -- ^ Separator -> Parser a e s b -- ^ Step -> Parser a e s () {-# inline sepBy1 #-} sepBy1 sep f = f *> go where go = sep >>= \case True -> f *> go False -> pure () uneffectful :: (SmallArray a -> Int -> Int -> Result e b) -> Parser a e s b {-# inline uneffectful #-} uneffectful f = Parser ( \(# arr,off,len #) s0 -> (# s0, unboxResult (f (SmallArray arr) (I# off) (I# len)) #) ) unboxResult :: Result e a -> Result# e a unboxResult (Success (Slice (I# b) (I# c) a)) = (# | (# a, b, c #) #) unboxResult (Failure e) = (# e | #) parseSmallArray :: forall a e b. (forall s. Parser a e s b) -> SmallArray a -> Result e b parseSmallArray p (SmallArray arr) = runST action where action :: forall s. ST s (Result e b) action = case p @s of Parser f -> ST (\s0 -> case f (# arr, 0#, (Exts.sizeofSmallArray# arr) #) s0 of (# s1, r #) -> (# s1, boxResult r #) ) parseSmallArrayEffectfully :: Parser a e s b -> SmallArray a -> ST s (Result e b) parseSmallArrayEffectfully (Parser f) (SmallArray arr) = ST (\s0 -> case f (# arr, 0#, (Exts.sizeofSmallArray# arr) #) s0 of (# s1, r #) -> (# s1, boxResult r #) ) boxResult :: Result# e a -> Result e a boxResult (# | (# a, b, c #) #) = Success (Slice (I# b) (I# c) a) boxResult (# e | #) = Failure e -- A copy of unconsume so that the modules do not need to -- be restructured. internalUnconsume :: Int -> Parser a e s () {-# inline internalUnconsume #-} internalUnconsume n = uneffectful $ \_ off len -> Success (Slice (off - n) (len + n) ())