module Sasha (
    -- * Sasha the lexer

    -- | This is the ordinary Haskell (i.e. slow) interface.
    --
    -- The fast one is in "Sasha.TTH" module, but that requires @TemplateHaskell@.
    --
    Sasha,
    sasha,
    -- * ERE specification
    ERE,
    empty,
    eps,
    char,
    charRange,
    charSet,
    utf8Char,
    anyChar,
    anyUtf8Char,
    appends,
    unions,
    intersections,
    star,
    plus,
    string,
    utf8String,
    complement,
    satisfy,
    digit,
) where

import Data.Word (Word8)

import qualified Data.ByteString as BS

import Sasha.Internal.ERE

-- | Lexer grammar specification: regular expression and result builder function
-- which takes a prefix (the matching part) and a suffix (the rest of input).
type Sasha r = [(ERE, BS.ByteString -> BS.ByteString -> r)]

-- | Scan for a single token.
sasha
    :: forall r. r    -- ^ no match value
    -> Sasha r        -- ^ scanner rules definitions
    -> BS.ByteString  -- ^ input
    -> r              -- ^ result
sasha :: forall r. r -> Sasha r -> ByteString -> r
sasha r
noMatch Sasha r
grammar ByteString
input0 = r -> Int -> ByteString -> Sasha r -> r
go r
noMatch Int
0 ByteString
input0 Sasha r
grammar
  where
    -- Note: acc has to be lazy
    go :: r -> Int -> BS.ByteString -> Sasha r -> r
    go :: r -> Int -> ByteString -> Sasha r -> r
go r
acc !Int
_ !ByteString
_       [] = r
acc
    go r
acc !Int
i !ByteString
input   Sasha r
ts = case ByteString -> Maybe (Word8, ByteString)
BS.uncons ByteString
input of
        Maybe (Word8, ByteString)
Nothing          -> r
acc
        Just (Word8
c, ByteString
input') -> r -> Int -> ByteString -> Sasha r -> r
go (forall {p}. [p] -> p -> p
next [r]
accs r
acc) (Int
i forall a. Num a => a -> a -> a
+ Int
1) ByteString
input' Sasha r
ts'
          where
            ts' :: Sasha r
ts' = forall r. Word8 -> Sasha r -> Sasha r
derivativeSasha Word8
c Sasha r
ts
            accs :: [r]
accs = [ case Int -> ByteString -> (ByteString, ByteString)
BS.splitAt (Int
i forall a. Num a => a -> a -> a
+ Int
1) ByteString
input0 of (ByteString
pfx, ByteString
sfx) -> ByteString -> ByteString -> r
f ByteString
pfx ByteString
sfx | (ERE
ere, ByteString -> ByteString -> r
f) <- Sasha r
ts', ERE -> Bool
nullable ERE
ere]

            next :: [p] -> p -> p
next []    p
x = p
x
            next (p
x:[p]
_) p
_ = p
x

derivativeSasha :: Word8 -> Sasha r -> Sasha r
derivativeSasha :: forall r. Word8 -> Sasha r -> Sasha r
derivativeSasha Word8
c Sasha r
ts =
    [ (ERE
ere', ByteString -> ByteString -> r
f)
    | (ERE
ere,  ByteString -> ByteString -> r
f) <- Sasha r
ts
    , let ere' :: ERE
ere' = Word8 -> ERE -> ERE
derivative Word8
c ERE
ere
    , Bool -> Bool
not (ERE -> Bool
isEmpty ERE
ere')
    ]