-- | Efficient literal branching using Template Haskell. module FlatParse.Stateful.Switch ( switch, switchWithPost, rawSwitchWithPost ) where import Control.Monad import Data.Foldable import Data.Map (Map) import Language.Haskell.TH import qualified Data.Map.Strict as M import FlatParse.Common.Switch import FlatParse.Stateful.Base ( ensure, skipBack, branch, failed ) import FlatParse.Stateful.Bytes ( bytes, bytesUnsafe ) import FlatParse.Stateful.Integers ( anyWord8Unsafe ) {-| This is a template function which makes it possible to branch on a collection of string literals in an efficient way. By using `switch`, such branching is compiled to a trie of primitive parsing operations, which has optimized control flow, vectorized reads and grouped checking for needed input bytes. The syntax is slightly magical, it overloads the usual @case@ expression. An example: @ $(switch [| case _ of "foo" -> pure True "bar" -> pure False |]) @ The underscore is mandatory in @case _ of@. Each branch must be a string literal, but optionally we may have a default case, like in @ $(switch [| case _ of "foo" -> pure 10 "bar" -> pure 20 _ -> pure 30 |]) @ All case right hand sides must be parsers with the same type. That type is also the type of the whole `switch` expression. A `switch` has longest match semantics, and the order of cases does not matter, except for the default case, which may only appear as the last case. If a `switch` does not have a default case, and no case matches the input, then it returns with failure, \without\ having consumed any input. A fallthrough to the default case also does not consume any input. -} switch :: Q Exp -> Q Exp switch = switchWithPost Nothing {-| Switch expression with an optional first argument for performing a post-processing action after every successful branch matching. For example, if we have @ws :: ParserT st r e ()@ for a whitespace parser, we might want to consume whitespace after matching on any of the switch cases. For that case, we can define a "lexeme" version of `switch` as follows. @ switch' :: Q Exp -> Q Exp switch' = switchWithPost (Just [| ws |]) @ Note that this @switch'@ function cannot be used in the same module it's defined in, because of the stage restriction of Template Haskell. -} switchWithPost :: Maybe (Q Exp) -> Q Exp -> Q Exp switchWithPost postAction exp = do !postAction <- sequence postAction (!cases, !fallback) <- parseSwitch exp genTrie $! genSwitchTrie' postAction cases fallback -- | Version of `switchWithPost` without syntactic sugar. The second argument is the -- list of cases, the third is the default case. rawSwitchWithPost :: Maybe (Q Exp) -> [(String, Q Exp)] -> Maybe (Q Exp) -> Q Exp rawSwitchWithPost postAction cases fallback = do !postAction <- sequence postAction !cases <- forM cases \(str, rhs) -> (str,) <$> rhs !fallback <- sequence fallback genTrie $! genSwitchTrie' postAction cases fallback #if MIN_VERSION_base(4,15,0) mkDoE = DoE Nothing {-# inline mkDoE #-} #else mkDoE = DoE {-# inline mkDoE #-} #endif genTrie :: (Map (Maybe Int) Exp, Trie' (Rule, Int, Maybe Int)) -> Q Exp genTrie (rules, t) = do branches <- traverse (\e -> (,) <$> (newName "rule") <*> pure e) rules let ix m k = case M.lookup k m of Nothing -> error ("key not in map: " ++ show k) Just a -> a let ensure' :: Maybe Int -> Maybe (Q Exp) ensure' = fmap (\n -> [| ensure n |]) fallback :: Rule -> Int -> Q Exp fallback rule 0 = pure $ VarE $ fst $ ix branches rule fallback rule n = [| skipBack n >> $(pure $ VarE $ fst $ ix branches rule) |] let go :: Trie' (Rule, Int, Maybe Int) -> Q Exp go = \case Branch' (r, n, alloc) ts | M.null ts -> pure $ VarE $ fst $ branches M.! r | otherwise -> do !next <- (traverse . traverse) go (M.toList ts) !defaultCase <- fallback r (n + 1) let cases = mkDoE $ [BindS (VarP (mkName "c")) (VarE 'anyWord8Unsafe), NoBindS (CaseE (VarE (mkName "c")) (map (\(w, t) -> Match (LitP (IntegerL (fromIntegral w))) (NormalB t) []) next ++ [Match WildP (NormalB defaultCase) []]))] case ensure' alloc of Nothing -> pure cases Just alloc -> [| branch $alloc $(pure cases) $(fallback r n) |] Path (r, n, alloc) ws t -> case ensure' alloc of Nothing -> [| branch $(bytes ws) $(go t) $(fallback r n)|] Just alloc -> [| branch ($alloc >> $(bytesUnsafe ws)) $(go t) $(fallback r n) |] letE (map (\(x, rhs) -> valD (varP x) (normalB (pure rhs)) []) (Data.Foldable.toList branches)) (go t) parseSwitch :: Q Exp -> Q ([(String, Exp)], Maybe Exp) parseSwitch exp = exp >>= \case CaseE (UnboundVarE _) [] -> error "switch: empty clause list" CaseE (UnboundVarE _) cases -> do (!cases, !last) <- pure (init cases, last cases) !cases <- forM cases \case Match (LitP (StringL str)) (NormalB rhs) [] -> pure (str, rhs) _ -> error "switch: expected a match clause on a string literal" (!cases, !last) <- case last of Match (LitP (StringL str)) (NormalB rhs) [] -> pure (cases ++ [(str, rhs)], Nothing) Match WildP (NormalB rhs) [] -> pure (cases, Just rhs) _ -> error "switch: expected a match clause on a string literal or a wildcard" pure (cases, last) _ -> error "switch: expected a \"case _ of\" expression" genSwitchTrie' :: Maybe Exp -> [(String, Exp)] -> Maybe Exp -> (Map (Maybe Int) Exp, Trie' (Rule, Int, Maybe Int)) genSwitchTrie' postAction cases fallback = let (!branches, !strings) = unzip do (!i, (!str, !rhs)) <- zip [0..] cases case postAction of Nothing -> pure ((Just i, rhs), (i, str)) Just !post -> pure ((Just i, (VarE '(>>)) `AppE` post `AppE` rhs), (i, str)) !m = M.fromList ((Nothing, maybe (VarE 'failed) id fallback) : branches) !trie = compileTrie strings in (m , trie)