{-# LANGUAGE LambdaCase #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeSynonymInstances #-} -- {-# LANGUAGE TemplateHaskell #-} -- {-# OPTIONS_GHC -Wunused-imports #-} -- | Abstract syntax instance for grammars with single-character tokens. module CharacterTokenGrammar where import Control.Monad import Control.Monad.Except import Control.Monad.State import qualified Data.List as List import Data.IntMap (IntMap) import qualified Data.IntMap as IntMap import Data.Map (Map) import qualified Data.Map as Map import Data.Maybe import Data.Semigroup ((<>)) -- uses microlens-platform import Lens.Micro import Lens.Micro.Extras (view) import qualified LBNF.Abs as A import LBNF.Print (printTree) import CFG import DebugPrint -- | Grammar over single-character terminals with identifiers as rule names. type Term = Char type NTName = A.Ident type RuleName = A.Ident type Grammar = Grammar' NTName RuleName Term type NT = NT' NTName type NTDef = NTDef' NTName RuleName Term type Form = Form' Term -- | Intermediate rule format. type IRule = (NT, RuleName, [A.Entry]) type Error = Either String -- | Convert grammar to internal format; check for single-character terminals. -- Also return start non-terminal if the grammar has any rules checkGrammar :: A.Grammar -> Error (Maybe NT, Grammar) checkGrammar (A.Rules rs) = (`runStateT` emptyGrammar) $ do irs <- mapM addNT rs mapM_ addRule irs -- The start symbol is the lhs of the first rule return $ listToMaybe $ map (\ (x, _, _) -> x) irs where -- Pass 1: collect non-terminals from lhss of rules. addNT :: A.Rule -> StateT Grammar Error IRule addNT (A.Prod r x es) = StateT $ \ grm@(Grammar n dict defs) -> do -- Check if we have seen NT x before. case Map.lookup x dict of -- Yes, use its number. Just i -> return ((NT i x, r, es), grm) -- No, insert a new entry into the dictionary. Nothing -> return ((NT n x, r, es), Grammar (n+1) (Map.insert x n dict) defs) -- Pass 2: scope-check and convert rhss of rules. addRule :: IRule -> StateT Grammar Error () addRule (NT i x, r, es) = StateT $ \ grm -> do alt <- Alt r . Form <$> do forM es $ \case -- Current limitation: Since we have no lexer, terminals are characters. A.Term [a] -> return $ Term a A.Term _ -> throwError "terminals must be single-character strings" -- Convert non-terminal names into de Bruijn indices (numbers). A.NT y -> case Map.lookup y $ view grmNTDict grm of Nothing -> throwError $ "undefined non-terminal " ++ printTree y Just j -> return $ NonTerm $ NT j y return ((), over grmNTDefs (IntMap.insertWith (<>) i (NTDef x [alt])) grm) -- | Turn grammar back to original format. reifyGrammar :: Grammar -> A.Grammar reifyGrammar grm@(Grammar _ dict defs) = A.Rules . (`concatMap` IntMap.toList defs) $ \ (i, NTDef x alts) -> (`map` alts) $ \ (Alt r (Form alpha)) -> A.Prod r x . (`map` alpha) $ \case Term a -> A.Term [a] NonTerm j -> A.NT $ ntToIdent grm j ntToIdent :: Grammar -> NT -> NTName ntToIdent grm (NT i x) = x -- -- Lookup in Grammar no longer needed as NT's carry their name. -- ntToIdent grm (NT i x) = -- view ntName $ -- IntMap.findWithDefault (error "printGrammar: impossible") i $ -- view grmNTDefs grm -- * Printing instance DebugPrint (A.Ident) where debugPrint (A.Ident s) = s instance DebugPrint Term where debugPrint = show -- debugPrint = (:[])