module Pinchot
(
Intervals
, include
, exclude
, solo
, pariah
, Pinchot
, RuleName
, AlternativeName
, Rule
, terminal
, terminalSeq
, nonTerminal
, list
, list1
, option
, wrap
, label
, (<?>)
, earleyGrammar
, allRulesToCode
, ruleTreeToCode
) where
import Pinchot.Intervals
import Control.Applicative ((<|>), liftA2)
import Control.Exception (Exception)
import Control.Monad (join, when)
import Control.Monad.Fix (MonadFix, mfix)
import Control.Monad.Trans.Class (lift)
import Control.Monad.Trans.Except (ExceptT, throwE, runExceptT)
import Control.Monad.Trans.State (State, runState, get, put)
import Data.Char (isUpper)
import Data.Foldable (toList)
import Data.Map (Map)
import qualified Data.Map as M
import Data.Monoid ((<>))
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Typeable (Typeable)
import Language.Haskell.TH
(ExpQ, ConQ, normalC, mkName, strictType, notStrict, DecQ, newtypeD,
cxt, conT, Name, dataD, appT, DecsQ, appE, Q, Stmt(NoBindS), uInfixE, bindS,
varE, varP, conE, Pat, Exp(AppE, DoE), lamE)
import qualified Language.Haskell.TH.Syntax as Syntax
import Text.Earley (satisfy, rule, symbol)
import qualified Text.Earley ((<?>))
data RuleType t
= RTerminal (Intervals t)
| RBranch (Branch t, [(Branch t)])
| RSeqTerm [t]
| ROptional (Rule t)
| RMany (Rule t)
| RMany1 (Rule t)
| RWrap (Rule t)
deriving (Eq, Ord, Show)
data Rule t = Rule String (Maybe String) (RuleType t)
deriving (Eq, Ord, Show)
label :: String -> Rule t -> Rule t
label s (Rule n _ t) = Rule n (Just s) t
(<?>) :: Pinchot t (Rule t) -> String -> Pinchot t (Rule t)
p <?> s = fmap (label s) p
infixr 0 <?>
data Branch t = Branch String [(Rule t)]
deriving (Eq, Ord, Show)
data Names t = Names
{ tyConNames :: Set RuleName
, dataConNames :: Set String
, nextIndex :: Int
, allRules :: Map Int (Rule t)
} deriving (Eq, Ord, Show)
data Error
= InvalidName String
| EmptyNonTerminal String
deriving (Show, Typeable)
instance Exception Error
newtype Pinchot t a
= Pinchot { runPinchot :: (ExceptT Error (State (Names t)) a) }
deriving (Functor, Applicative, Monad, MonadFix)
addRuleName
:: RuleName
-> Pinchot t ()
addRuleName name = Pinchot $ do
old@(Names tyNames _ _ _) <- lift get
case name of
[] -> throw
x:_ -> do
when (not (isUpper x)) throw
when (Set.member name tyNames) throw
lift $ put (old { tyConNames = Set.insert name tyNames })
where
throw = throwE $ InvalidName name
addDataConName
:: String
-> Pinchot t ()
addDataConName name = Pinchot $ do
old@(Names _ dcNames _ _) <- lift get
case name of
[] -> throw
x:_ -> do
when (not (isUpper x)) throw
when (Set.member name dcNames) throw
lift $ put (old { dataConNames = Set.insert name dcNames })
where
throw = throwE $ InvalidName name
newRule
:: RuleName
-> RuleType t
-> Pinchot t (Rule t)
newRule name ty = Pinchot $ do
runPinchot (addRuleName name)
st <- lift get
let r = Rule name Nothing ty
newSt = st { nextIndex = succ (nextIndex st)
, allRules = M.insert (nextIndex st) r
(allRules st)
}
lift (put newSt)
return r
type RuleName = String
type AlternativeName = String
terminal
:: RuleName
-> Intervals t
-> Pinchot t (Rule t)
terminal name ivls = newRule name (RTerminal ivls)
splitNonTerminal
:: String
-> [(String, [(Rule t)])]
-> Pinchot t ((String, [(Rule t)]), [(String, [Rule t])])
splitNonTerminal n sq = Pinchot $ case sq of
[] -> throwE $ EmptyNonTerminal n
x : xs -> return (x, xs)
terminalSeq
:: RuleName
-> [t]
-> Pinchot t (Rule t)
terminalSeq name sq = newRule name (RSeqTerm sq)
nonTerminal
:: RuleName
-> [(AlternativeName, [Rule t])]
-> Pinchot t (Rule t)
nonTerminal name sq = do
mapM_ addDataConName . fmap fst $ sq
(b1, bs) <- splitNonTerminal name sq
let branches = RBranch (uncurry Branch b1, fmap (uncurry Branch) bs)
newRule name branches
list
:: RuleName
-> Rule t
-> Pinchot t (Rule t)
list name r = newRule name (RMany r)
list1
:: RuleName
-> Rule t
-> Pinchot t (Rule t)
list1 name r = newRule name (RMany1 r)
option
:: RuleName
-> Rule t
-> Pinchot t (Rule t)
option name r = newRule name (ROptional r)
wrap
:: RuleName
-> Rule t
-> Pinchot t (Rule t)
wrap name r = newRule name (RWrap r)
getAncestors
:: Rule t
-> State (Set String) [Rule t]
getAncestors r@(Rule name _ ei) = do
set <- get
if Set.member name set
then return []
else do
put (Set.insert name set)
case ei of
RTerminal _ -> return [r]
RBranch (b1, bs) -> do
as1 <- branchAncestors b1
ass <- fmap join . mapM branchAncestors $ bs
return $ r : as1 <> ass
RSeqTerm _ -> return [r]
ROptional c -> do
cs <- getAncestors c
return $ r : cs
RMany c -> do
cs <- getAncestors c
return $ r : cs
RMany1 c -> do
cs <- getAncestors c
return $ r : cs
RWrap c -> do
cs <- getAncestors c
return $ r : cs
where
branchAncestors (Branch _ rs) = fmap join . mapM getAncestors $ rs
ruleAndAncestors
:: Rule t
-> [Rule t]
ruleAndAncestors r = fst $ runState (getAncestors r) Set.empty
rulesDemandedBeforeDefined :: Foldable f => f (Rule t) -> Set Name
rulesDemandedBeforeDefined = snd . foldl f (Set.empty, Set.empty)
where
f (lhsDefined, results) (Rule nm _ ty)
= (Set.insert nm lhsDefined, results')
where
results' = case ty of
RTerminal _ -> results
RBranch (b1, bs) -> foldr checkBranch (checkBranch b1 results) bs
where
checkBranch (Branch _ rls) rslts = foldr checkRule rslts rls
RSeqTerm _ -> results
ROptional r -> checkRule r results
RMany r -> addHelper $ checkRule r results
RMany1 r -> addHelper $ checkRule r results
RWrap r -> checkRule r results
checkRule (Rule name _ _) rslts
| Set.member name lhsDefined = rslts
| otherwise = Set.insert (ruleName name) rslts
addHelper = Set.insert (helperName nm)
thBranch :: Branch t -> ConQ
thBranch (Branch nm rules) = normalC name fields
where
name = mkName nm
mkField (Rule n _ _) = strictType notStrict (conT (mkName n))
fields = toList . fmap mkField $ rules
thRule
:: Name
-> [Name]
-> Rule t
-> DecQ
thRule typeName derives (Rule nm _ ruleType) = case ruleType of
RTerminal _ -> newtypeD (cxt []) name [] newtypeCon derives
where
newtypeCon = normalC name
[strictType notStrict (conT typeName)]
RBranch (b1, bs) -> dataD (cxt []) name [] cons derives
where
cons = thBranch b1 : toList (fmap thBranch bs)
RSeqTerm _ -> dataD (cxt []) name [] cons derives
where
cons = [normalC name
[strictType notStrict (appT [t| [] |]
(conT typeName))]]
ROptional (Rule inner _ _) -> newtypeD (cxt []) name [] newtypeCon derives
where
newtypeCon = normalC name
[strictType notStrict (appT [t| Maybe |]
(conT (mkName inner)))]
RMany (Rule inner _ _) -> newtypeD (cxt []) name [] newtypeCon derives
where
newtypeCon = normalC name
[strictType notStrict (appT [t| [] |]
(conT (mkName inner)))]
RMany1 (Rule inner _ _) -> dataD (cxt []) name [] [cons] derives
where
cons = normalC name
[ strictType notStrict (conT (mkName inner))
, strictType notStrict (appT [t| [] |]
(conT (mkName inner)))]
RWrap (Rule inner _ _) -> newtypeD (cxt []) name [] newtypeCon derives
where
newtypeCon = normalC name
[ strictType notStrict (conT (mkName inner)) ]
where
name = mkName nm
thAllRules
:: Name
-> [Name]
-> Map Int (Rule t)
-> DecsQ
thAllRules typeName derives
= sequence
. fmap (thRule typeName derives)
. fmap snd
. M.toAscList
allRulesToCode
:: Name
-> [Name]
-> Pinchot t a
-> DecsQ
allRulesToCode typeName derives pinchot = case ei of
Left err -> fail $ "pinchot: bad grammar: " ++ show err
Right _ -> thAllRules typeName derives (allRules st')
where
(ei, st') = runState (runExceptT (runPinchot pinchot))
(Names Set.empty Set.empty 0 M.empty)
ruleTreeToCode
:: Name
-> [Name]
-> Pinchot t (Rule t)
-> DecsQ
ruleTreeToCode typeName derives pinchot = case ei of
Left err -> fail $ "pinchot: bad grammar: " ++ show err
Right r -> sequence . toList . fmap (thRule typeName derives)
. runCalc . getAncestors $ r
where
runCalc stateCalc = fst $ runState stateCalc (Set.empty)
(ei, _) = runState (runExceptT (runPinchot pinchot))
(Names Set.empty Set.empty 0 M.empty)
ruleToParser
:: Syntax.Lift t
=> String
-> Rule t
-> Q [Stmt]
ruleToParser prefix (Rule nm mayDescription rt) = case rt of
RTerminal ivls -> do
topRule <- makeRule expression
return [topRule]
where
expression = [| fmap $constructor (satisfy (inIntervals ivls)) |]
RBranch (b1, bs) -> do
topRule <- makeRule expression
return [topRule]
where
expression = foldl addBranch (branchToParser prefix b1) bs
where
addBranch tree branch =
[| $tree <|> $(branchToParser prefix branch) |]
RSeqTerm sq -> do
let nestRule = bindS (varP helper) [|rule $(go sq)|]
where
go sqnce = case sqnce of
[] -> [|pure []|]
x : xs -> [|liftA2 (:) (symbol x) $(go xs)|]
nest <- nestRule
topRule <- makeRule (wrapper helper)
return [nest, topRule]
ROptional (Rule innerNm _ _) -> fmap (:[]) (makeRule expression)
where
expression = [| fmap $constructor (pure Nothing <|> $(just)) |]
where
just = [| fmap Just $(varE (ruleName innerNm)) |]
RMany (Rule innerNm _ _) -> do
let nestRule = bindS (varP helper) ([|rule|] `appE` parseSeq)
where
parseSeq = uInfixE [|pure []|] [|(<|>)|] pSeq
where
pSeq = [|liftA2 (:) $(varE (ruleName innerNm)) $(varE helper) |]
nest <- nestRule
top <- makeRule $ wrapper helper
return [nest, top]
RMany1 (Rule innerNm _ _) -> do
let nestRule = bindS (varP helper) [|rule $(parseSeq)|]
where
parseSeq = [| pure [] <|> $pSeq |]
where
pSeq = [| (:) <$> $(varE (ruleName innerNm))
<*> $(varE helper) |]
nest <- nestRule
let topExpn = [| $constructor <$> $(varE (ruleName innerNm))
<*> $(varE helper) |]
top <- makeRule topExpn
return [nest, top]
RWrap (Rule innerNm _ _) -> fmap (:[]) (makeRule expression)
where
expression = [|fmap $constructor $(varE (ruleName innerNm)) |]
where
makeRule expression = varP (ruleName nm) `bindS`
[|rule ($expression Text.Earley.<?> $(textToExp desc))|]
desc = maybe nm id mayDescription
textToExp txt = [| $(Syntax.lift txt) |]
constructor = constructorName prefix nm
wrapper wrapRule = [|fmap $constructor $(varE wrapRule) |]
helper = helperName nm
constructorName
:: String
-> String
-> ExpQ
constructorName pfx nm = conE (mkName name)
where
name = pfx' ++ nm
pfx'
| null pfx = ""
| otherwise = pfx ++ "."
ruleName :: String -> Name
ruleName suffix = mkName ("_r'" ++ suffix)
helperName :: String -> Name
helperName suffix = mkName ("_h'" ++ suffix)
branchToParser
:: Syntax.Lift t
=> String
-> Branch t
-> ExpQ
branchToParser prefix (Branch name rules) = case rules of
[] -> [| pure $constructor |]
(Rule rule1 _ _) : xs -> foldl f z xs
where
z = [| $constructor <$> $(varE (ruleName rule1)) |]
f soFar (Rule rule2 _ _) = [| $soFar <*> $(varE (ruleName rule2)) |]
where
constructor = constructorName prefix name
lazyPattern
:: Foldable c
=> c Name
-> Q Pat
lazyPattern = finish . foldr gen [p| () |]
where
gen name rest = [p| ($(varP name), $rest) |]
finish pat = [p| ~(_, $pat) |]
bigTuple
:: Foldable c
=> Name
-> c Name
-> ExpQ
bigTuple top = finish . foldr f [| () |]
where
f n rest = [| ( $(varE n), $rest) |]
finish tup = [| ($(varE top), $tup) |]
earleyGrammar
:: Syntax.Lift t
=> String
-> Pinchot t (Rule t)
-> Q Exp
earleyGrammar prefix pinc = case ei of
Left err -> fail $ "pinchot: bad grammar: " ++ show err
Right r@(Rule top _ _) -> do
let neededRules = ruleAndAncestors r
otherNames = rulesDemandedBeforeDefined neededRules
lamb = lamE [lazyPattern otherNames] expression
expression = do
stmts <- fmap concat . mapM (ruleToParser prefix)
. toList $ neededRules
result <- bigTuple (ruleName top) otherNames
rtn <- [|return|]
let returner = rtn `AppE` result
return $ DoE (stmts ++ [NoBindS returner])
[| fmap fst (mfix $lamb) |]
where
(ei, _) = runState (runExceptT (runPinchot pinc))
(Names Set.empty Set.empty 0 M.empty)