-- | Collection of small helper functions for grammars.

module FormalLanguage.CFG.Grammar.Util where

import Control.Lens hiding (Index,index)
import Data.Tuple (swap)
import Data.List (sort,nub,genericReplicate)

import FormalLanguage.CFG.Grammar.Types



-- | @Term@, @Deletion@, and @Epsilon@ all count as terminal symbols.

isTerminal :: Symbol -> Bool
isTerminal = allOf folded (\case SynVar{} -> False; (SynTerm _ _) -> False; _ -> True) . _getSymbolList

-- | @Term@, and @Epsilon@ are terminal symbols that can be bound.

isBindableTerminal :: Symbol -> Bool
isBindableTerminal = allOf folded (\case (Term _ _) -> True; _ -> False) . _getSymbolList

-- | Only @SynVar@s are non-terminal.

isSyntactic :: Symbol -> Bool
isSyntactic = allOf folded (\case SynVar{} -> True; _ -> False) . _getSymbolList

-- | special case of single-tape synvar in multi-tape setting

isSynStacked :: Symbol -> Bool
isSynStacked = allOf folded (\case SynVar{} -> True; Deletion -> True; _ -> False) . _getSymbolList

-- | true if we have a split synvar

isAllSplit :: Symbol -> Bool
isAllSplit = allOf folded (\case (SynVar _ _ n _) -> n>1 ; _ -> False) . _getSymbolList

-- | Set all @splitK@ values to @0@ for lookups.

splitK0 :: Symbol -> Symbol
splitK0 = set (getSymbolList . traverse . splitK) 0

-- | Take a split symbol and rewrite as full.

splitToFull :: Symbol -> Symbol
splitToFull (Symbol [SynVar s i n k]) = Symbol . genericReplicate n $ SynVar s i n 0

-- | Is this a syntactic terminal symbol?

isSynTerm :: Symbol -> Bool
isSynTerm = allOf folded (\case (SynTerm _ _) -> True; _ -> False) . _getSymbolList

-- | Epsilon-only symbols.

isEpsilon :: Symbol -> Bool
isEpsilon = allOf folded (\case Epsilon -> True; _ -> False) . _getSymbolList

-- | Dimension of the grammar. Rather costly, because we check for dimensional
-- consistency.

dim :: Grammar -> Int
dim g
  | null ls = error "no terminal symbol in grammar"
  | all (l==) ls = l
  | otherwise = error "inconsistent dimensionality"
  where ls@(l:_) = map (length . _getSymbolList) $ g^.rules.folded.rhs

-- | Extract single-tape terminals together with their tape dimension.

uniqueTermsWithTape :: Grammar -> [(SynTermEps , Tape)]
uniqueTermsWithTape = uniqueSynTermEpsWithTape . uniqueTerminalSymbols

-- | Extract single-tape bindable terminals together with their tape dimension.

uniqueBindableTermsWithTape :: Grammar -> [(SynTermEps , Tape)]
uniqueBindableTermsWithTape = uniqueSynTermEpsWithTape . uniqueBindableTerminalSymbols

-- |

uniqueSynVarsWithTape :: Grammar -> [(SynTermEps, Tape)]
uniqueSynVarsWithTape = uniqueSynTermEpsWithTape . uniqueSyntacticSymbols

-- |

uniqueSynTermsWithTape :: Grammar -> [(SynTermEps, Tape)]
uniqueSynTermsWithTape = uniqueSynTermEpsWithTape . uniqueSynTermSymbols

-- |

uniqueSynTermEpsWithTape :: [Symbol] -> [(SynTermEps, Tape)]
uniqueSynTermEpsWithTape = nub . sort                             -- cleanup
                         . map swap                               -- swap index to second position
                         . concatMap (zip [0..] . _getSymbolList) -- combine single-tape STEs with tape indices

-- | Return the nub list of terminal symbols. This includes @Deletion@
-- symbols, and might not be what you want. Check
-- 'uniqueBindableTerminalSymbols' too!

uniqueTerminalSymbols :: Grammar -> [Symbol]
uniqueTerminalSymbols = nub . sort . filter isTerminal . toListOf (rules.folded.rhs.folded)

-- |

uniqueBindableTerminalSymbols :: Grammar -> [Symbol]
uniqueBindableTerminalSymbols = nub . sort . filter isBindableTerminal . toListOf (rules.folded.rhs.folded)

-- | Return the nub list of syntactic symbols.

uniqueSyntacticSymbols :: Grammar -> [Symbol]
uniqueSyntacticSymbols g = nub . sort . filter isSyntactic $ g^..rules.folded.lhs

-- | Return the nub list of syntactic terminals.

uniqueSynTermSymbols :: Grammar -> [Symbol]
uniqueSynTermSymbols = nub . sort . filter isSynTerm . toListOf (rules.folded.rhs.folded)

-- |
--
-- TODO Currently a stub (original is in @.Grammar@ still. Want to have it
-- monadically, as the code is a mess.

normalizeStartEpsilon :: Grammar -> Grammar
normalizeStartEpsilon = error "normalizeStartEpsilon: (re-)write me"



-- | Left-linear grammars have at most one non-terminal on the RHS. It is the
-- first symbol.

isLeftLinear :: Grammar -> Bool
isLeftLinear g = allOf folded isll $ g^.rules where
  isll :: Rule -> Bool
  isll (Rule l _ []) = isSyntactic l
  isll (Rule l _ rs) = isSyntactic l && (allOf folded (not . isSyntactic) $ tail rs) -- at most one non-terminal

isRightLinear :: Grammar -> Bool
isRightLinear g = allOf folded isrl $ g^.rules where
  isrl :: Rule -> Bool
  isrl (Rule l _ []) = isSyntactic l
  isrl (Rule l _ rs) = isSyntactic l && (allOf folded (not . isSyntactic) $ init rs)

isLinear :: Grammar -> Bool
isLinear g = allOf folded isl $ g^.rules where
  isl :: Rule -> Bool
  isl (Rule l _ []) = isSyntactic l
  isl (Rule l _ rs) = isSyntactic l && (1 >= (length . filter isSyntactic $ rs))