-- {-# LANGUAGE Haskell2010 #-} {-# LANGUAGE EmptyCase, PostfixOperators, TupleSections, NamedFieldPuns, BangPatterns, BinaryLiterals, HexFloatLiterals, NumericUnderscores, GADTSyntax, RankNTypes, TypeApplications, PolyKinds, ExistentialQuantification, TypeOperators, ConstraintKinds, ExplicitForAll, KindSignatures, NamedWildCards, ScopedTypeVariables, FlexibleContexts, FlexibleInstances, MultiParamTypeClasses, ConstrainedClassMethods, InstanceSigs, TypeSynonymInstances, DeriveDataTypeable, DeriveFoldable, DeriveFunctor, DeriveTraversable, StandaloneDeriving, EmptyDataDeriving, DeriveLift, DeriveGeneric #-} -- Deleted GeneralisedNewtypeDeriving, because it is not compatible with Safe -- Deleted ImportQualifiedPost, StandaloneKindSignatures, because they are not supported in ghc 8.8.1 {-# LANGUAGE MonoLocalBinds #-} -- {-# LANGUAGE LambdaCase #-} {- | (I assume you have read description at https://hackage.haskell.org/package/check-cfg-ambiguity .) Example. Let's check grammar of expressions of form @1 + (2 + 3)@ for ambiguity. >>> import CheckCFGAmbiguity >>> import qualified Data.Map as M >>> :{ checkAmbiguity (M.fromList [ ("expr", [[N "term"], [N "expr", T "+", N "term"]]), ("term", [[T "number"], [T "(", N "expr", T ")"]]) ]) "expr" 10 :} SeemsUnambiguous -} module CheckCFGAmbiguity (TerminalOrNonterminal(..), checkAmbiguity, Result(..) {- $Earley -}) where import qualified Data.Map import qualified Data.Set import Control.Monad.ST(runST) import Data.STRef(newSTRef, readSTRef, writeSTRef) import Data.Foldable(for_) import Data.Maybe(fromJust, catMaybes) import Data.List(find) import Data.Functor((<&>)) import Control.Monad(when) data TerminalOrNonterminal t n = T t | N n deriving (Eq, Ord, Show) -- We always create values of this type using "toGrammar", thus we enforce that all RHS nonterminals appear in LHS -- It is okey to have nonterminals (i. e. nonterminals present in LHS) without productions -- Order of productions for single nonterminal is not important. But I use list anyway, not multiset newtype Grammar t n = Grammar (Data.Map.Map n [[TerminalOrNonterminal t n]]) deriving (Eq, Ord, Show) toGrammar :: (Ord n) => Data.Map.Map n [[TerminalOrNonterminal t n]] -> Maybe (Grammar t n) toGrammar g = if all (\prods -> all (\prod -> all (\case { T _ -> True; N nn -> Data.Map.member nn g; }) prod) prods) g then Just (Grammar g) else Nothing {- $setup >>> :set -XHaskell2010 >>> :set -XScopedTypeVariables >>> import Test.QuickCheck((===)) >>> import Data.Maybe(isJust, isNothing) >>> import Data.List(groupBy, sortBy) >>> :{ conv2 g = let { nts = map fst g; g2 = g <&> \(nn, prod) -> (nn, prod <&> \symb -> if elem symb nts then N symb else T symb); g3 = sortBy (\(a, _) (b, _) -> compare a b) g2; g4 = groupBy (\(a, _) (b, _) -> a == b) g3 <&> \gr -> (fst $ head gr, map snd gr); } in Data.Map.fromList g4 conv g = fromJust $ toGrammar $ conv2 g -- We know that fromJust will not fail :} -} -- $ -- prop> isJust $ toGrammar $ Data.Map.fromList [("a", [[N "b"]]), ("b", [[N "a"]])] -- prop> isJust $ toGrammar $ Data.Map.fromList [("a", [[N "b"]]), ("b", [[]])] -- prop> isJust $ toGrammar $ Data.Map.fromList [("a", [[N "b"]]), ("b", [])] -- prop> isNothing $ toGrammar $ Data.Map.fromList [("a", [[N "b"]]), ("b", [[N "c", N "a"]])] {- $ >>> :{ conv [ ("term", ["id"]), ("prod", ["term"]), ("term", ["(", "prod", ")"]), ("prod", ["prod", "*", "term"]) ] == (Grammar $ Data.Map.fromList [ ("prod", [ [N "term"], [N "prod", T "*", N "term"] ]), ("term", [ [T "id"], [T "(", N "prod", T ")"] ]) ]) :} True -} while :: (Monad m) => m Bool -> m () while body = do { continue <- body; if continue then while body else return (); } data LowLevelTestAmbiguityResult = LLNoStart | LLAmbiguous | LLUnambiguous deriving (Eq, Ord, Show) -- Precondition: every nonterminal, reachable from start, should generate nonempty language -- Precondition: count >= 1 lowLevelTestAmbiguity :: (Ord n, Ord t) => Grammar t n -> n -> Int -> LowLevelTestAmbiguityResult lowLevelTestAmbiguity (Grammar g) start count = case Data.Map.member start g of { False -> LLNoStart; True -> runST $ do { allWords <- newSTRef $ Data.Set.singleton [N start]; currWords <- newSTRef [[N start]]; i <- newSTRef 0; collision <- newSTRef False; while $ do { do { currWordsV <- readSTRef currWords; writeSTRef currWords $ concat $ currWordsV <&> \word -> let { (before, after) = break (\case { N _ -> True; T _ -> False; }) word; } in case after of { [] -> []; (N nn):rest -> (fromJust $ Data.Map.lookup nn g) <&> \prod -> before ++ prod ++ rest; _ -> error "Impossible"; }; currWordsV2 <- readSTRef currWords; allWordsV <- readSTRef allWords; let { allWordsV2 = Data.Set.union allWordsV (Data.Set.fromList currWordsV2); }; writeSTRef allWords allWordsV2; when (Data.Set.size allWordsV2 /= Data.Set.size allWordsV + length currWordsV2) $ writeSTRef collision True; }; iV <- readSTRef i; writeSTRef i (iV + 1); collisionV <- readSTRef collision; return (not collisionV && iV + 1 < count); }; collisionV <- readSTRef collision; if collisionV then return LLAmbiguous else return LLUnambiguous; }; } {- $ >>> :{ do { g :: Grammar String String <- toGrammar $ Data.Map.fromList [("start", [])]; return $ lowLevelTestAmbiguity g "start" 10; } :} Just LLUnambiguous >>> :{ do { g :: Grammar String String <- toGrammar $ Data.Map.fromList [("start", [[N "a"]]), ("a", [])]; return $ lowLevelTestAmbiguity g "start" 10; } :} Just LLUnambiguous -} -- $ -- prop> lowLevelTestAmbiguity (conv []) "start" 10 === LLNoStart -- prop> lowLevelTestAmbiguity (conv [("a", [])]) "start" 10 === LLNoStart -- prop> lowLevelTestAmbiguity (conv [("a", ["a"])]) "a" 10 === LLAmbiguous -- prop> lowLevelTestAmbiguity (conv [("a", ["b", "a"])]) "a" 10 == LLUnambiguous -- Checks that all nonterminals generate nonempty languages ntsProduceNonEmptyLang :: (Ord n) => Grammar t n -> Bool ntsProduceNonEmptyLang (Grammar g) = let { g2 = g <&> (\prods -> prods <&> (\prod -> catMaybes $ prod <&> (\case { N a -> Just a; T _ -> Nothing; }))); rec g3 = case Data.Map.null g3 of { True -> True; False -> case find (any null . snd) (Data.Map.toList g3) of { Just (x, _) -> rec $ fmap (map (filter (/= x))) (Data.Map.delete x g3); Nothing -> False; }; }; } in rec g2 {- $ >>> ntsProduceNonEmptyLang (conv [] :: Grammar String String) True >>> :{ do { g <- toGrammar $ Data.Map.fromList [("start", [])]; return $ ntsProduceNonEmptyLang g; } :} Just False >>> :{ ntsProduceNonEmptyLang (conv [ ("start", []) ]) :} True >>> :{ ntsProduceNonEmptyLang (conv [ ("start", ["a"]) ]) :} True >>> :{ ntsProduceNonEmptyLang (conv [ ("start", ["a"]), ("a", []) ]) :} True >>> :{ ntsProduceNonEmptyLang (conv [ ("start", ["a"]), ("a", ["b"]) ]) :} True >>> :{ ntsProduceNonEmptyLang (conv [ ("start", ["start"]) ]) :} False >>> :{ ntsProduceNonEmptyLang (conv [ ("term", ["id"]), ("term", ["(", "prod", ")"]), ("prod", ["term"]), ("prod", ["prod", "*", "term"]) ]) :} True -} data Result -- | Count of steps is less than 1 = WrongCount -- | Some nonterminal from RHS is not found in LHS | NTNotFound -- | Start nonterminal is not found in LHS | NoStart -- | Some nonterminal generates empty language | EmptyLang -- | The grammar is 100% ambiguous (i. e. the library was able to find ambiguous string) | Ambiguous -- | The grammar seems to be unambiguous (i. e. the library was not able to find ambiguous string after specified number of steps) | SeemsUnambiguous deriving (Eq, Ord, Show) -- | -- Checks grammar for ambiguity (see example above). Before actual ambiguity checking this function checks that every nonterminal generates nonempty language. If some nonterminal generates empty language, this function reports this and doesn't do actual ambiguity checking checkAmbiguity :: (Ord n, Ord t) => Data.Map.Map n [[TerminalOrNonterminal t n]] -- ^ Grammar (see example above) -> n -- ^ Start nonterminal -> Int -- ^ Count of steps. I don't try to document precise meaning of this argument -> Result checkAmbiguity g start count = case count >= 1 of { False -> WrongCount; True -> case toGrammar g of { Nothing -> NTNotFound; Just gg -> case Data.Map.member start g of { False -> NoStart; True -> case ntsProduceNonEmptyLang gg of { False -> EmptyLang; True -> case lowLevelTestAmbiguity gg start count of { LLNoStart -> error "Impossible"; LLAmbiguous -> Ambiguous; LLUnambiguous -> SeemsUnambiguous; }; }; }; }; } {- $ >>> checkAmbiguity (conv2 [("start", [])]) "start" 1 SeemsUnambiguous >>> checkAmbiguity (conv2 [("start", [])]) "start" 0 WrongCount >>> checkAmbiguity (conv2 [("start", [])]) "start" (-1) WrongCount >>> checkAmbiguity (Data.Map.fromList [("start", [[N "a"]])]) "start" 10 NTNotFound >>> :{ let { g :: Data.Map.Map String [[TerminalOrNonterminal String String]] = Data.Map.fromList [("start", [])]; } in checkAmbiguity g "start" 10 :} EmptyLang >>> :{ let { g :: Data.Map.Map String [[TerminalOrNonterminal String String]] = Data.Map.fromList [("start", [[N "a"]]), ("a", [])]; } in checkAmbiguity g "start" 10 :} EmptyLang >>> checkAmbiguity (conv2 []) "start" 10 NoStart >>> checkAmbiguity (conv2 [("a", [])]) "start" 10 NoStart >>> checkAmbiguity (conv2 [("a", [])]) "a" 10 SeemsUnambiguous >>> checkAmbiguity (conv2 [("a", ["b"])]) "a" 10 SeemsUnambiguous >>> :{ checkAmbiguity (conv2 [ ("a", ["b"]), ("a", ["c"]) ]) "a" 10 :} SeemsUnambiguous >>> checkAmbiguity (conv2 [("a", ["a"])]) "a" 10 EmptyLang >>> :{ checkAmbiguity (conv2 [ ("a", ["a"]), ("a", ["b"]) ]) "a" 10 :} Ambiguous >>> checkAmbiguity (conv2 [("a", ["b", "a"])]) "a" 10 EmptyLang >>> :{ checkAmbiguity (conv2 [ ("a", ["b"]), ("a", ["b"]) ]) "a" 10 :} Ambiguous >>> :{ checkAmbiguity (conv2 [ ("a", []), ("a", ["(", "a", ")", "a"]) ]) "a" 10 :} SeemsUnambiguous >>> :{ checkAmbiguity (conv2 [ ("a", []), ("a", ["(", "a", ")"]), ("a", ["a", "a"]) ]) "a" 10 :} Ambiguous >>> :{ checkAmbiguity (conv2 [ ("term", ["id"]), ("term", ["(", "prod", ")"]), ("prod", ["term"]), ("prod", ["prod", "*", "term"]) ]) "prod" 10 :} SeemsUnambiguous >>> :{ checkAmbiguity (conv2 [ ("start", ["a"]), ("start", ["b"]), ("a", ["x"]), ("b", ["x"]) ]) "start" 10 :} Ambiguous >>> :{ checkAmbiguity (conv2 [ ("start", ["a1"]), ("start", ["b"]), ("a1", ["a2"]), ("a2", ["x"]), ("b", ["x"]) ]) "start" 10 :} Ambiguous >>> :{ checkAmbiguity (conv2 [ ("t1000", ["id"]), ("t1000", ["(", "t0", ")"]), ("t999", ["t999", "t1000"]), ("t3", ["t4", "::", "t3"]), ("t3", ["%", "id", "::", "t0", ".", "t3"]), ("t0", ["!!", "id", "::", "t0", ".", "t0"]), ("t1", ["t2", "==>", "t1"]), ("t0", ["t1"]), ("t1", ["t2"]), ("t2", ["t3"]), ("t3", ["t4"]), ("t4", ["t999"]), ("t999", ["t1000"]) ]) "t0" 15 :} SeemsUnambiguous >>> :{ checkAmbiguity (conv2 [ ("t1000", ["id"]), ("t1000", ["(", "t0", ")"]), ("t999", ["t999", "t1000"]), ("t3", ["t4", "::", "t0"]), ("t3", ["%", "id", "::", "t0", ".", "t3"]), ("t0", ["!!", "id", "::", "t0", ".", "t0"]), ("t1", ["t2", "==>", "t1"]), ("t0", ["t1"]), ("t1", ["t2"]), ("t2", ["t3"]), ("t3", ["t4"]), ("t4", ["t999"]), ("t999", ["t1000"]) ]) "t0" 15 :} Ambiguous -} {- $Earley The following two grammars are from https://github.com/ollef/Earley/issues/54 >>> checkAmbiguity (Data.Map.fromList [("r1", [[T "A"], [N "r1"]])]) "r1" 10 Ambiguous >>> checkAmbiguity (Data.Map.fromList [("r1", [[N "r1"], [T "A"]])]) "r1" 10 Ambiguous -}