{-- Parsers form a semigroup in two ways: Given parsers (p :: Parser s) and (q :: Parser s) where s is the type of input stream (or any other semigroup) there is the concatenation p <> q = do x <- p y <- q return (x<>y) as well as the (backtracking) choice, try p <|> q. In this test suite we test whether the ParsecT parser provided by megaparsec and the SimpleParser provided in this package behave identically under these two operations. To this end, we randomly generate languages and parsers for them and test both parsers on the words of the language. --} {-# LANGUAGE FlexibleContexts, FlexibleInstances, TypeFamilies, OverloadedStrings, DeriveGeneric, DeriveFunctor, RankNTypes, ScopedTypeVariables #-} import Text.Megaparsec import Text.Megaparsec.Simple import Data.Void (Void) import Data.String (IsString(..)) import Data.Text (Text) import qualified Data.Text as T import Data.Maybe (isJust) import Data.List (transpose) import Control.Applicative (liftA2) import GHC.Generics (Generic) import Test.QuickCheck (Arbitrary(..)) import qualified Test.QuickCheck as Q import Data.Set (Set) import qualified Data.Set as Set main :: IO () main = do putStrLn "\ntesting whether the fast parser accepts words of the language" Q.quickCheck (prop_AcceptsLang Fast) putStrLn "testing whether the fast parser accepts the entire language" Q.quickCheck (prop_AcceptsAllWords Fast) putStrLn "testing whether parsers accept the same words" Q.quickCheck (prop_AcceptSameWords) putStrLn "testing whether lookAhead behaves identically" Q.quickCheck prop_LookAhead putStrLn "testing whether takeWhileP behaves identically" Q.quickCheck prop_takeWhileP putStrLn "testing whether takeWhile1P behaves identically" Q.quickCheck prop_takeWhile1P putStrLn "testing whether takeP behaves identically" Q.quickCheck prop_takeP data WhichParser = Fast | Ordinary -- * Parsing languages -- | The language is designed to test the basic -- parser operations concatenation and choice. data Language x = Word String | Choice x (Language x) (Language x) | Concat x (Language x) (Language x) deriving (Show,Eq,Ord,Generic,Functor) instance IsString (Language x) where fromString = Word instance Semigroup (Language ()) where (<>) = Concat () instance Semigroup (Language (Set Text)) where (Word w1) <> (Word w2) = Word (w1<>w2) x@(Word w) <> y@(Choice ws _ _) = let pfx = fromString w in Concat (Set.mapMonotonic (pfx<>) ws) x y x@(Word w) <> y@(Concat ws _ _) = let pfx = fromString w in Concat (Set.mapMonotonic (pfx<>) ws) x y x <> y = let ws = Set.map (uncurry (<>)) (Set.cartesianProduct (allWords x) (allWords y)) in Concat ws x y class Monad m => NonDeterministic m where nonDeterministically :: [m a] -> m a instance NonDeterministic [] where nonDeterministically = concat . transpose -- can enumerate finite choice of infinite lists instance NonDeterministic Q.Gen where nonDeterministically = Q.oneof class Conditionable m where suchThat :: m a -> (a -> Bool) -> m a instance Conditionable [] where suchThat = flip filter instance Conditionable Q.Gen where suchThat = Q.suchThat instance Conditionable Set where suchThat = flip Set.filter infixl 3 <||> -- | The choice operator of 'Language's (<||>) :: Language (Set Text) -> Language (Set Text) -> Language (Set Text) x <||> y = Choice (choiceWords (allWords x) (allWords y)) x y -- |Even with backtracking, -- a parser may fail to recognize a word of the language -- if the choices are ordered in a way such that -- an earlier choice contains a proper prefix of a later choice. -- Consider for example the regular expression -- -- @ -- (a|ab)c -- @ -- -- and the word @abc@ which is not accepted by the parser -- -- @ -- (try (chunk "a") <|> chunk "ab") <> chunk "c" -- @ -- -- but is accepted by the parser -- -- @ -- (try (chunk "ab") <|> chunk "a") <> chunk "c" -- @ choiceWords :: Set Text -> Set Text -> Set Text choiceWords left right = Set.union left (right `suchThat` notSuffixOf left) where notSuffixOf earlier = \w -> not (any (\a -> a `T.isPrefixOf` w) earlier) genWords :: NonDeterministic gen => Language (Set Text) -> gen Text genWords = nonDeterministically . fmap pure . Set.toList . allWords -- | We record the set of words in the constructor. allWords :: Language (Set Text) -> Set Text allWords (Word w) = Set.singleton (fromString w) allWords (Choice ws _ _) = ws allWords (Concat ws _ _) = ws -- ** non-deterministic language generation -- non-deterministically generate a language -- -- >>> mapM_ print $ fmap (const ()) $ genLanguage ["Foo","Bar"] 1 -- >>> Q.sample' (arbitrary :: Q.Gen (Language (Set Text))) >>= (print.fmap (const ()).head) genLanguage :: NonDeterministic gen => gen String -> Int -> gen (Language (Set Text)) genLanguage genWord = let sizedLang = \size -> if size <= 0 then fmap Word genWord else let lang' = sizedLang (size `div` 2) in nonDeterministically [ fmap Word genWord, liftA2 (<>) lang' lang', liftA2 (<||>) lang' lang' ] in sizedLang genAlphaChar :: NonDeterministic gen => gen Char genAlphaChar = nonDeterministically [return c | c <- ['a'..'z']] genWordQ :: Q.Gen String genWordQ = fmap pure genAlphaChar -- use single-letter words as building blocks -- genWordQ = let g = genAlphaChar in liftA2 (:) (fmap toUpper g) (Q.listOf g) instance Arbitrary (Language (Set Text)) where arbitrary = Q.sized (genLanguage genWordQ) shrink (Word _) = [] shrink (Concat _ lang1 lang2) = [lang1,lang2] ++ [x <> y | (x,y) <- shrink (lang1,lang2)] shrink (Choice _ lang1 lang2) = [lang1,lang2] ++ [x <||> y | (x,y) <- shrink (lang1,lang2)] -- maximum word length maxWord :: Language (Set Text) -> Int maxWord = Set.foldl' (\imum w -> max imum (T.length w)) 0 . allWords -- ** checking the behaviour of parsers genParser :: MonadParsec Void Text p => Language x -> p Text genParser (Word txt) = chunk (fromString txt) genParser (Choice _ x y) = try (genParser x) <|> genParser y -- backtracking choice genParser (Concat _ x y) = liftA2 (<>) (genParser x) (genParser y) -- Tests whether 'genParser' accepts all words generated by 'genWords'. -- This can take loooong if the language is large. prop_AcceptsAllWords :: WhichParser -> Language (Set Text)-> Bool prop_AcceptsAllWords which lang = all acceptsWord (allWords lang) where acceptsWord :: Text -> Bool acceptsWord w = isJust (case which of Ordinary -> rightToMaybe (runParser (genParser lang <* eof :: Parsec Void Text Text) "test word" w) Fast -> simpleParse (genParser lang <* eof) w) -- generates random words of the language and checks whether -- the language parser accepts the word. prop_AcceptsLang :: WhichParser -> Language (Set Text) -> Q.Property prop_AcceptsLang which lang = let parseWord = case which of Ordinary -> rightToMaybe . runParser (genParser lang <* eof :: Parsec Void Text Text) "test word" Fast -> simpleParse (genParser lang <* eof) in Q.forAll (genWords lang) (\w -> isJust (parseWord w)) -- generates random words -- and checks whether both the fast and ordinary parsers -- accept them or not. prop_AcceptSameWords :: Language (Set Text) -> Q.Property prop_AcceptSameWords lang = let p1 = rightToMaybe . runParser (genParser lang :: Parsec Void Text Text) "test word" p2 = simpleParse (genParser lang) maxLen = maxWord lang in Q.forAll (fmap fromString (genWordQ `Q.suchThat` ((maxLen >=).length))) (\w -> isJust (p1 w) Q.=== isJust (p2 w)) -- * Testing the MonadParsec primitives rightToMaybe :: Either a b -> Maybe b rightToMaybe = either (const Nothing) Just simpleParse :: SimpleParser s a -> s -> Maybe a simpleParse p = fmap fst . runSimpleParser p type GenericParser a = forall p. MonadParsec Void Text p => p a behaveEquallyOn :: forall a. (Show a, Eq a) => GenericParser a -> Text -> Q.Property behaveEquallyOn p = let run1 = rightToMaybe . runParser (p :: Parsec Void Text a) "input" run2 = simpleParse p in \input -> run1 input Q.=== run2 input genShortText :: Q.Gen Text genShortText = Q.sized gen where gen size = if size <= 0 then fmap T.singleton genAlphaChar else let size' = size `div` 2 in liftA2 (<>) (gen size') (gen size') {-- newtype AlphaText = Alpha {getAlpha :: Text} instance Arbitrary AlphaText where arbitrary = fmap Alpha genShortText shrink = const [Alpha mempty] --} prop_LookAhead :: Q.Property prop_LookAhead = Q.forAll genShortText $ \needle -> Q.forAll genShortText $ \haystack -> lookAhead (chunk needle) `behaveEquallyOn` haystack prop_takeWhileP :: Q.Property prop_takeWhileP = Q.forAllBlind arbitrary $ \predicate -> Q.forAll genShortText $ \input -> (takeWhileP Nothing predicate) `behaveEquallyOn` input prop_takeWhile1P :: Q.Property prop_takeWhile1P = Q.forAllBlind arbitrary $ \predicate -> Q.forAll genShortText $ \input -> (takeWhile1P Nothing predicate) `behaveEquallyOn` input prop_takeP :: Q.Property prop_takeP = Q.forAll arbitrary $ \len -> Q.forAll genShortText $ \input -> (takeP Nothing len) `behaveEquallyOn` input