Safe Haskell | Safe |
---|---|
Language | Haskell2010 |
Synopsis
- data RE c
- empty :: RE c
- eps :: RE c
- char :: c -> RE c
- charRange :: Ord c => c -> c -> RE c
- anyChar :: Bounded c => RE c
- appends :: Eq c => [RE c] -> RE c
- unions :: (Ord c, Enum c, Bounded c) => [RE c] -> RE c
- star :: Ord c => RE c -> RE c
- string :: [c] -> RE c
- nullable :: RE c -> Bool
- derivate :: (Ord c, Enum c, Bounded c) => c -> RE c -> RE c
- transitionMap :: forall c. (Ord c, Enum c, Bounded c) => RE c -> Map (RE c) (SF c (RE c))
- leadingChars :: (Ord c, Enum c, Bounded c) => RE c -> Partition c
- equivalent :: forall c. (Ord c, Enum c, Bounded c) => RE c -> RE c -> Bool
- generate :: (c -> c -> Gen c) -> Int -> RE c -> [[c]]
- isEmpty :: RE c -> Bool
- nullableProof :: forall c. (Ord c, Enum c, Bounded c) => RE c -> Maybe (RE c)
Documentation
Regular expression
Constructors are exposed, but you should use
smart constructors in this module to construct RE
.
The Eq
and Ord
instances are structural.
The Kleene
etc constructors do "weak normalisation", so for values
constructed using those operations Eq
witnesses "weak equivalence".
See equivalent
for regular-expression equivalence.
Structure is exposed in Kleene.RE module but consider constructors as
half-internal. There are soft-invariants, but violating them shouldn't
break anything in the package. (e.g. transitionMap
will eventually
terminate, but may create more redundant states if starting regexp is not
"weakly normalised").
REChars (RSet c) | Single character |
REAppend [RE c] | Concatenation |
REUnion (RSet c) (Set (RE c)) | Union |
REStar (RE c) | Kleene star |
Instances
ToLatin1 RE Source # | |
(Ord c, Enum c, Bounded c) => Complement c (RE c) Source # | |
Defined in Kleene.DFA complement :: RE c -> RE c Source # | |
(Ord c, Enum c, Bounded c) => TransitionMap c (RE c) Source # | |
Defined in Kleene.Internal.RE | |
(Ord c, Enum c, Bounded c) => Equivalent c (RE c) Source # | |
Defined in Kleene.Internal.RE | |
(Ord c, Enum c, Bounded c) => Match c (RE c) Source # | |
(Ord c, Enum c, Bounded c) => Derivate c (RE c) Source # | |
(Ord c, Enum c, Bounded c) => FiniteKleene c (RE c) Source # | |
(Ord c, Enum c, Bounded c) => CharKleene c (RE c) Source # | |
Eq c => Eq (RE c) Source # | |
Ord c => Ord (RE c) Source # | |
Show c => Show (RE c) Source # | |
c ~ Char => IsString (RE c) Source # | |
Defined in Kleene.Internal.RE fromString :: String -> RE c # | |
Eq c => Semigroup (RE c) Source # | |
Eq c => Monoid (RE c) Source # | |
(Ord c, Enum c, Bounded c, Arbitrary c) => Arbitrary (RE c) Source # | |
CoArbitrary c => CoArbitrary (RE c) Source # | |
Defined in Kleene.Internal.RE coarbitrary :: RE c -> Gen b -> Gen b # | |
(Ord c, Enum c, Bounded c) => Lattice (RE c) Source # | WARNING: The
|
(Ord c, Enum c, Bounded c) => BoundedJoinSemiLattice (RE c) Source # | |
Defined in Kleene.DFA | |
(Ord c, Enum c, Bounded c) => BoundedMeetSemiLattice (RE c) Source # | |
Defined in Kleene.DFA | |
c ~ Char => Pretty (RE c) Source # | |
(Ord c, Enum c, Bounded c) => Kleene (RE c) Source # | |
Construction
Binary operators are
<>
for append\/
for union
Empty regex. Doesn't accept anything.
>>>
putPretty (empty :: RE Char)
^[]$
>>>
putPretty (bottom :: RE Char)
^[]$
match (empty :: RE Char) (s :: String) === False
Empty string. Note: different than empty
.
>>>
putPretty eps
^$
>>>
putPretty (mempty :: RE Char)
^$
match (eps :: RE Char) s === null (s :: String)
anyChar :: Bounded c => RE c Source #
Any character. Note: different than dot!
>>>
putPretty anyChar
^[^]$
appends :: Eq c => [RE c] -> RE c Source #
Concatenate regular expressions.
(asREChar r <> s) <> t === r <> (s <> t)
asREChar r <> empty === empty
empty <> asREChar r === empty
asREChar r <> eps === r
eps <> asREChar r === r
unions :: (Ord c, Enum c, Bounded c) => [RE c] -> RE c Source #
Union of regular expressions.
asREChar r \/ r === r
asREChar r \/ s === s \/ r
(asREChar r \/ s) \/ t === r \/ (s \/ t)
empty \/ asREChar r === r
asREChar r \/ empty === r
everything \/ asREChar r === everything
asREChar r \/ everything === everything
star :: Ord c => RE c -> RE c Source #
Kleene star.
star (star r) === star (asREChar r)
star eps === asREChar eps
star empty === asREChar eps
star anyChar === asREChar everything
star (r \/ eps) === star (asREChar r)
star (char c \/ eps) === star (asREChar (char c))
star (empty \/ eps) === asREChar eps
string :: [c] -> RE c Source #
Literal string.
>>>
putPretty ("foobar" :: RE Char)
^foobar$
>>>
putPretty ("(.)" :: RE Char)
^\(\.\)$
Derivative
nullable :: RE c -> Bool Source #
We say that a regular expression r is nullable if the language it defines contains the empty string.
>>>
nullable eps
True
>>>
nullable (star "x")
True
>>>
nullable "foo"
False
derivate :: (Ord c, Enum c, Bounded c) => c -> RE c -> RE c Source #
Intuitively, the derivative of a language \(\mathcal{L} \subset \Sigma^\star\) with respect to a symbol \(a \in \Sigma\) is the language that includes only those suffixes of strings with a leading symbol \(a\) in \(\mathcal{L}\).
>>>
putPretty $ derivate 'f' "foobar"
^oobar$
>>>
putPretty $ derivate 'x' $ "xyz" \/ "abc"
^yz$
>>>
putPretty $ derivate 'x' $ star "xyz"
^yz(xyz)*$
Transition map
transitionMap :: forall c. (Ord c, Enum c, Bounded c) => RE c -> Map (RE c) (SF c (RE c)) Source #
Transition map. Used to construct DFA
.
>>>
void $ Map.traverseWithKey (\k v -> putStrLn $ pretty k ++ " : " ++ SF.showSF (fmap pretty v)) $ transitionMap ("ab" :: RE Char)
^[]$ : \_ -> "^[]$" ^b$ : \x -> if | x <= 'a' -> "^[]$" | x <= 'b' -> "^$" | otherwise -> "^[]$" ^$ : \_ -> "^[]$" ^ab$ : \x -> if | x <= '`' -> "^[]$" | x <= 'a' -> "^b$" | otherwise -> "^[]$"
leadingChars :: (Ord c, Enum c, Bounded c) => RE c -> Partition c Source #
Leading character sets of regular expression.
>>>
leadingChars "foo"
fromSeparators "ef"
>>>
leadingChars (star "b" <> star "e")
fromSeparators "abde"
>>>
leadingChars (charRange 'b' 'z')
fromSeparators "az"
Equivalence
equivalent :: forall c. (Ord c, Enum c, Bounded c) => RE c -> RE c -> Bool Source #
Whether two regexps are equivalent.
equivalent
re1 re2 = forall s.match
re1 s ===match
re1 s
>>>
let re1 = star "a" <> "a"
>>>
let re2 = "a" <> star "a"
These are different regular expressions, even we perform some normalisation-on-construction:
>>>
re1 == re2
False
They are however equivalent:
>>>
equivalent re1 re2
True
The algorithm works by executing states
on "product" regexp,
and checking whether all resulting states are both accepting or rejecting.
re1 == re2 ==> equivalent
re1 re2
More examples
>>>
let example re1 re2 = putPretty re1 >> putPretty re2 >> return (equivalent re1 re2)
>>>
example re1 re2
^a*a$ ^aa*$ True
>>>
example (star "aa") (star "aaa")
^(aa)*$ ^(aaa)*$ False
>>>
example (star "aa" <> star "aaa") (star "aaa" <> star "aa")
^(aa)*(aaa)*$ ^(aaa)*(aa)*$ True
>>>
example (star ("a" \/ "b")) (star $ star "a" <> star "b")
^[a-b]*$ ^(a*b*)*$ True
Generation
Generate random strings of the language RE c
describes.
>>>
let example = traverse_ print . take 3 . generate (curry QC.choose) 42
>>>
example "abc"
"abc" "abc" "abc"
>>>
example $ star $ "a" \/ "b"
"aaaaba" "bbba" "abbbbaaaa"
>>>
example empty
all (match r) $ take 10 $ generate (curry QC.choose) 42 (r :: RE Char)
Other
nullableProof :: forall c. (Ord c, Enum c, Bounded c) => RE c -> Maybe (RE c) Source #
Not only we can decide whether RE
is nullable, we can also
remove the empty string:
>>>
putPretty $ nullableProof eps
^[]$
>>>
putPretty $ nullableProof $ star "x"
^xx*$
>>>
putPretty $ nullableProof "foo"
Nothing
nullableProof
is consistent with nullable
:
isJust (nullableProof r) === nullable (asREChar r)
The returned regular expression is not nullable:
maybe True (not . nullable) $ nullableProof $ asREChar r
If we union with empty regex, we get a equivalent regular expression we started with:
maybe r (eps \/) (nullableProof r) `equivalent` (asREChar r)