Portability | portable |
---|---|
Stability | stable |
Maintainer | Uwe Schmidt <uwe@fh-wedel.de> |
Safe Haskell | Safe-Inferred |
W3C XML Schema Regular Expression Matcher
Grammar can be found under http://www.w3.org/TR/xmlschema11-2/#regexs
- data GenRegex s
- mkZero :: s -> GenRegex s
- mkZero' :: StringLike s => String -> GenRegex s
- mkUnit :: GenRegex s
- mkSym :: StringLike s => CharSet -> GenRegex s
- mkSym1 :: StringLike s => Char -> GenRegex s
- mkSymRng :: StringLike s => Char -> Char -> GenRegex s
- mkWord :: StringLike s => [Char] -> GenRegex s
- mkDot :: GenRegex s
- mkStar :: StringLike s => GenRegex s -> GenRegex s
- mkAll :: StringLike s => GenRegex s
- mkAlt :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s
- mkElse :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s
- mkSeq :: GenRegex s -> GenRegex s -> GenRegex s
- mkSeqs :: [GenRegex s] -> GenRegex s
- mkRep :: StringLike s => Int -> GenRegex s -> GenRegex s
- mkRng :: StringLike s => Int -> Int -> GenRegex s -> GenRegex s
- mkOpt :: StringLike s => GenRegex s -> GenRegex s
- mkDiff :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s
- mkIsect :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s
- mkExor :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s
- mkInterleave :: GenRegex s -> GenRegex s -> GenRegex s
- mkCompl :: StringLike s => GenRegex s -> GenRegex s
- mkBr :: s -> GenRegex s -> GenRegex s
- mkBr' :: StringLike s => String -> GenRegex s -> GenRegex s
- isZero :: GenRegex s -> Bool
- errRegex :: StringLike s => GenRegex s -> s
- nullable :: StringLike s => GenRegex s -> Bool
- nullable' :: StringLike s => GenRegex s -> Nullable s
- delta1 :: StringLike s => Char -> s -> GenRegex s -> GenRegex s
- delta :: StringLike s => s -> GenRegex s -> GenRegex s
- firstChars :: StringLike s => GenRegex s -> CharSet
- matchWithRegex :: StringLike s => GenRegex s -> s -> Bool
- matchWithRegex' :: StringLike s => GenRegex s -> s -> Maybe (SubexResults s)
- splitWithRegex :: StringLike s => GenRegex s -> s -> Maybe (SubexResults s, s)
- splitWithRegex' :: StringLike s => GenRegex s -> s -> Maybe (GenRegex s, s)
- splitWithRegexCS :: StringLike s => GenRegex s -> CharSet -> s -> Maybe (SubexResults s, s)
- splitWithRegexCS' :: StringLike s => GenRegex s -> CharSet -> s -> Maybe (GenRegex s, s)
Documentation
Eq s => Eq (GenRegex s) | |
Ord s => Ord (GenRegex s) | |
StringLike s => Show (GenRegex s) |
mkZero :: s -> GenRegex sSource
construct the r.e. for the empty set. An (error-) message may be attached
mkZero' :: StringLike s => String -> GenRegex sSource
mkSym :: StringLike s => CharSet -> GenRegex sSource
construct the r.e. for a set of chars
mkSym1 :: StringLike s => Char -> GenRegex sSource
construct an r.e. for a single char set
mkSymRng :: StringLike s => Char -> Char -> GenRegex sSource
construct an r.e. for an intervall of chars
mkWord :: StringLike s => [Char] -> GenRegex sSource
mkSym generaized for strings
mkStar :: StringLike s => GenRegex s -> GenRegex sSource
construct r.e. for r*
mkAll :: StringLike s => GenRegex sSource
construct an r.e. for the set of all Unicode words
mkElse :: StringLike s => GenRegex s -> GenRegex s -> GenRegex sSource
construct the r.e. for r1{|}r2 (r1 orElse r2).
This represents the same r.e. as r1|r2, but when collecting the results of subexpressions in (...) and r1 succeeds, the subexpressions of r2 are discarded, so r1 matches are prioritized
example
splitSubex "({1}x)|({2}.)" "x" = ([("1","x"),("2","x")], "") splitSubex "({1}x){|}({2}.)" "x" = ([("1","x")], "")
mkRep :: StringLike s => Int -> GenRegex s -> GenRegex sSource
Construct repetition r{i,}
mkRng :: StringLike s => Int -> Int -> GenRegex s -> GenRegex sSource
Construct range r{i,j}
mkOpt :: StringLike s => GenRegex s -> GenRegex sSource
Construct option r?
mkDiff :: StringLike s => GenRegex s -> GenRegex s -> GenRegex sSource
Construct difference r.e.: r1 {\} r2
example
match "[a-z]+{\\}bush" "obama" = True match "[a-z]+{\\}bush" "clinton" = True match "[a-z]+{\\}bush" "bush" = False -- not important any more
mkIsect :: StringLike s => GenRegex s -> GenRegex s -> GenRegex sSource
Construct r.e. for intersection: r1 {&} r2
example
match ".*a.*{&}.*b.*" "-a-b-" = True match ".*a.*{&}.*b.*" "-b-a-" = True match ".*a.*{&}.*b.*" "-a-a-" = False match ".*a.*{&}.*b.*" "---b-" = False
mkExor :: StringLike s => GenRegex s -> GenRegex s -> GenRegex sSource
Construct r.e. for exclusive or: r1 {^} r2
example
match "[a-c]+{^}[c-d]+" "abc" = True match "[a-c]+{^}[c-d]+" "acdc" = False match "[a-c]+{^}[c-d]+" "ccc" = False match "[a-c]+{^}[c-d]+" "cdc" = True
mkInterleave :: GenRegex s -> GenRegex s -> GenRegex sSource
mkCompl :: StringLike s => GenRegex s -> GenRegex sSource
Construct the Complement of an r.e.: whole set of words - r
mkBr' :: StringLike s => String -> GenRegex s -> GenRegex sSource
errRegex :: StringLike s => GenRegex s -> sSource
nullable :: StringLike s => GenRegex s -> BoolSource
nullable' :: StringLike s => GenRegex s -> Nullable sSource
delta1 :: StringLike s => Char -> s -> GenRegex s -> GenRegex sSource
delta :: StringLike s => s -> GenRegex s -> GenRegex sSource
firstChars :: StringLike s => GenRegex s -> CharSetSource
FIRST for regular expressions
this is only an approximation, the real set of char may be smaller, when the expression contains intersection, set difference or exor operators
matchWithRegex :: StringLike s => GenRegex s -> s -> BoolSource
matchWithRegex' :: StringLike s => GenRegex s -> s -> Maybe (SubexResults s)Source
splitWithRegex :: StringLike s => GenRegex s -> s -> Maybe (SubexResults s, s)Source
This function wraps the whole regex in a subexpression before starting the parse. This is done for getting access to the whole parsed string. Therfore we need one special label, this label is the Nothing value, all explicit labels are Just labels.
splitWithRegex' :: StringLike s => GenRegex s -> s -> Maybe (GenRegex s, s)Source
The main scanner function
splitWithRegexCS :: StringLike s => GenRegex s -> CharSet -> s -> Maybe (SubexResults s, s)Source
splitWithRegexCS' :: StringLike s => GenRegex s -> CharSet -> s -> Maybe (GenRegex s, s)Source
speedup version for splitWithRegex'
This function checks whether the input starts with a char from FIRST re. If this is not the case, the split fails. The FIRST set can be computed once for a whole tokenizer and reused by every call of split