hxt-regex-xmlschema-9.2.0: A regular expression library for W3C XML Schema regular expressions

Portabilityportable
Stabilitystable
MaintainerUwe Schmidt <uwe@fh-wedel.de>
Safe HaskellSafe-Inferred

Text.Regex.XMLSchema.Generic.Regex

Description

W3C XML Schema Regular Expression Matcher

Grammar can be found under http://www.w3.org/TR/xmlschema11-2/#regexs

Synopsis

Documentation

data GenRegex s Source

Instances

Eq s => Eq (GenRegex s) 
Ord s => Ord (GenRegex s) 
StringLike s => Show (GenRegex s) 

mkZero :: s -> GenRegex sSource

construct the r.e. for the empty set. An (error-) message may be attached

mkZero' :: StringLike s => String -> GenRegex sSource

mkUnit :: GenRegex sSource

construct the r.e. for the set containing the empty word

mkSym :: StringLike s => CharSet -> GenRegex sSource

construct the r.e. for a set of chars

mkSym1 :: StringLike s => Char -> GenRegex sSource

construct an r.e. for a single char set

mkSymRng :: StringLike s => Char -> Char -> GenRegex sSource

construct an r.e. for an intervall of chars

mkWord :: StringLike s => [Char] -> GenRegex sSource

mkSym generaized for strings

mkDot :: GenRegex sSource

construct an r.e. for the set of all Unicode chars

mkStar :: StringLike s => GenRegex s -> GenRegex sSource

construct r.e. for r*

mkAll :: StringLike s => GenRegex sSource

construct an r.e. for the set of all Unicode words

mkAlt :: StringLike s => GenRegex s -> GenRegex s -> GenRegex sSource

construct the r.e for r1|r2

mkElse :: StringLike s => GenRegex s -> GenRegex s -> GenRegex sSource

construct the r.e. for r1{|}r2 (r1 orElse r2).

This represents the same r.e. as r1|r2, but when collecting the results of subexpressions in (...) and r1 succeeds, the subexpressions of r2 are discarded, so r1 matches are prioritized

example

 splitSubex "({1}x)|({2}.)"   "x" = ([("1","x"),("2","x")], "")

 splitSubex "({1}x){|}({2}.)" "x" = ([("1","x")], "")

mkSeq :: GenRegex s -> GenRegex s -> GenRegex sSource

Construct the sequence r.e. r1.r2

mkSeqs :: [GenRegex s] -> GenRegex sSource

mkSeq extened to lists

mkRep :: StringLike s => Int -> GenRegex s -> GenRegex sSource

Construct repetition r{i,}

mkRng :: StringLike s => Int -> Int -> GenRegex s -> GenRegex sSource

Construct range r{i,j}

mkOpt :: StringLike s => GenRegex s -> GenRegex sSource

Construct option r?

mkDiff :: StringLike s => GenRegex s -> GenRegex s -> GenRegex sSource

Construct difference r.e.: r1 {\} r2

example

 match "[a-z]+{\\}bush" "obama"     = True
 match "[a-z]+{\\}bush" "clinton"   = True
 match "[a-z]+{\\}bush" "bush"      = False     -- not important any more

mkIsect :: StringLike s => GenRegex s -> GenRegex s -> GenRegex sSource

Construct r.e. for intersection: r1 {&} r2

example

 match ".*a.*{&}.*b.*" "-a-b-"  = True
 match ".*a.*{&}.*b.*" "-b-a-"  = True
 match ".*a.*{&}.*b.*" "-a-a-"  = False
 match ".*a.*{&}.*b.*" "---b-"  = False

mkExor :: StringLike s => GenRegex s -> GenRegex s -> GenRegex sSource

Construct r.e. for exclusive or: r1 {^} r2

example

 match "[a-c]+{^}[c-d]+" "abc"  = True
 match "[a-c]+{^}[c-d]+" "acdc" = False
 match "[a-c]+{^}[c-d]+" "ccc"  = False
 match "[a-c]+{^}[c-d]+" "cdc"  = True

mkCompl :: StringLike s => GenRegex s -> GenRegex sSource

Construct the Complement of an r.e.: whole set of words - r

mkBr :: s -> GenRegex s -> GenRegex sSource

Construct a labeled subexpression: ({label}r)

mkBr' :: StringLike s => String -> GenRegex s -> GenRegex sSource

isZero :: GenRegex s -> BoolSource

nullable' :: StringLike s => GenRegex s -> Nullable sSource

delta1 :: StringLike s => Char -> s -> GenRegex s -> GenRegex sSource

firstChars :: StringLike s => GenRegex s -> CharSetSource

FIRST for regular expressions

this is only an approximation, the real set of char may be smaller, when the expression contains intersection, set difference or exor operators

matchWithRegex' :: StringLike s => GenRegex s -> s -> Maybe (SubexResults s)Source

splitWithRegex :: StringLike s => GenRegex s -> s -> Maybe (SubexResults s, s)Source

This function wraps the whole regex in a subexpression before starting the parse. This is done for getting access to the whole parsed string. Therfore we need one special label, this label is the Nothing value, all explicit labels are Just labels.

splitWithRegex' :: StringLike s => GenRegex s -> s -> Maybe (GenRegex s, s)Source

The main scanner function

splitWithRegexCS :: StringLike s => GenRegex s -> CharSet -> s -> Maybe (SubexResults s, s)Source

splitWithRegexCS' :: StringLike s => GenRegex s -> CharSet -> s -> Maybe (GenRegex s, s)Source

speedup version for splitWithRegex'

This function checks whether the input starts with a char from FIRST re. If this is not the case, the split fails. The FIRST set can be computed once for a whole tokenizer and reused by every call of split