Safe Haskell	None
Language	Haskell98

Text.Regex.TDFA.CorePattern

Description

The CorePattern module deconstructs the Pattern tree created by ReadRegex.parseRegex and returns a simpler Q/P tree with annotations at each Q node. This will be converted by the TNFA module into a QNFA finite automaton.

Of particular note, this Pattern to Q/P conversion creates and assigns all the internal Tags that will be used during the matching process, and associates the capture groups with the tags that represent their starting and ending locations and with their immediate parent group.

Each Maximize and Minimize tag is held as either a preTag or a postTag by one and only one location in the Q/P tree. The Orbit tags are each held by one and only one Star node. Tags that stop a Group are also held in perhaps numerous preReset lists.

The additional nullQ :: NullView field of Q records the potentially complex information about what tests and tags must be used if the pattern unQ :: P matches zero characters. There can be redundancy in the NullView, which is eliminated by cleanNullView.

Uses recursive do notation.

2009 XXX TODO: we can avoid needing tags in the part of the pattern after the last capturing group (when right-associative). This is flipped for left-associative, where the front of the pattern before the first capturing group needs no tags. The edge of these regions is subtle: both cases need a Maximize tag. One ought to be able to check the Pattern: if the root is PConcat then a scan from the end (start) can find the first element with an embedded PGroup, and the PGroup-free elements can be wrapped in some new PNOTAG semantic indicator.

Synopsis

data Q = Q {
- nullQ :: NullView
- takes :: (Position, Maybe Position)
- preReset :: [Tag]
- postSet :: [Tag]
- preTag, postTag :: Maybe Tag
- tagged :: Bool
- childGroups :: Bool
- wants :: Wanted
- unQ :: P
}
data P
- = Empty
- | Or [Q]
- | Seq Q Q
- | Star {
  - getOrbit :: Maybe Tag
  - resetOrbits :: [Tag]
  - firstNull :: Bool
  - unStar :: Q
  }
- | Test TestInfo
- | OneChar Pattern
- | NonEmpty Q
data WhichTest
- = Test_BOL
- | Test_EOL
- | Test_BOB
- | Test_EOB
- | Test_BOW
- | Test_EOW
- | Test_EdgeWord
- | Test_NotEdgeWord
data Wanted
- = WantsQNFA
- | WantsQT
- | WantsBoth
- | WantsEither
type TestInfo = (WhichTest, DoPa)
data OP
- = Maximize
- | Minimize
- | Orbit
- | Ignore
newtype SetTestInfo = SetTestInfo {
- getTests :: EnumMap WhichTest (EnumSet DoPa)
}
type NullView = [(SetTestInfo, TagList)]
patternToQ :: CompOption -> (Pattern, (GroupIndex, DoPa)) -> (Q, Array Tag OP, Array GroupIndex [GroupInfo])
cleanNullView :: NullView -> NullView
cannotAccept :: Q -> Bool
mustAccept :: Q -> Bool

Documentation

data Q Source #

Constructors

Q
Fields nullQ :: NullView takes :: (Position, Maybe Position) preReset :: [Tag] postSet :: [Tag] preTag, postTag :: Maybe Tag tagged :: Bool childGroups :: Bool wants :: Wanted unQ :: P

Instances

Eq Q Source #
Instance details Defined in Text.Regex.TDFA.CorePattern Methods (==) :: Q -> Q -> Bool # (/=) :: Q -> Q -> Bool #
Show Q Source #
Instance details Defined in Text.Regex.TDFA.CorePattern Methods showsPrec :: Int -> Q -> ShowS # show :: Q -> String # showList :: [Q] -> ShowS #

data P Source #

Constructors

Empty
Or [Q]
Seq Q Q
Star
Fields getOrbit :: Maybe Tag resetOrbits :: [Tag] firstNull :: Bool unStar :: Q
Test TestInfo
OneChar Pattern
NonEmpty Q

Instances

Eq P Source #
Instance details Defined in Text.Regex.TDFA.CorePattern Methods (==) :: P -> P -> Bool # (/=) :: P -> P -> Bool #
Show P Source #
Instance details Defined in Text.Regex.TDFA.CorePattern Methods showsPrec :: Int -> P -> ShowS # show :: P -> String # showList :: [P] -> ShowS #

data WhichTest Source #

Known predicates: Beginning of Line (^) and End of Line ($). Support for GNU extensions is also being added: \` beginning of buffer, \' end of buffer, \< and \> for begin and end of words, \b and \B for word boundary and not word boundary.

Constructors

Test_BOL
Test_EOL
Test_BOB
Test_EOB
Test_BOW
Test_EOW
Test_EdgeWord
Test_NotEdgeWord

Instances

Enum WhichTest Source #
Instance details Defined in Text.Regex.TDFA.Common Methods succ :: WhichTest -> WhichTest # pred :: WhichTest -> WhichTest # toEnum :: Int -> WhichTest # fromEnum :: WhichTest -> Int # enumFrom :: WhichTest -> [WhichTest] # enumFromThen :: WhichTest -> WhichTest -> [WhichTest] # enumFromTo :: WhichTest -> WhichTest -> [WhichTest] # enumFromThenTo :: WhichTest -> WhichTest -> WhichTest -> [WhichTest] #
Eq WhichTest Source #
Instance details Defined in Text.Regex.TDFA.Common Methods (==) :: WhichTest -> WhichTest -> Bool # (/=) :: WhichTest -> WhichTest -> Bool #
Ord WhichTest Source #
Instance details Defined in Text.Regex.TDFA.Common Methods compare :: WhichTest -> WhichTest -> Ordering # (<) :: WhichTest -> WhichTest -> Bool # (<=) :: WhichTest -> WhichTest -> Bool # (>) :: WhichTest -> WhichTest -> Bool # (>=) :: WhichTest -> WhichTest -> Bool # max :: WhichTest -> WhichTest -> WhichTest # min :: WhichTest -> WhichTest -> WhichTest #
Show WhichTest Source #
Instance details Defined in Text.Regex.TDFA.Common Methods showsPrec :: Int -> WhichTest -> ShowS # show :: WhichTest -> String # showList :: [WhichTest] -> ShowS #

data Wanted Source #

Constructors

WantsQNFA
WantsQT
WantsBoth
WantsEither

Instances

Eq Wanted Source #
Instance details Defined in Text.Regex.TDFA.CorePattern Methods (==) :: Wanted -> Wanted -> Bool # (/=) :: Wanted -> Wanted -> Bool #
Show Wanted Source #
Instance details Defined in Text.Regex.TDFA.CorePattern Methods showsPrec :: Int -> Wanted -> ShowS # show :: Wanted -> String # showList :: [Wanted] -> ShowS #

type TestInfo = (WhichTest, DoPa) Source #

data OP Source #

Internal use to indicate type of tag and preference for larger or smaller Positions

Constructors

Maximize
Minimize
Orbit
Ignore

Instances

Eq OP Source #
Instance details Defined in Text.Regex.TDFA.Common Methods (==) :: OP -> OP -> Bool # (/=) :: OP -> OP -> Bool #
Show OP Source #
Instance details Defined in Text.Regex.TDFA.Common Methods showsPrec :: Int -> OP -> ShowS # show :: OP -> String # showList :: [OP] -> ShowS #

newtype SetTestInfo Source #

Constructors

SetTestInfo
Fields getTests :: EnumMap WhichTest (EnumSet DoPa)

Instances

Eq SetTestInfo Source #
Instance details Defined in Text.Regex.TDFA.CorePattern Methods (==) :: SetTestInfo -> SetTestInfo -> Bool # (/=) :: SetTestInfo -> SetTestInfo -> Bool #
Show SetTestInfo Source #
Instance details Defined in Text.Regex.TDFA.CorePattern Methods showsPrec :: Int -> SetTestInfo -> ShowS # show :: SetTestInfo -> String # showList :: [SetTestInfo] -> ShowS #
Semigroup SetTestInfo Source #
Instance details Defined in Text.Regex.TDFA.CorePattern Methods (<>) :: SetTestInfo -> SetTestInfo -> SetTestInfo # sconcat :: NonEmpty SetTestInfo -> SetTestInfo # stimes :: Integral b => b -> SetTestInfo -> SetTestInfo #
Monoid SetTestInfo Source #
Instance details Defined in Text.Regex.TDFA.CorePattern Methods mempty :: SetTestInfo # mappend :: SetTestInfo -> SetTestInfo -> SetTestInfo # mconcat :: [SetTestInfo] -> SetTestInfo #

type NullView = [(SetTestInfo, TagList)] Source #

patternToQ :: CompOption -> (Pattern, (GroupIndex, DoPa)) -> (Q, Array Tag OP, Array GroupIndex [GroupInfo]) Source #

cleanNullView :: NullView -> NullView Source #

cannotAccept :: Q -> Bool Source #

mustAccept :: Q -> Bool Source #