Z-Data-0.6.0.0: Array, vector and text
Copyright(c) Dong Han 2017-2018
LicenseBSD
Maintainerwinterland1989@gmail.com
Stabilityexperimental
Portabilitynon-portable
Safe HaskellNone
LanguageHaskell2010

Z.Data.Text.Regex

Description

Binding to google's RE2, microsoft did a nice job on RE2 regex syntaxs: https://docs.microsoft.com/en-us/deployedge/edge-learnmore-regex. Note GHC string literals need \ to be escaped, e.g.

>>> match (regex "([a-z0-9_\\.-]+)@([\\da-z\\.-]+)\\.([a-z\\.]{2,6})") "please end email to hello@world.com, foo@bar.com"
>>> ("hello@world.com",[Just "hello",Just "world",Just "com"],", foo@bar.com")
Synopsis

RE2 regex

data Regex Source #

A compiled RE2 regex.

Instances

Instances details
Show Regex Source # 
Instance details

Defined in Z.Data.Text.Regex

Methods

showsPrec :: Int -> Regex -> ShowS #

show :: Regex -> String #

showList :: [Regex] -> ShowS #

Generic Regex Source # 
Instance details

Defined in Z.Data.Text.Regex

Associated Types

type Rep Regex :: Type -> Type #

Methods

from :: Regex -> Rep Regex x #

to :: Rep Regex x -> Regex #

Print Regex Source # 
Instance details

Defined in Z.Data.Text.Regex

Methods

toUTF8BuilderP :: Int -> Regex -> Builder () Source #

type Rep Regex Source # 
Instance details

Defined in Z.Data.Text.Regex

type Rep Regex = D1 ('MetaData "Regex" "Z.Data.Text.Regex" "Z-Data-0.6.0.0-6EPTULxocQtBtAKoZfHPhU" 'False) (C1 ('MetaCons "Regex" 'PrefixI 'True) (S1 ('MetaSel ('Just "regexPtr") 'SourceUnpack 'SourceStrict 'DecidedStrict) (Rec0 (ForeignPtr Regex)) :*: (S1 ('MetaSel ('Just "regexCaptureNum") 'SourceUnpack 'SourceStrict 'DecidedStrict) (Rec0 Int) :*: S1 ('MetaSel ('Just "regexPattern") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Text))))

regex :: HasCallStack => Text -> Regex Source #

Compile a regex pattern, throw InvalidRegexPattern in case of illegal patterns.

data RegexOpts Source #

RE2 Regex options.

The options are (defaultRegexOpts in parentheses):

  posix_syntax     (false) restrict regexps to POSIX egrep syntax
  longest_match    (false) search for longest match, not first match
  log_errors       (true)  log syntax and execution errors to ERROR
  max_mem          (8<<20)  approx. max memory footprint of RE2
  literal          (false) interpret string as literal, not regexp
  never_nl         (false) never match \n, even if it is in regexp
  dot_nl           (false) dot matches everything including new line
  never_capture    (false) parse all parens as non-capturing
  case_sensitive   (true)  match is case-sensitive (regexp can override
                             with (?i) unless in posix_syntax mode)

The following options are only consulted when posix_syntax == true. When posix_syntax == false, these features are always enabled and cannot be turned off; to perform multi-line matching in that case, begin the regexp with (?m).

  perl_classes     (false) allow Perl's \d \s \w \D \S \W
  word_boundary    (false) allow Perl's \b \B (word boundary and not)
  one_line         (false) ^ and $ only match beginning and end of text

Instances

Instances details
Eq RegexOpts Source # 
Instance details

Defined in Z.Data.Text.Regex

Ord RegexOpts Source # 
Instance details

Defined in Z.Data.Text.Regex

Show RegexOpts Source # 
Instance details

Defined in Z.Data.Text.Regex

Generic RegexOpts Source # 
Instance details

Defined in Z.Data.Text.Regex

Associated Types

type Rep RegexOpts :: Type -> Type #

Print RegexOpts Source # 
Instance details

Defined in Z.Data.Text.Regex

type Rep RegexOpts Source # 
Instance details

Defined in Z.Data.Text.Regex

defaultRegexOpts :: RegexOpts Source #

Default regex options, see RegexOpts.

regexOpts :: HasCallStack => RegexOpts -> Text -> Regex Source #

Compile a regex pattern withOptions, throw InvalidRegexPattern in case of illegal patterns.

escape :: Text -> Text Source #

Escape a piece of text literal so that it can be safely used in regex pattern.

>>> escape "(\\d+)"
>>> "\\(\\\\d\\+\\)"

regexCaptureNum :: Regex -> Int Source #

capturing group number(including \0)

regexPattern :: Regex -> Text Source #

Get back regex's pattern.

regex operations

test :: Regex -> Text -> Bool Source #

Check if text matched regex pattern.

match :: Regex -> Text -> (Text, [Maybe Text], Text) Source #

Check if text matched regex pattern, if so return matched part, all capturing groups(from \1) and the text after matched part.

Nothing indicate a non-matching capturing group, e.g.

>>> match (regex "(foo)|(bar)baz") "barbazbla"
>>> ("barbaz",[Nothing,Just "bar"], "bla")

replace Source #

Arguments

:: Regex 
-> Bool

globally replace?

-> Text

input

-> Text

rewrite

-> Text 

Replace matched part in input with a rewrite pattern. If no matched part found, return the original input.

>>> replace (regex "red") False "A red fox with red fur" "yellow"
>>> "A yellow fox with red fur"
>>> replace (regex "red") True  "A red fox with red fur" "yellow"
>>> "A yellow fox with yellow fur"

extract Source #

Arguments

:: Regex 
-> Text

input

-> Text

extract

-> Text 

Extract capturing group to an extract pattern. If no matched capturing group found, return an empty string.

>>> extract (regex "(\\d{4})-(\\d{2})-(\\d{2})") "Today is 2020-12-15" "month: \\2, date: \\3"
>>> "month: 12, date: 15"