Copyright | © 2015–2017 Megaparsec contributors © 2007 Paolo Martini © 1999–2001 Daan Leijen |
---|---|
License | FreeBSD |
Maintainer | Mark Karpov <markkarpov92@gmail.com> |
Stability | experimental |
Portability | non-portable |
Safe Haskell | None |
Language | Haskell2010 |
Commonly used character parsers.
- newline :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- crlf :: forall e s m. (MonadParsec e s m, Token s ~ Char) => m (Tokens s)
- eol :: forall e s m. (MonadParsec e s m, Token s ~ Char) => m (Tokens s)
- tab :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- space :: (MonadParsec e s m, Token s ~ Char) => m ()
- space1 :: (MonadParsec e s m, Token s ~ Char) => m ()
- controlChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- spaceChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- upperChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- lowerChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- letterChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- alphaNumChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- printChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- digitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- octDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- hexDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- markChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- numberChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- punctuationChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- symbolChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- separatorChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- asciiChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- latin1Char :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
- charCategory :: (MonadParsec e s m, Token s ~ Char) => GeneralCategory -> m (Token s)
- categoryName :: GeneralCategory -> String
- char :: MonadParsec e s m => Token s -> m (Token s)
- char' :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s)
- anyChar :: MonadParsec e s m => m (Token s)
- notChar :: MonadParsec e s m => Token s -> m (Token s)
- oneOf :: (Foldable f, MonadParsec e s m) => f (Token s) -> m (Token s)
- noneOf :: (Foldable f, MonadParsec e s m) => f (Token s) -> m (Token s)
- satisfy :: MonadParsec e s m => (Token s -> Bool) -> m (Token s)
- string :: MonadParsec e s m => Tokens s -> m (Tokens s)
- string' :: (MonadParsec e s m, FoldCase (Tokens s)) => Tokens s -> m (Tokens s)
Simple parsers
crlf :: forall e s m. (MonadParsec e s m, Token s ~ Char) => m (Tokens s) Source #
Parse a carriage return character followed by a newline character. Return the sequence of characters parsed.
Categories of characters
controlChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a control character (a non-printing character of the Latin-1 subset of Unicode).
spaceChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a Unicode space character, and the control characters: tab, newline, carriage return, form feed, and vertical tab.
upperChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse an upper-case or title-case alphabetic Unicode character. Title case is used by a small number of letter ligatures like the single-character form of Lj.
lowerChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a lower-case alphabetic Unicode character.
letterChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse an alphabetic Unicode character: lower-case, upper-case, or title-case letter, or a letter of case-less scripts/modifier letter.
alphaNumChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse an alphabetic or numeric digit Unicode character.
Note that the numeric digits outside the ASCII range are parsed by this
parser but not by digitChar. Such digits may be part of identifiers but
are not used by the printer and reader to represent numbers.
printChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a printable Unicode character: letter, number, mark, punctuation, symbol or space.
digitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse an ASCII digit, i.e. between “0” and “9”.
octDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse an octal digit, i.e. between “0” and “7”.
hexDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a hexadecimal digit, i.e. between “0” and “9”, or “a” and “f”, or “A” and “F”.
markChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a Unicode mark character (accents and the like), which combines with preceding characters.
numberChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a Unicode numeric character, including digits from various scripts, Roman numerals, etc.
punctuationChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a Unicode punctuation character, including various kinds of connectors, brackets and quotes.
symbolChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a Unicode symbol character, including mathematical and currency symbols.
separatorChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a Unicode space or separator character.
asciiChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a character from the first 128 characters of the Unicode character set, corresponding to the ASCII character set.
latin1Char :: (MonadParsec e s m, Token s ~ Char) => m (Token s) Source #
Parse a character from the first 256 characters of the Unicode character set, corresponding to the ISO 8859-1 (Latin-1) character set.
charCategory :: (MonadParsec e s m, Token s ~ Char) => GeneralCategory -> m (Token s) Source #
`charCategory cat` parses a character in Unicode General Category `cat`, see GeneralCategory.
categoryName :: GeneralCategory -> String Source #
Return the human-readable name of Unicode General Category.
More general parsers
char :: MonadParsec e s m => Token s -> m (Token s) Source #
`char c` parses a single character `c`.
semicolon = char ';'
char' :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s) Source #
The same as char but case-insensitive. This parser returns the
actually parsed character, preserving its case.
>>> parseTest (char' 'e') "E"
'E'
>>> parseTest (char' 'e') "G"
1:1: unexpected 'G' expecting 'E' or 'e'
anyChar :: MonadParsec e s m => m (Token s) Source #
This parser succeeds for any character. Returns the parsed character.
notChar :: MonadParsec e s m => Token s -> m (Token s) Source #
Match any character but the given one. It's a good idea to attach a
label to this parser manually.
Since: 6.0.0
oneOf :: (Foldable f, MonadParsec e s m) => f (Token s) -> m (Token s) Source #
`oneOf cs` succeeds if the current character is in the supplied
collection of characters `cs`. Returns the parsed character. Note that
this parser cannot automatically generate the “expected” component of
error message, so usually you should label it manually with label
or (<?>).
See also: satisfy.
digit = oneOf ['0'..'9'] <?> "digit"
Performance note: prefer satisfy when you can because it's faster
when you have only a couple of tokens to compare to:
quoteFast = satisfy (\x -> x == '\'' || x == '\"')
quoteSlow = oneOf "'\""
noneOf :: (Foldable f, MonadParsec e s m) => f (Token s) -> m (Token s) Source #
As the dual of oneOf, `noneOf cs` succeeds if the current character is
not in the supplied list of characters `cs`. Returns the parsed
character. Note that this parser cannot automatically generate the
“expected” component of error message, so usually you should label it
manually with label or (<?>).
See also: satisfy.
Performance note: prefer satisfy and notChar when you can because
it's faster.
:: MonadParsec e s m | |
=> (Token s -> Bool) | Predicate to apply |
-> m (Token s) |