Copyright | © 2015–2017 Megaparsec contributors © 2007 Paolo Martini © 1999–2001 Daan Leijen |
---|---|
License | FreeBSD |
Maintainer | Mark Karpov <markkarpov@opmbx.org> |
Stability | experimental |
Portability | non-portable |
Safe Haskell | None |
Language | Haskell2010 |
Commonly used character parsers.
- newline :: (MonadParsec e s m, Token s ~ Char) => m Char
- crlf :: (MonadParsec e s m, Token s ~ Char) => m String
- eol :: (MonadParsec e s m, Token s ~ Char) => m String
- tab :: (MonadParsec e s m, Token s ~ Char) => m Char
- space :: (MonadParsec e s m, Token s ~ Char) => m ()
- controlChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- spaceChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- upperChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- lowerChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- letterChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- alphaNumChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- printChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- digitChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- octDigitChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- hexDigitChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- markChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- numberChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- punctuationChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- symbolChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- separatorChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- asciiChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- latin1Char :: (MonadParsec e s m, Token s ~ Char) => m Char
- charCategory :: (MonadParsec e s m, Token s ~ Char) => GeneralCategory -> m Char
- categoryName :: GeneralCategory -> String
- char :: (MonadParsec e s m, Token s ~ Char) => Char -> m Char
- char' :: (MonadParsec e s m, Token s ~ Char) => Char -> m Char
- anyChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- oneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char
- oneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char
- noneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char
- noneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char
- satisfy :: (MonadParsec e s m, Token s ~ Char) => (Char -> Bool) -> m Char
- string :: (MonadParsec e s m, Token s ~ Char) => String -> m String
- string' :: (MonadParsec e s m, Token s ~ Char) => String -> m String
Simple parsers
crlf :: (MonadParsec e s m, Token s ~ Char) => m String Source #
Parses a carriage return character followed by a newline character. Returns sequence of characters parsed.
Categories of characters
controlChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses control characters, which are the non-printing characters of the Latin-1 subset of Unicode.
spaceChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses a Unicode space character, and the control characters: tab, newline, carriage return, form feed, and vertical tab.
upperChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses an upper-case or title-case alphabetic Unicode character. Title case is used by a small number of letter ligatures like the single-character form of Lj.
lowerChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses a lower-case alphabetic Unicode character.
letterChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses alphabetic Unicode characters: lower-case, upper-case and title-case letters, plus letters of case-less scripts and modifiers letters.
alphaNumChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses alphabetic or numeric digit Unicode characters.
Note that numeric digits outside the ASCII range are parsed by this
parser but not by digitChar
. Such digits may be part of identifiers but
are not used by the printer and reader to represent numbers.
printChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses printable Unicode characters: letters, numbers, marks, punctuation, symbols and spaces.
digitChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses an ASCII digit, i.e between “0” and “9”.
octDigitChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses an octal digit, i.e. between “0” and “7”.
hexDigitChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses a hexadecimal digit, i.e. between “0” and “9”, or “a” and “f”, or “A” and “F”.
markChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses Unicode mark characters, for example accents and the like, which combine with preceding characters.
numberChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses Unicode numeric characters, including digits from various scripts, Roman numerals, et cetera.
punctuationChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses Unicode punctuation characters, including various kinds of connectors, brackets and quotes.
symbolChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses Unicode symbol characters, including mathematical and currency symbols.
separatorChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses Unicode space and separator characters.
asciiChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses a character from the first 128 characters of the Unicode character set, corresponding to the ASCII character set.
latin1Char :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parses a character from the first 256 characters of the Unicode character set, corresponding to the ISO 8859-1 (Latin-1) character set.
charCategory :: (MonadParsec e s m, Token s ~ Char) => GeneralCategory -> m Char Source #
charCategory cat
Parses character in Unicode General Category cat
,
see GeneralCategory
.
categoryName :: GeneralCategory -> String Source #
Returns human-readable name of Unicode General Category.
More general parsers
char :: (MonadParsec e s m, Token s ~ Char) => Char -> m Char Source #
char c
parses a single character c
.
semicolon = char ';'
char' :: (MonadParsec e s m, Token s ~ Char) => Char -> m Char Source #
The same as char
but case-insensitive. This parser returns actually
parsed character preserving its case.
>>>
parseTest (char' 'e') "E"
'E'>>>
parseTest (char' 'e') "G"
1:1: unexpected 'G' expecting 'E' or 'e'
anyChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
This parser succeeds for any character. Returns the parsed character.
oneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char Source #
oneOf cs
succeeds if the current character is in the supplied
list of characters cs
. Returns the parsed character. Note that this
parser doesn't automatically generate “expected” component of error
message, so usually you should label it manually with label
or
(<?>
).
See also: satisfy
.
digit = oneOf ['0'..'9'] <?> "digit"
oneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char Source #
The same as oneOf
, but case-insensitive. Returns the parsed character
preserving its case.
vowel = oneOf' "aeiou" <?> "vowel"
noneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char Source #
As the dual of oneOf
, noneOf cs
succeeds if the current
character not in the supplied list of characters cs
. Returns the
parsed character.
noneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char Source #
The same as noneOf
, but case-insensitive.
consonant = noneOf' "aeiou" <?> "consonant"
satisfy :: (MonadParsec e s m, Token s ~ Char) => (Char -> Bool) -> m Char Source #
The parser satisfy f
succeeds for any character for which the
supplied function f
returns True
. Returns the character that is
actually parsed.
digitChar = satisfy isDigit <?> "digit" oneOf cs = satisfy (`elem` cs)