Safe Haskell | Safe-Inferred |
---|---|
Language | Haskell2010 |
Parsers for textual data (UTF-8, ASCII).
Synopsis
- char :: Char -> Q Exp
- string :: String -> Q Exp
- anyChar :: ParserT st e Char
- skipAnyChar :: ParserT st e ()
- satisfy :: (Char -> Bool) -> ParserT st e Char
- skipSatisfy :: (Char -> Bool) -> ParserT st e ()
- fusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> ParserT st e Char
- skipFusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> Parser e ()
- takeLine :: ParserT st e String
- takeRestString :: ParserT st e String
- anyAsciiChar :: ParserT st e Char
- skipAnyAsciiChar :: ParserT st e ()
- satisfyAscii :: (Char -> Bool) -> ParserT st e Char
- skipSatisfyAscii :: (Char -> Bool) -> ParserT st e ()
- anyAsciiDecimalWord :: ParserT st e Word
- anyAsciiDecimalInt :: ParserT st e Int
- anyAsciiDecimalInteger :: ParserT st e Integer
- anyAsciiHexWord :: ParserT st e Word
- anyAsciiHexInt :: ParserT st e Int
- traceLine :: ParserT st e String
- traceRest :: ParserT st e String
UTF-8
char :: Char -> Q Exp Source #
Parse a UTF-8 character literal. This is a template function, you can use it as
$(char 'x')
, for example, and the splice in this case has type Parser e ()
.
string :: String -> Q Exp Source #
Parse a UTF-8 string literal. This is a template function, you can use it as $(string "foo")
,
for example, and the splice has type Parser e ()
.
anyChar :: ParserT st e Char Source #
Parse any single Unicode character encoded using UTF-8 as a Char
.
skipAnyChar :: ParserT st e () Source #
Skip any single Unicode character encoded using UTF-8.
satisfy :: (Char -> Bool) -> ParserT st e Char Source #
Parse a UTF-8 Char
for which a predicate holds.
skipSatisfy :: (Char -> Bool) -> ParserT st e () Source #
Skip a UTF-8 Char
for which a predicate holds.
fusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> ParserT st e Char Source #
This is a variant of satisfy
which allows more optimization. We can pick four testing
functions for the four cases for the possible number of bytes in the UTF-8 character. So in
fusedSatisfy f1 f2 f3 f4
, if we read a one-byte character, the result is scrutinized with
f1
, for two-bytes, with f2
, and so on. This can result in dramatic lexing speedups.
For example, if we want to accept any letter, the naive solution would be to use
isLetter
, but this accesses a large lookup table of Unicode character classes. We
can do better with fusedSatisfy isLatinLetter isLetter isLetter isLetter
, since here the
isLatinLetter
is inlined into the UTF-8 decoding, and it probably handles a great majority of
all cases without accessing the character table.
skipFusedSatisfy :: (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> (Char -> Bool) -> Parser e () Source #
Skipping variant of fusedSatisfy
.
takeLine :: ParserT st e String Source #
Parse the rest of the current line as a String
. Assumes UTF-8 encoding,
throws an error if the encoding is invalid.
takeRestString :: ParserT st e String Source #
Take the rest of the input as a String
. Assumes UTF-8 encoding.
ASCII
anyAsciiChar :: ParserT st e Char Source #
skipAnyAsciiChar :: ParserT st e () Source #
Skip any single ASCII character (a single byte).
More efficient than skipAnyChar
for ASCII-only input.
ASCII-encoded numbers
anyAsciiDecimalWord :: ParserT st e Word Source #
Parse a non-empty ASCII decimal digit sequence as a Word
.
Fails on overflow.
anyAsciiDecimalInt :: ParserT st e Int Source #
Parse a non-empty ASCII decimal digit sequence as a positive Int
.
Fails on overflow.
anyAsciiDecimalInteger :: ParserT st e Integer Source #
Parse a non-empty ASCII decimal digit sequence as a positive Integer
.
anyAsciiHexWord :: ParserT st e Word Source #
Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence as a
Word
.
Fails on overflow.
anyAsciiHexInt :: ParserT st e Int Source #
Parse a non-empty, case-insensitive ASCII hexadecimal digit sequence as a
positive Int
.
Fails on overflow.