Safe Haskell | None |
---|---|
Language | Haskell2010 |
Collection of parsing algorithms with a common interface, operating on grammars represented as records with rank-2 field types.
Synopsis
- failureDescription :: forall s. (Ord s, TextualMonoid s) => s -> ParseFailure s -> Int -> s
- simply :: (Only r (p (Only r) s) -> s -> Only r f) -> p (Only r) s r -> s -> f r
- type Grammar (g :: (* -> *) -> *) p s = g (p g s)
- type GrammarBuilder (g :: (* -> *) -> *) (g' :: (* -> *) -> *) (p :: ((* -> *) -> *) -> * -> * -> *) (s :: *) = g (p g' s) -> g (p g' s)
- type ParseResults s = Either (ParseFailure s)
- data ParseFailure s = ParseFailure Int [Expected s]
- data Expected s
- = Expected String
- | ExpectedInput s
- newtype Ambiguous a = Ambiguous {
- getAmbiguous :: NonEmpty a
- data Position
- class Parsing m => DeterministicParsing (m :: Type -> Type) where
- class Alternative m => AmbiguousParsing m where
- class LookAheadParsing m => InputParsing (m :: Type -> Type) where
- type ParserInput (m :: Type -> Type)
- getInput :: m (ParserInput m)
- getSourcePos :: m Position
- anyToken :: m (ParserInput m)
- take :: Int -> m (ParserInput m)
- satisfy :: (ParserInput m -> Bool) -> m (ParserInput m)
- notSatisfy :: (ParserInput m -> Bool) -> m ()
- scan :: state -> (state -> ParserInput m -> Maybe state) -> m (ParserInput m)
- string :: ParserInput m -> m (ParserInput m)
- takeWhile :: (ParserInput m -> Bool) -> m (ParserInput m)
- takeWhile1 :: (ParserInput m -> Bool) -> m (ParserInput m)
- class (CharParsing m, InputParsing m) => InputCharParsing (m :: Type -> Type) where
- satisfyCharInput :: (Char -> Bool) -> m (ParserInput m)
- notSatisfyChar :: (Char -> Bool) -> m ()
- scanChars :: state -> (state -> Char -> Maybe state) -> m (ParserInput m)
- takeCharsWhile :: (Char -> Bool) -> m (ParserInput m)
- takeCharsWhile1 :: (Char -> Bool) -> m (ParserInput m)
- class InputParsing m => ConsumedInputParsing (m :: Type -> Type) where
- match :: m a -> m (ParserInput m, a)
- class InputParsing m => MultiParsing m where
- type ResultFunctor m :: * -> *
- type GrammarConstraint m (g :: (* -> *) -> *) :: Constraint
- parseComplete :: (ParserInput m ~ s, GrammarConstraint m g, Eq s, FactorialMonoid s) => g m -> s -> g (ResultFunctor m)
- parsePrefix :: (ParserInput m ~ s, GrammarConstraint m g, Eq s, FactorialMonoid s) => g m -> s -> g (Compose (ResultFunctor m) ((,) s))
- class MultiParsing m => GrammarParsing m where
- type ParserGrammar m :: (* -> *) -> *
- type GrammarFunctor m :: * -> *
- parsingResult :: ParserInput m -> GrammarFunctor m a -> ResultFunctor m (ParserInput m, a)
- nonTerminal :: (g ~ ParserGrammar m, GrammarConstraint m g) => (g (GrammarFunctor m) -> GrammarFunctor m a) -> m a
- selfReferring :: (g ~ ParserGrammar m, GrammarConstraint m g, Distributive g) => g m
- fixGrammar :: (g ~ ParserGrammar m, GrammarConstraint m g, Distributive g) => (g m -> g m) -> g m
- recursive :: m a -> m a
- class CharParsing m => TokenParsing (m :: Type -> Type) where
- class (DeterministicParsing m, InputCharParsing m, TokenParsing m) => LexicalParsing m where
- lexicalWhiteSpace :: m ()
- someLexicalSpace :: m ()
- lexicalComment :: m ()
- lexicalSemicolon :: m Char
- lexicalToken :: m a -> m a
- identifierToken :: m (ParserInput m) -> m (ParserInput m)
- isIdentifierStartChar :: Char -> Bool
- isIdentifierFollowChar :: Char -> Bool
- identifier :: m (ParserInput m)
- keyword :: ParserInput m -> m ()
- class Parsing m => CharParsing (m :: Type -> Type) where
- class Alternative m => Parsing (m :: Type -> Type) where
- (<?>) :: m a -> String -> m a
- skipMany :: m a -> m ()
- skipSome :: m a -> m ()
- unexpected :: String -> m a
- notFollowedBy :: Show a => m a -> m ()
- class Parsing m => LookAheadParsing (m :: Type -> Type) where
- lookAhead :: m a -> m a
- concatMany :: (Alternative p, Monoid a) => p a -> p a
- concatSome :: (Alternative p, Semigroup a) => p a -> p a
Parsing methods
failureDescription :: forall s. (Ord s, TextualMonoid s) => s -> ParseFailure s -> Int -> s Source #
Given the textual parse input, the parse failure on the input, and the number of lines preceding the failure to show, produce a human-readable failure description.
simply :: (Only r (p (Only r) s) -> s -> Only r f) -> p (Only r) s r -> s -> f r Source #
Apply the given parse
function to the given grammar-free parser and its input.
Types
type Grammar (g :: (* -> *) -> *) p s = g (p g s) Source #
A type synonym for a fixed grammar record type g
with a given parser type p
on input streams of type s
type GrammarBuilder (g :: (* -> *) -> *) (g' :: (* -> *) -> *) (p :: ((* -> *) -> *) -> * -> * -> *) (s :: *) = g (p g' s) -> g (p g' s) Source #
A type synonym for an endomorphic function on a grammar record type g
, whose parsers of type p
build grammars
of type g'
, parsing input streams of type s
type ParseResults s = Either (ParseFailure s) Source #
data ParseFailure s Source #
A ParseFailure
contains the offset of the parse failure and the list of things expected at that offset.
ParseFailure Int [Expected s] |
Instances
Eq s => Eq (ParseFailure s) Source # | |
Defined in Text.Grampa.Class (==) :: ParseFailure s -> ParseFailure s -> Bool # (/=) :: ParseFailure s -> ParseFailure s -> Bool # | |
Show s => Show (ParseFailure s) Source # | |
Defined in Text.Grampa.Class showsPrec :: Int -> ParseFailure s -> ShowS # show :: ParseFailure s -> String # showList :: [ParseFailure s] -> ShowS # |
Instances
Functor Expected Source # | |
Eq s => Eq (Expected s) Source # | |
Ord s => Ord (Expected s) Source # | |
Read s => Read (Expected s) Source # | |
Show s => Show (Expected s) Source # | |
An Ambiguous
parse result, produced by the ambiguous
combinator, contains a NonEmpty
list of
alternative results.
Instances
Functor Ambiguous Source # | |
Applicative Ambiguous Source # | |
Foldable Ambiguous Source # | |
Defined in Text.Grampa.Class fold :: Monoid m => Ambiguous m -> m # foldMap :: Monoid m => (a -> m) -> Ambiguous a -> m # foldMap' :: Monoid m => (a -> m) -> Ambiguous a -> m # foldr :: (a -> b -> b) -> b -> Ambiguous a -> b # foldr' :: (a -> b -> b) -> b -> Ambiguous a -> b # foldl :: (b -> a -> b) -> b -> Ambiguous a -> b # foldl' :: (b -> a -> b) -> b -> Ambiguous a -> b # foldr1 :: (a -> a -> a) -> Ambiguous a -> a # foldl1 :: (a -> a -> a) -> Ambiguous a -> a # toList :: Ambiguous a -> [a] # length :: Ambiguous a -> Int # elem :: Eq a => a -> Ambiguous a -> Bool # maximum :: Ord a => Ambiguous a -> a # minimum :: Ord a => Ambiguous a -> a # | |
Traversable Ambiguous Source # | |
Show1 Ambiguous Source # | |
Eq a => Eq (Ambiguous a) Source # | |
Data a => Data (Ambiguous a) Source # | |
Defined in Text.Grampa.Class gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b) -> (forall g. g -> c g) -> Ambiguous a -> c (Ambiguous a) # gunfold :: (forall b r. Data b => c (b -> r) -> c r) -> (forall r. r -> c r) -> Constr -> c (Ambiguous a) # toConstr :: Ambiguous a -> Constr # dataTypeOf :: Ambiguous a -> DataType # dataCast1 :: Typeable t => (forall d. Data d => c (t d)) -> Maybe (c (Ambiguous a)) # dataCast2 :: Typeable t => (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c (Ambiguous a)) # gmapT :: (forall b. Data b => b -> b) -> Ambiguous a -> Ambiguous a # gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> Ambiguous a -> r # gmapQr :: forall r r'. (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> Ambiguous a -> r # gmapQ :: (forall d. Data d => d -> u) -> Ambiguous a -> [u] # gmapQi :: Int -> (forall d. Data d => d -> u) -> Ambiguous a -> u # gmapM :: Monad m => (forall d. Data d => d -> m d) -> Ambiguous a -> m (Ambiguous a) # gmapMp :: MonadPlus m => (forall d. Data d => d -> m d) -> Ambiguous a -> m (Ambiguous a) # gmapMo :: MonadPlus m => (forall d. Data d => d -> m d) -> Ambiguous a -> m (Ambiguous a) # | |
Ord a => Ord (Ambiguous a) Source # | |
Defined in Text.Grampa.Class | |
Show a => Show (Ambiguous a) Source # | |
Semigroup a => Semigroup (Ambiguous a) Source # | |
Monoid a => Monoid (Ambiguous a) Source # | |
Opaque data type that represents an input position.
Parser combinators and primitives
class Parsing m => DeterministicParsing (m :: Type -> Type) where #
Combinator methods for constructing deterministic parsers, i.e., parsers that can succeed with only a single result.
Nothing
(<<|>) :: m a -> m a -> m a infixl 3 #
Left-biased choice: if the left alternative succeeds, the right one is never tried.
takeOptional :: m a -> m (Maybe a) #
Like optional
, but never succeeds with Nothing
if the argument parser can succeed.
Like some
, but always consuming the longest matching sequence of input.
Like some
, but always consuming the longest matching sequence of input.
concatAll :: Monoid a => m a -> m a #
Like concatMany
, but always consuming the longest matching sequence of input.
Like skipMany
, but always consuming the longest matching sequence of input.
Instances
class Alternative m => AmbiguousParsing m where Source #
Parsers that can produce alternative parses and collect them into an Ambiguous
node
ambiguous :: m a -> m (Ambiguous a) Source #
Collect all alternative parses of the same length into a NonEmpty
list of results.
Instances
AmbiguousParsing (Parser g s) Source # | |
(Applicative m, Eq (m ())) => AmbiguousParsing (ParserT m g s) Source # | |
AmbiguousParsing (p g s) => AmbiguousParsing (Fixed p g s) Source # | |
class LookAheadParsing m => InputParsing (m :: Type -> Type) where #
Methods for parsing monoidal inputs
type ParserInput (m :: Type -> Type) #
The type of the input stream that the parser m
expects to parse.
getInput :: m (ParserInput m) #
Always sucessful parser that returns the entire remaining input without consuming it.
getSourcePos :: m Position #
Retrieve the Position
reached by the parser in the input source.
anyToken :: m (ParserInput m) #
A parser that accepts any single atomic prefix of the input stream.
anyToken == satisfy (const True) anyToken == take 1
take :: Int -> m (ParserInput m) #
A parser that accepts exactly the given number of input atoms.
take n == count n anyToken
satisfy :: (ParserInput m -> Bool) -> m (ParserInput m) #
A parser that accepts an input atom only if it satisfies the given predicate.
notSatisfy :: (ParserInput m -> Bool) -> m () #
A parser that succeeds exactly when satisfy doesn't, equivalent to
notFollowedBy
.
satisfy
scan :: state -> (state -> ParserInput m -> Maybe state) -> m (ParserInput m) #
A stateful scanner. The predicate modifies a state argument, and each transformed state is passed to successive
invocations of the predicate on each token of the input until one returns Nothing
or the input ends.
This parser does not fail. It will return an empty string if the predicate returns Nothing
on the first
character.
Note: Because this parser does not fail, do not use it with combinators such as many
,
because such parsers loop until a failure occurs. Careless use will thus result in an infinite loop.
string :: ParserInput m -> m (ParserInput m) #
A parser that consumes and returns the given prefix of the input.
takeWhile :: (ParserInput m -> Bool) -> m (ParserInput m) #
A parser accepting the longest sequence of input atoms that match the given predicate; an optimized version of
concat
.
many
.
satisfy
.
Note: Because this parser does not fail, do not use it with combinators such as many
,
because such parsers loop until a failure occurs. Careless use will thus result in an infinite loop.
takeWhile1 :: (ParserInput m -> Bool) -> m (ParserInput m) #
Instances
class (CharParsing m, InputParsing m) => InputCharParsing (m :: Type -> Type) where #
Methods for parsing textual monoid inputs
satisfyCharInput :: (Char -> Bool) -> m (ParserInput m) #
Specialization of satisfy
on textual inputs, accepting an input character only if it satisfies the given
predicate, and returning the input atom that represents the character. Equivalent to fmap singleton
. Char.satisfy
notSatisfyChar :: (Char -> Bool) -> m () #
A parser that succeeds exactly when satisfy doesn't, equivalent to notFollowedBy . Char.satisfy
scanChars :: state -> (state -> Char -> Maybe state) -> m (ParserInput m) #
Stateful scanner like scan
, but specialized for TextualMonoid
inputs.
takeCharsWhile :: (Char -> Bool) -> m (ParserInput m) #
Specialization of takeWhile
on TextualMonoid
inputs, accepting the longest sequence of input characters that
match the given predicate; an optimized version of fmap fromString . many . Char.satisfy
.
Note: Because this parser does not fail, do not use it with combinators such as many
,
because such parsers loop until a failure occurs. Careless use will thus result in an infinite loop.
takeCharsWhile1 :: (Char -> Bool) -> m (ParserInput m) #
Specialization of takeWhile1
on TextualMonoid
inputs, accepting the longest sequence of input characters
that match the given predicate; an optimized version of fmap fromString . some . Char.satisfy
.
Instances
class InputParsing m => ConsumedInputParsing (m :: Type -> Type) where #
Parsers that keep track of the consumed input.
match :: m a -> m (ParserInput m, a) #
Return both the result of a parse and the portion of the input that the argument parser consumed.
Instances
class InputParsing m => MultiParsing m where Source #
Choose one of the instances of this class to parse with.
type ResultFunctor m :: * -> * Source #
Some parser types produce a single result, others a list of results.
type GrammarConstraint m (g :: (* -> *) -> *) :: Constraint Source #
type GrammarConstraint m g = Functor g Source #
parseComplete :: (ParserInput m ~ s, GrammarConstraint m g, Eq s, FactorialMonoid s) => g m -> s -> g (ResultFunctor m) Source #
Given a rank-2 record of parsers and input, produce a record of parses of the complete input.
parsePrefix :: (ParserInput m ~ s, GrammarConstraint m g, Eq s, FactorialMonoid s) => g m -> s -> g (Compose (ResultFunctor m) ((,) s)) Source #
Given a rank-2 record of parsers and input, produce a record of prefix parses paired with the remaining input suffix.
Instances
class MultiParsing m => GrammarParsing m where Source #
Parsers that belong to this class can memoize the parse results to avoid exponential performance complexity.
type ParserGrammar m :: (* -> *) -> * Source #
The record of grammar productions associated with the parser
type GrammarFunctor m :: * -> * Source #
For internal use by notTerminal
parsingResult :: ParserInput m -> GrammarFunctor m a -> ResultFunctor m (ParserInput m, a) Source #
Converts the intermediate to final parsing result.
nonTerminal :: (g ~ ParserGrammar m, GrammarConstraint m g) => (g (GrammarFunctor m) -> GrammarFunctor m a) -> m a Source #
Used to reference a grammar production, only necessary from outside the grammar itself
selfReferring :: (g ~ ParserGrammar m, GrammarConstraint m g, Distributive g) => g m Source #
Construct a grammar whose every production refers to itself.
fixGrammar :: (g ~ ParserGrammar m, GrammarConstraint m g, Distributive g) => (g m -> g m) -> g m Source #
Convert a self-referring grammar function to a grammar.
recursive :: m a -> m a Source #
Mark a parser that relies on primitive recursion to prevent an infinite loop in fixGrammar
.
Instances
class CharParsing m => TokenParsing (m :: Type -> Type) where #
Additional functionality that is needed to tokenize input while ignoring whitespace.
Nothing
Usually, someSpace consists of one or more occurrences of a space
.
Some parsers may choose to recognize line comments or block (multi line)
comments as white space as well.
Called when we enter a nested pair of symbols. Overloadable to enable disabling layout
The token parser |semi| parses the character ';' and skips any trailing white space. Returns the character ';'. Overloadable to permit automatic semicolon insertion or Haskell-style layout.
highlight :: Highlight -> m a -> m a #
Tag a region of parsed text with a bit of semantic information. Most parsers won't use this, but it is indispensible for highlighters.
token p
first applies parser p
and then the whiteSpace
parser, returning the value of p
. Every lexical
token (token) is defined using token
, this way every parse
starts at a point without white space. Parsers that use token
are
called token parsers in this document.
The only point where the whiteSpace
parser should be
called explicitly is the start of the main parser in order to skip
any leading white space.
Alternatively, one might define token
as first parsing whiteSpace
and then parser p
. By parsing whiteSpace first, the parser is able
to return before parsing additional whiteSpace, improving laziness.
mainParser = sum <$ whiteSpace <*> many (token digit) <* eof
Instances
class (DeterministicParsing m, InputCharParsing m, TokenParsing m) => LexicalParsing m where Source #
If a grammar is Lexical
, its parsers can instantiate the TokenParsing
class.
Nothing
lexicalWhiteSpace :: m () Source #
Always succeeds, consuming all white space and comments
someLexicalSpace :: m () Source #
Consumes all whitespace and comments, failing if there are none
lexicalComment :: m () Source #
Consumes a single comment, defaults to empty
lexicalSemicolon :: m Char Source #
Consumes a single semicolon and any trailing whitespace, returning the character |';'|. The method can be overridden for automatic semicolon insertion, but if it succeeds on semicolon or white space input it must consume it.
lexicalToken :: m a -> m a Source #
Applies the argument parser and consumes the trailing lexicalWhitespace
identifierToken :: m (ParserInput m) -> m (ParserInput m) Source #
Applies the argument parser, determines whether its result is a legal identifier, and consumes the trailing
lexicalWhitespace
isIdentifierStartChar :: Char -> Bool Source #
Determines whether the given character can start an identifier token, allows only a letter or underscore by default
isIdentifierFollowChar :: Char -> Bool Source #
Determines whether the given character can be any part of an identifier token, also allows numbers
identifier :: m (ParserInput m) Source #
Parses a valid identifier and consumes the trailing lexicalWhitespace
default identifier :: TextualMonoid (ParserInput m) => m (ParserInput m) Source #
keyword :: ParserInput m -> m () Source #
Parses the argument word whole, not followed by any identifier character, and consumes the trailing
lexicalWhitespace
default keyword :: (Show (ParserInput m), TextualMonoid (ParserInput m)) => ParserInput m -> m () Source #
class Parsing m => CharParsing (m :: Type -> Type) where #
Additional functionality needed to parse character streams.
Nothing
notChar c
parses any single character other than c
. Returns the parsed
character.
This parser succeeds for any character. Returns the parsed character.
Instances
class Alternative m => Parsing (m :: Type -> Type) where #
Additional functionality needed to describe parsers independent of input type.
(<?>) :: m a -> String -> m a infixr 0 #
Give a parser a name
A version of many that discards its input. Specialized because it can often be implemented more cheaply.
skipSome p
applies the parser p
one or more times, skipping
its result. (aka skipMany1 in parsec)
unexpected :: String -> m a #
Used to emit an error on an unexpected token
notFollowedBy :: Show a => m a -> m () #
notFollowedBy p
only succeeds when parser p
fails. This parser
does not consume any input. This parser can be used to implement the
'longest match' rule. For example, when recognizing keywords (for
example let
), we want to make sure that a keyword is not followed
by a legal identifier character, in which case the keyword is
actually an identifier (for example lets
). We can program this
behaviour as follows:
keywordLet = try $ string "let" <* notFollowedBy alphaNum
Instances
class Parsing m => LookAheadParsing (m :: Type -> Type) where #
Additional functionality needed to describe parsers independent of input type.
Instances
concatMany :: (Alternative p, Monoid a) => p a -> p a Source #
Zero or more argument occurrences like many
, with concatenated monoidal results.
concatSome :: (Alternative p, Semigroup a) => p a -> p a Source #
One or more argument occurrences like some
, with concatenated monoidal results.