{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
module Text.Megaparsec.Char
(
newline
, crlf
, eol
, tab
, space
, space1
, controlChar
, spaceChar
, upperChar
, lowerChar
, letterChar
, alphaNumChar
, printChar
, digitChar
, binDigitChar
, octDigitChar
, hexDigitChar
, markChar
, numberChar
, punctuationChar
, symbolChar
, separatorChar
, asciiChar
, latin1Char
, charCategory
, categoryName
, char
, char'
, string
, string' )
where
import Control.Applicative
import Data.Char
import Data.Functor (void)
import Data.Proxy
import Text.Megaparsec
import Text.Megaparsec.Common
-- | Parse a newline character.
newline :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
newline = single '\n'
{-# INLINE newline #-}
-- | Parse a carriage return character followed by a newline character.
-- The matched two-character sequence is returned as a chunk of the input
-- stream.
crlf :: forall e s m. (MonadParsec e s m, Token s ~ Char) => m (Tokens s)
crlf = string crlfChunk
  where
    -- The literal is converted into the stream's own chunk type so that it
    -- can be handed to 'string'.
    crlfChunk = tokensToChunk (Proxy :: Proxy s) "\r\n"
{-# INLINE crlf #-}
-- | Parse an end-of-line sequence: either a lone newline, or a carriage
-- return followed by a newline (see 'crlf'). The matched text is returned
-- as a chunk, and the parser as a whole is labelled @end of line@.
eol :: forall e s m. (MonadParsec e s m, Token s ~ Char) => m (Tokens s)
eol = label "end of line" (lineFeed <|> crlf)
  where
    -- 'newline' yields a single token, so its result is lifted to a chunk
    -- to give both alternatives the same result type.
    lineFeed = tokenToChunk (Proxy :: Proxy s) <$> newline
{-# INLINE eol #-}
-- | Parse a tab character.
tab :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
tab = single '\t'
{-# INLINE tab #-}
-- | Skip a run of characters satisfying 'isSpace', consumed with
-- 'takeWhileP'. The consumed input is discarded and the run is labelled
-- @white space@.
--
-- See also: 'space1'.
space :: (MonadParsec e s m, Token s ~ Char) => m ()
space = void (takeWhileP whiteSpaceLabel isSpace)
  where
    whiteSpaceLabel = Just "white space"
{-# INLINE space #-}
-- | Like 'space', but built on 'takeWhile1P' instead of 'takeWhileP'.
-- The consumed input is discarded and the run is labelled @white space@.
space1 :: (MonadParsec e s m, Token s ~ Char) => m ()
space1 = void (takeWhile1P whiteSpaceLabel isSpace)
  where
    whiteSpaceLabel = Just "white space"
{-# INLINE space1 #-}
-- | Parse a control character, i.e. one for which 'isControl' holds.
controlChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
controlChar = label "control character" (satisfy isControl)
{-# INLINE controlChar #-}
-- | Parse a single white space character, i.e. one for which 'isSpace'
-- holds.
spaceChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
spaceChar = label "white space" (satisfy isSpace)
{-# INLINE spaceChar #-}
-- | Parse an uppercase letter, i.e. a character for which 'isUpper' holds.
upperChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
upperChar = label "uppercase letter" (satisfy isUpper)
{-# INLINE upperChar #-}
-- | Parse a lowercase letter, i.e. a character for which 'isLower' holds.
lowerChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
lowerChar = label "lowercase letter" (satisfy isLower)
{-# INLINE lowerChar #-}
-- | Parse a letter, i.e. a character for which 'isLetter' holds.
letterChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
letterChar = label "letter" (satisfy isLetter)
{-# INLINE letterChar #-}
-- | Parse an alphanumeric character, i.e. one for which 'isAlphaNum'
-- holds.
alphaNumChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
alphaNumChar = label "alphanumeric character" (satisfy isAlphaNum)
{-# INLINE alphaNumChar #-}
-- | Parse a printable character, i.e. one for which 'isPrint' holds.
printChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
printChar = label "printable character" (satisfy isPrint)
{-# INLINE printChar #-}
-- | Parse a digit, i.e. a character for which 'isDigit' holds.
digitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
digitChar = label "digit" (satisfy isDigit)
{-# INLINE digitChar #-}
-- | Parse a binary digit, i.e. the character @0@ or the character @1@.
binDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
binDigitChar = label "binary digit" (satisfy isBit)
  where
    isBit c = c == '0' || c == '1'
{-# INLINE binDigitChar #-}
-- | Parse an octal digit, i.e. a character for which 'isOctDigit' holds.
octDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
octDigitChar = label "octal digit" (satisfy isOctDigit)
{-# INLINE octDigitChar #-}
-- | Parse a hexadecimal digit, i.e. a character for which 'isHexDigit'
-- holds.
hexDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
hexDigitChar = label "hexadecimal digit" (satisfy isHexDigit)
{-# INLINE hexDigitChar #-}
-- | Parse a mark character, i.e. one for which 'isMark' holds.
markChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
markChar = label "mark character" (satisfy isMark)
{-# INLINE markChar #-}
-- | Parse a numeric character, i.e. one for which 'isNumber' holds.
numberChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
numberChar = label "numeric character" (satisfy isNumber)
{-# INLINE numberChar #-}
-- | Parse a punctuation character, i.e. one for which 'isPunctuation'
-- holds.
punctuationChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
punctuationChar = label "punctuation" (satisfy isPunctuation)
{-# INLINE punctuationChar #-}
-- | Parse a symbol character, i.e. one for which 'isSymbol' holds.
symbolChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
symbolChar = label "symbol" (satisfy isSymbol)
{-# INLINE symbolChar #-}
-- | Parse a separator character, i.e. one for which 'isSeparator' holds.
separatorChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
separatorChar = label "separator" (satisfy isSeparator)
{-# INLINE separatorChar #-}
-- | Parse an ASCII character, i.e. one for which 'isAscii' holds.
asciiChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
asciiChar = label "ASCII character" (satisfy isAscii)
{-# INLINE asciiChar #-}
-- | Parse a Latin-1 character, i.e. one for which 'isLatin1' holds.
latin1Char :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
latin1Char = label "Latin-1 character" (satisfy isLatin1)
{-# INLINE latin1Char #-}
-- | @'charCategory' cat@ parses a character whose Unicode general
-- category, as reported by 'generalCategory', is equal to @cat@. The
-- parser is labelled with the description that 'categoryName' gives for
-- @cat@.
charCategory :: (MonadParsec e s m, Token s ~ Char) => GeneralCategory -> m (Token s)
charCategory cat = label (categoryName cat) (satisfy inCategory)
  where
    inCategory ch = generalCategory ch == cat
{-# INLINE charCategory #-}
-- | Return a human-readable description of a Unicode 'GeneralCategory'.
-- 'charCategory' uses this description as its parser label.
categoryName :: GeneralCategory -> String
categoryName UppercaseLetter = "uppercase letter"
categoryName LowercaseLetter = "lowercase letter"
categoryName TitlecaseLetter = "titlecase letter"
categoryName ModifierLetter = "modifier letter"
categoryName OtherLetter = "other letter"
categoryName NonSpacingMark = "non-spacing mark"
categoryName SpacingCombiningMark = "spacing combining mark"
categoryName EnclosingMark = "enclosing mark"
categoryName DecimalNumber = "decimal number character"
categoryName LetterNumber = "letter number character"
categoryName OtherNumber = "other number character"
categoryName ConnectorPunctuation = "connector punctuation"
categoryName DashPunctuation = "dash punctuation"
categoryName OpenPunctuation = "open punctuation"
categoryName ClosePunctuation = "close punctuation"
categoryName InitialQuote = "initial quote"
categoryName FinalQuote = "final quote"
categoryName OtherPunctuation = "other punctuation"
categoryName MathSymbol = "math symbol"
categoryName CurrencySymbol = "currency symbol"
categoryName ModifierSymbol = "modifier symbol"
categoryName OtherSymbol = "other symbol"
categoryName Space = "white space"
categoryName LineSeparator = "line separator"
categoryName ParagraphSeparator = "paragraph separator"
categoryName Control = "control character"
categoryName Format = "format character"
categoryName Surrogate = "surrogate character"
categoryName PrivateUse = "private-use Unicode character"
categoryName NotAssigned = "non-assigned Unicode character"
-- | A type-constrained version of 'single': the signature fixes the
-- stream's token type to 'Char', and the given character is passed
-- straight to 'single'.
char :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s)
char = single
{-# INLINE char #-}
-- | Like 'char', but accepts any case variant of the given character.
-- Its lower-case, upper-case and title-case forms (via 'toLower',
-- 'toUpper' and 'toTitle') are offered to 'choice' in that order, each as
-- a 'char' parser.
char' :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s)
char' c = choice (map char caseVariants)
  where
    caseVariants = [toLower c, toUpper c, toTitle c]
{-# INLINE char' #-}