{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
module Text.Megaparsec.Char
(
newline
, crlf
, eol
, tab
, space
, space1
, controlChar
, spaceChar
, upperChar
, lowerChar
, letterChar
, alphaNumChar
, printChar
, digitChar
, octDigitChar
, hexDigitChar
, markChar
, numberChar
, punctuationChar
, symbolChar
, separatorChar
, asciiChar
, latin1Char
, charCategory
, categoryName
, char
, char'
, anyChar
, notChar
, oneOf
, noneOf
, satisfy
, string
, string' )
where
import Control.Applicative
import Data.Char
import Data.Function (on)
import Data.Functor (void)
import Data.List.NonEmpty (NonEmpty (..))
import Data.Proxy
import Text.Megaparsec
import qualified Data.CaseInsensitive as CI
import qualified Data.Set as E
#if !MIN_VERSION_base(4,8,0)
import Data.Foldable (Foldable (), elem, notElem)
import Prelude hiding (elem, notElem)
#endif
newline :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
newline = char '\n'
{-# INLINE newline #-}
crlf :: forall e s m. (MonadParsec e s m, Token s ~ Char) => m (Tokens s)
crlf = string (tokensToChunk (Proxy :: Proxy s) "\r\n")
{-# INLINE crlf #-}
eol :: forall e s m. (MonadParsec e s m, Token s ~ Char) => m (Tokens s)
eol = (tokenToChunk (Proxy :: Proxy s) <$> newline)
<|> crlf
<?> "end of line"
{-# INLINE eol #-}
tab :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
tab = char '\t'
{-# INLINE tab #-}
space :: (MonadParsec e s m, Token s ~ Char) => m ()
space = void $ takeWhileP (Just "white space") isSpace
{-# INLINE space #-}
space1 :: (MonadParsec e s m, Token s ~ Char) => m ()
space1 = void $ takeWhile1P (Just "white space") isSpace
{-# INLINE space1 #-}
controlChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
controlChar = satisfy isControl <?> "control character"
{-# INLINE controlChar #-}
spaceChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
spaceChar = satisfy isSpace <?> "white space"
{-# INLINE spaceChar #-}
upperChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
upperChar = satisfy isUpper <?> "uppercase letter"
{-# INLINE upperChar #-}
lowerChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
lowerChar = satisfy isLower <?> "lowercase letter"
{-# INLINE lowerChar #-}
letterChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
letterChar = satisfy isLetter <?> "letter"
{-# INLINE letterChar #-}
alphaNumChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
alphaNumChar = satisfy isAlphaNum <?> "alphanumeric character"
{-# INLINE alphaNumChar #-}
printChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
printChar = satisfy isPrint <?> "printable character"
{-# INLINE printChar #-}
digitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
digitChar = satisfy isDigit <?> "digit"
{-# INLINE digitChar #-}
octDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
octDigitChar = satisfy isOctDigit <?> "octal digit"
{-# INLINE octDigitChar #-}
hexDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
hexDigitChar = satisfy isHexDigit <?> "hexadecimal digit"
{-# INLINE hexDigitChar #-}
markChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
markChar = satisfy isMark <?> "mark character"
{-# INLINE markChar #-}
numberChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
numberChar = satisfy isNumber <?> "numeric character"
{-# INLINE numberChar #-}
punctuationChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
punctuationChar = satisfy isPunctuation <?> "punctuation"
{-# INLINE punctuationChar #-}
symbolChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
symbolChar = satisfy isSymbol <?> "symbol"
{-# INLINE symbolChar #-}
separatorChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
separatorChar = satisfy isSeparator <?> "separator"
{-# INLINE separatorChar #-}
asciiChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
asciiChar = satisfy isAscii <?> "ASCII character"
{-# INLINE asciiChar #-}
latin1Char :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
latin1Char = satisfy isLatin1 <?> "Latin-1 character"
{-# INLINE latin1Char #-}
charCategory :: (MonadParsec e s m, Token s ~ Char)
=> GeneralCategory
-> m (Token s)
charCategory cat = satisfy ((== cat) . generalCategory) <?> categoryName cat
{-# INLINE charCategory #-}
categoryName :: GeneralCategory -> String
categoryName = \case
UppercaseLetter -> "uppercase letter"
LowercaseLetter -> "lowercase letter"
TitlecaseLetter -> "titlecase letter"
ModifierLetter -> "modifier letter"
OtherLetter -> "other letter"
NonSpacingMark -> "non-spacing mark"
SpacingCombiningMark -> "spacing combining mark"
EnclosingMark -> "enclosing mark"
DecimalNumber -> "decimal number character"
LetterNumber -> "letter number character"
OtherNumber -> "other number character"
ConnectorPunctuation -> "connector punctuation"
DashPunctuation -> "dash punctuation"
OpenPunctuation -> "open punctuation"
ClosePunctuation -> "close punctuation"
InitialQuote -> "initial quote"
FinalQuote -> "final quote"
OtherPunctuation -> "other punctuation"
MathSymbol -> "math symbol"
CurrencySymbol -> "currency symbol"
ModifierSymbol -> "modifier symbol"
OtherSymbol -> "other symbol"
Space -> "white space"
LineSeparator -> "line separator"
ParagraphSeparator -> "paragraph separator"
Control -> "control character"
Format -> "format character"
Surrogate -> "surrogate character"
PrivateUse -> "private-use Unicode character"
NotAssigned -> "non-assigned Unicode character"
char :: MonadParsec e s m => Token s -> m (Token s)
char c = token testChar (Just c)
where
f x = Tokens (x:|[])
testChar x =
if x == c
then Right x
else Left (pure (f x), E.singleton (f c))
{-# INLINE char #-}
char' :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s)
char' c = choice [char c, char (swapCase c)]
where
swapCase x
| isUpper x = toLower x
| isLower x = toUpper x
| otherwise = x
{-# INLINE char' #-}
anyChar :: MonadParsec e s m => m (Token s)
anyChar = satisfy (const True) <?> "character"
{-# INLINE anyChar #-}
notChar :: MonadParsec e s m => Token s -> m (Token s)
notChar c = satisfy (/= c)
{-# INLINE notChar #-}
oneOf :: (Foldable f, MonadParsec e s m)
=> f (Token s)
-> m (Token s)
oneOf cs = satisfy (`elem` cs)
{-# INLINE oneOf #-}
noneOf :: (Foldable f, MonadParsec e s m)
=> f (Token s)
-> m (Token s)
noneOf cs = satisfy (`notElem` cs)
{-# INLINE noneOf #-}
satisfy :: MonadParsec e s m
=> (Token s -> Bool)
-> m (Token s)
satisfy f = token testChar Nothing
where
testChar x =
if f x
then Right x
else Left (pure (Tokens (x:|[])), E.empty)
{-# INLINE satisfy #-}
string :: MonadParsec e s m
=> Tokens s
-> m (Tokens s)
string = tokens (==)
{-# INLINE string #-}
string' :: (MonadParsec e s m, CI.FoldCase (Tokens s))
=> Tokens s
-> m (Tokens s)
string' = tokens ((==) `on` CI.mk)
{-# INLINE string' #-}