module Text.Megaparsec.Char
(
newline
, crlf
, eol
, tab
, space
, controlChar
, spaceChar
, upperChar
, lowerChar
, letterChar
, alphaNumChar
, printChar
, digitChar
, octDigitChar
, hexDigitChar
, markChar
, numberChar
, punctuationChar
, symbolChar
, separatorChar
, asciiChar
, latin1Char
, charCategory
, categoryName
, char
, char'
, anyChar
, oneOf
, oneOf'
, noneOf
, noneOf'
, satisfy
, string
, string' )
where
import Control.Applicative ((<|>))
import Data.Char
import Data.List (nub)
import Data.Maybe (fromJust)
import Text.Megaparsec.Combinator
import Text.Megaparsec.Error (Message (..))
import Text.Megaparsec.Pos
import Text.Megaparsec.Prim
import Text.Megaparsec.ShowToken
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative ((<$>), pure)
#endif
newline :: MonadParsec s m Char => m Char
newline = char '\n'
crlf :: MonadParsec s m Char => m String
crlf = string "\r\n"
eol :: MonadParsec s m Char => m String
eol = (pure <$> newline) <|> crlf <?> "end of line"
tab :: MonadParsec s m Char => m Char
tab = char '\t'
space :: MonadParsec s m Char => m ()
space = skipMany spaceChar
controlChar :: MonadParsec s m Char => m Char
controlChar = satisfy isControl <?> "control character"
spaceChar :: MonadParsec s m Char => m Char
spaceChar = satisfy isSpace <?> "white space"
upperChar :: MonadParsec s m Char => m Char
upperChar = satisfy isUpper <?> "uppercase letter"
lowerChar :: MonadParsec s m Char => m Char
lowerChar = satisfy isLower <?> "lowercase letter"
letterChar :: MonadParsec s m Char => m Char
letterChar = satisfy isLetter <?> "letter"
alphaNumChar :: MonadParsec s m Char => m Char
alphaNumChar = satisfy isAlphaNum <?> "alphanumeric character"
printChar :: MonadParsec s m Char => m Char
printChar = satisfy isPrint <?> "printable character"
digitChar :: MonadParsec s m Char => m Char
digitChar = satisfy isDigit <?> "digit"
octDigitChar :: MonadParsec s m Char => m Char
octDigitChar = satisfy isOctDigit <?> "octal digit"
hexDigitChar :: MonadParsec s m Char => m Char
hexDigitChar = satisfy isHexDigit <?> "hexadecimal digit"
markChar :: MonadParsec s m Char => m Char
markChar = satisfy isMark <?> "mark character"
numberChar :: MonadParsec s m Char => m Char
numberChar = satisfy isNumber <?> "numeric character"
punctuationChar :: MonadParsec s m Char => m Char
punctuationChar = satisfy isPunctuation <?> "punctuation"
symbolChar :: MonadParsec s m Char => m Char
symbolChar = satisfy isSymbol <?> "symbol"
separatorChar :: MonadParsec s m Char => m Char
separatorChar = satisfy isSeparator <?> "separator"
asciiChar :: MonadParsec s m Char => m Char
asciiChar = satisfy isAscii <?> "ASCII character"
latin1Char :: MonadParsec s m Char => m Char
latin1Char = satisfy isLatin1 <?> "Latin-1 character"
charCategory :: MonadParsec s m Char => GeneralCategory -> m Char
charCategory cat = satisfy ((== cat) . generalCategory) <?> categoryName cat
categoryName :: GeneralCategory -> String
categoryName cat =
fromJust $ lookup cat
[ (UppercaseLetter , "uppercase letter")
, (LowercaseLetter , "lowercase letter")
, (TitlecaseLetter , "titlecase letter")
, (ModifierLetter , "modifier letter")
, (OtherLetter , "other letter")
, (NonSpacingMark , "non-spacing mark")
, (SpacingCombiningMark, "spacing combining mark")
, (EnclosingMark , "enclosing mark")
, (DecimalNumber , "decimal number character")
, (LetterNumber , "letter number character")
, (OtherNumber , "other number character")
, (ConnectorPunctuation, "connector punctuation")
, (DashPunctuation , "dash punctuation")
, (OpenPunctuation , "open punctuation")
, (ClosePunctuation , "close punctuation")
, (InitialQuote , "initial quote")
, (FinalQuote , "final quote")
, (OtherPunctuation , "other punctuation")
, (MathSymbol , "math symbol")
, (CurrencySymbol , "currency symbol")
, (ModifierSymbol , "modifier symbol")
, (OtherSymbol , "other symbol")
, (Space , "white space")
, (LineSeparator , "line separator")
, (ParagraphSeparator , "paragraph separator")
, (Control , "control character")
, (Format , "format character")
, (Surrogate , "surrogate character")
, (PrivateUse , "private-use Unicode character")
, (NotAssigned , "non-assigned Unicode character") ]
char :: MonadParsec s m Char => Char -> m Char
char c = satisfy (== c) <?> showToken c
char' :: MonadParsec s m Char => Char -> m Char
char' = choice . fmap char . extendi . pure
extendi :: String -> String
extendi cs = nub (cs >>= f)
where f c | isLower c = [c, toUpper c]
| isUpper c = [c, toLower c]
| otherwise = [c]
anyChar :: MonadParsec s m Char => m Char
anyChar = satisfy (const True) <?> "character"
oneOf :: MonadParsec s m Char => String -> m Char
oneOf cs = satisfy (`elem` cs)
oneOf' :: MonadParsec s m Char => String -> m Char
oneOf' = oneOf . extendi
noneOf :: MonadParsec s m Char => String -> m Char
noneOf cs = satisfy (`notElem` cs)
noneOf' :: MonadParsec s m Char => String -> m Char
noneOf' = noneOf . extendi
satisfy :: MonadParsec s m Char => (Char -> Bool) -> m Char
satisfy f = token updatePosChar testChar
where testChar x = if f x
then Right x
else Left . pure . Unexpected . showToken $ x
string :: MonadParsec s m Char => String -> m String
string = tokens updatePosString (==)
string' :: MonadParsec s m Char => String -> m String
string' = tokens updatePosString test
where test x y = toLower x == toLower y