module Data.SCargot.Common (
parseR5RSIdent
, parseR6RSIdent
, parseR7RSIdent
, parseXIDIdentStrict
, parseXIDIdentGeneral
, parseHaskellIdent
, parseHaskellVariable
, parseHaskellConstructor
, signed
, prefixedNumber
, signedPrefixedNumber
, binNumber
, signedBinNumber
, octNumber
, signedOctNumber
, decNumber
, signedDecNumber
, dozNumber
, signedDozNumber
, hexNumber
, signedHexNumber
, commonLispNumberAnyBase
, gnuM4NumberAnyBase
) where
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative hiding ((<|>), many)
#endif
import Control.Monad (guard)
import Data.Char
import Data.Text (Text)
import qualified Data.Text as T
import Text.Parsec
import Text.Parsec.Text (Parser)
parseR5RSIdent :: Parser Text
parseR5RSIdent =
T.pack <$> ((:) <$> initial <*> many subsequent <|> peculiar)
where initial = letter <|> oneOf "!$%&*/:<=>?^_~"
subsequent = initial <|> digit <|> oneOf "+-.@"
peculiar = string "+" <|> string "-" <|> string "..."
hasCategory :: Char -> [GeneralCategory] -> Bool
hasCategory c cs = generalCategory c `elem` cs
parseR6RSIdent :: Parser Text
parseR6RSIdent =
T.pack <$> ((:) <$> initial <*> many subsequent <|> peculiar)
where initial = constituent <|> oneOf "!$%&*/:<=>?^_~" <|> inlineHex
constituent = letter
<|> uniClass (\ c -> isLetter c ||
isSymbol c ||
hasCategory c
[ NonSpacingMark
, LetterNumber
, OtherNumber
, DashPunctuation
, ConnectorPunctuation
, OtherPunctuation
, PrivateUse
])
inlineHex = (chr . fromIntegral) <$> (string "\\x" *> hexNumber <* char ';')
subsequent = initial <|> digit <|> oneOf "+-.@"
<|> uniClass (\ c -> hasCategory c
[ DecimalNumber
, SpacingCombiningMark
, EnclosingMark
])
peculiar = string "+" <|> string "-" <|> string "..." <|>
((++) <$> string "->" <*> many subsequent)
uniClass :: (Char -> Bool) -> Parser Char
uniClass sp = satisfy (\ c -> c > '\x7f' && sp c)
parseR7RSIdent :: Parser Text
parseR7RSIdent = T.pack <$>
( (:) <$> initial <*> many subsequent
<|> char '|' *> many1 symbolElement <* char '|'
<|> peculiar
)
where initial = letter <|> specInit
specInit = oneOf "!$%&*/:<=>?^_~"
subsequent = initial <|> digit <|> specSubsequent
specSubsequent = expSign <|> oneOf ".@"
expSign = oneOf "+-"
symbolElement = noneOf "\\|"
<|> hexEscape
<|> mnemEscape
<|> ('|' <$ string "\\|")
hexEscape = chr . fromIntegral <$> (string "\\x" *> hexNumber <* char ';')
mnemEscape = '\a' <$ string "\\a"
<|> '\b' <$ string "\\b"
<|> '\t' <$ string "\\t"
<|> '\n' <$ string "\\n"
<|> '\r' <$ string "\\r"
peculiar = (:[]) <$> expSign
<|> cons2 <$> expSign <*> signSub <*> many subsequent
<|> cons3 <$> expSign
<*> char '.'
<*> dotSub
<*> many subsequent
<|> cons2 <$> char '.' <*> dotSub <*> many subsequent
dotSub = signSub <|> char '.'
signSub = initial <|> expSign <|> char '@'
cons2 a b cs = a : b : cs
cons3 a b c ds = a : b : c : ds
parseHaskellVariable :: Parser Text
parseHaskellVariable =
T.pack <$> ((:) <$> small <*> many (small <|>
large <|>
digit' <|>
char '\'' <|>
char '_'))
where small = satisfy isLower
large = satisfy isUpper
digit' = satisfy isDigit
parseHaskellConstructor :: Parser Text
parseHaskellConstructor =
T.pack <$> ((:) <$> large <*> many (small <|>
large <|>
digit' <|>
char '\'' <|>
char '_'))
where small = satisfy isLower
large = satisfy isUpper
digit' = satisfy isDigit
parseHaskellIdent :: Parser Text
parseHaskellIdent =
T.pack <$> ((:) <$> (large <|> small)
<*> many (small <|>
large <|>
digit' <|>
char '\'' <|>
char '_'))
where small = satisfy isLower
large = satisfy isUpper
digit' = satisfy isDigit
hasCat :: [GeneralCategory] -> Parser Char
hasCat cats = satisfy (flip hasCategory cats)
xidStart :: [GeneralCategory]
xidStart = [ UppercaseLetter
, LowercaseLetter
, TitlecaseLetter
, ModifierLetter
, OtherLetter
, LetterNumber
]
xidContinue :: [GeneralCategory]
xidContinue = xidStart ++ [ NonSpacingMark
, SpacingCombiningMark
, DecimalNumber
, ConnectorPunctuation
]
parseXIDIdentStrict :: Parser Text
parseXIDIdentStrict = T.pack <$> ((:) <$> hasCat xidStart
<*> many (hasCat xidContinue))
parseXIDIdentGeneral :: Parser Text
parseXIDIdentGeneral = T.pack <$> ((:) <$> (hasCat xidStart <|> char '_')
<*> many (hasCat xidContinue))
number :: Integer -> Parser Char -> Parser Integer
number base digits = foldl go 0 <$> many1 digits
where go x d = base * x + toInteger (value d)
value c
| c >= 'a' && c <= 'z' = 0xa + (fromEnum c fromEnum 'a')
| c >= 'A' && c <= 'Z' = 0xa + (fromEnum c fromEnum 'A')
| c >= '0' && c <= '9' = fromEnum c fromEnum '0'
| c == '\x218a' = 0xa
| c == '\x218b' = 0xb
| otherwise = error ("Unknown letter in number: " ++ show c)
digitsFor :: Int -> [Char]
digitsFor n
| n <= 10 = take n ['0'..'9']
| n <= 36 = take (n10) ['A'..'Z'] ++ take (n10) ['a'..'z'] ++ ['0'..'9']
| otherwise = error ("Invalid base for parser: " ++ show n)
anyBase :: Integer -> Parser Integer
anyBase n = number n (oneOf (digitsFor (fromIntegral n)))
commonLispNumberAnyBase :: Parser Integer
commonLispNumberAnyBase = do
_ <- char '#'
n <- decNumber
guard (n >= 2 && n <= 36)
_ <- char 'r'
signed (anyBase n)
gnuM4NumberAnyBase :: Parser Integer
gnuM4NumberAnyBase = do
_ <- string "0r"
n <- decNumber
guard (n >= 2 && n <= 36)
_ <- char ':'
signed (anyBase n)
sign :: Num a => Parser (a -> a)
sign = (pure id <* char '+')
<|> (pure negate <* char '-')
<|> pure id
signed :: Num a => Parser a -> Parser a
signed p = ($) <$> sign <*> p
signedPrefixedNumber :: Parser Integer
signedPrefixedNumber = signed prefixedNumber
prefixedNumber :: Parser Integer
prefixedNumber = (string "0x" <|> string "0X") *> hexNumber
<|> (string "0o" <|> string "0O") *> octNumber
<|> (string "0z" <|> string "0Z") *> dozNumber
<|> (string "0b" <|> string "0B") *> binNumber
<|> decNumber
binNumber :: Parser Integer
binNumber = number 2 (char '0' <|> char '1')
signedBinNumber :: Parser Integer
signedBinNumber = signed binNumber
octNumber :: Parser Integer
octNumber = number 8 (oneOf "01234567")
signedOctNumber :: Parser Integer
signedOctNumber = ($) <$> sign <*> octNumber
decNumber :: Parser Integer
decNumber = number 10 digit
signedDecNumber :: Parser Integer
signedDecNumber = ($) <$> sign <*> decNumber
dozDigit :: Parser Char
dozDigit = digit <|> oneOf "AaBb\x218a\x218b"
dozNumber :: Parser Integer
dozNumber = number 12 dozDigit
signedDozNumber :: Parser Integer
signedDozNumber = ($) <$> sign <*> dozNumber
hexNumber :: Parser Integer
hexNumber = number 16 hexDigit
signedHexNumber :: Parser Integer
signedHexNumber = ($) <$> sign <*> hexNumber