module Language.Scheme.Parser
(
lispDef
, mainParser
, readOrThrow
, readExpr
, readExprList
, parseExpr
, parseAtom
, parseBool
, parseChar
, parseOctalNumber
, parseBinaryNumber
, parseHexNumber
, parseDecimalNumber
, parseNumber
, parseRealNumber
, parseRationalNumber
, parseComplexNumber
, parseEscapedChar
, parseString
, parseVector
, parseByteVector
, parseHashTable
, parseList
, parseDottedList
, parseQuoted
, parseQuasiQuoted
, parseUnquoted
, parseUnquoteSpliced
) where
import Language.Scheme.Types
import Control.Monad.Error
import Data.Array
import qualified Data.ByteString as BS
import qualified Data.Char as DC
import Data.Complex
import qualified Data.Map
import Data.Ratio
import Data.Word
import Numeric
import Text.ParserCombinators.Parsec hiding (spaces)
import Text.Parsec.Language
import qualified Text.Parsec.Token as P
#if __GLASGOW_HASKELL__ >= 702
import Data.Functor.Identity (Identity)
import Text.Parsec.Prim (ParsecT)
#endif
lispDef :: LanguageDef ()
lispDef
= emptyDef
{ P.commentStart = "#|"
, P.commentEnd = "|#"
, P.commentLine = ";"
, P.nestedComments = True
, P.identStart = letter <|> symbol
, P.identLetter = letter <|> digit <|> symbol
, P.reservedNames = []
, P.caseSensitive = True
}
#if __GLASGOW_HASKELL__ >= 702
lexer :: P.GenTokenParser String () Data.Functor.Identity.Identity
#endif
lexer = P.makeTokenParser lispDef
#if __GLASGOW_HASKELL__ >= 702
dot :: ParsecT String () Identity String
#endif
dot = P.dot lexer
#if __GLASGOW_HASKELL__ >= 702
parens :: ParsecT String () Identity a -> ParsecT String () Identity a
#endif
parens = P.parens lexer
#if __GLASGOW_HASKELL__ >= 702
brackets :: ParsecT String () Identity a -> ParsecT String () Identity a
#endif
brackets = P.brackets lexer
#if __GLASGOW_HASKELL__ >= 702
identifier :: ParsecT String () Identity String
#endif
identifier = P.identifier lexer
#if __GLASGOW_HASKELL__ >= 702
whiteSpace :: ParsecT String () Identity ()
#endif
whiteSpace = P.whiteSpace lexer
#if __GLASGOW_HASKELL__ >= 702
lexeme :: ParsecT String () Identity a -> ParsecT String () Identity a
#endif
lexeme = P.lexeme lexer
symbol :: Parser Char
symbol = oneOf "!$%&|*+-/:<=>?@^_~."
parseAtom :: Parser LispVal
parseAtom = do
atom <- identifier
if atom == "."
then pzero
else return $ Atom atom
parseBool :: Parser LispVal
parseBool = do _ <- string "#"
x <- oneOf "tf"
return $ case x of
't' -> Bool True
'f' -> Bool False
_ -> Bool False
parseChar :: Parser LispVal
parseChar = do
_ <- try (string "#\\")
c <- anyChar
r <- many (letter <|> digit)
let pchr = c : r
case pchr of
"space" -> return $ Char ' '
"newline" -> return $ Char '\n'
"alarm" -> return $ Char '\a'
"backspace" -> return $ Char '\b'
"delete" -> return $ Char '\DEL'
"escape" -> return $ Char '\ESC'
"null" -> return $ Char '\0'
"return" -> return $ Char '\n'
"tab" -> return $ Char '\t'
_ -> case (c : r) of
[ch] -> return $ Char ch
('x' : hexs) -> do
rv <- parseHexScalar hexs
return $ Char rv
_ -> pzero
parseOctalNumber :: Parser LispVal
parseOctalNumber = do
_ <- try (string "#o")
sign <- many (oneOf "-")
num <- many1 (oneOf "01234567")
case (length sign) of
0 -> return $ Number $ fst $ head (Numeric.readOct num)
1 -> return $ Number $ fromInteger $ (*) (1) $ fst $ head (Numeric.readOct num)
_ -> pzero
parseBinaryNumber :: Parser LispVal
parseBinaryNumber = do
_ <- try (string "#b")
sign <- many (oneOf "-")
num <- many1 (oneOf "01")
case (length sign) of
0 -> return $ Number $ fst $ head (Numeric.readInt 2 (`elem` "01") DC.digitToInt num)
1 -> return $ Number $ fromInteger $ (*) (1) $ fst $ head (Numeric.readInt 2 (`elem` "01") DC.digitToInt num)
_ -> pzero
parseHexNumber :: Parser LispVal
parseHexNumber = do
_ <- try (string "#x")
sign <- many (oneOf "-")
num <- many1 (digit <|> oneOf "abcdefABCDEF")
case (length sign) of
0 -> return $ Number $ fst $ head (Numeric.readHex num)
1 -> return $ Number $ fromInteger $ (*) (1) $ fst $ head (Numeric.readHex num)
_ -> pzero
parseDecimalNumber :: Parser LispVal
parseDecimalNumber = do
_ <- try (many (string "#d"))
sign <- many (oneOf "-")
num <- many1 digit
if (length sign) > 1
then pzero
else return $ (Number . read) $ sign ++ num
parseDecimalNumberMaybeExponent :: Parser LispVal
parseDecimalNumberMaybeExponent = do
num <- parseDecimalNumber
parseNumberExponent num
parseNumber :: Parser LispVal
parseNumber = parseDecimalNumberMaybeExponent <|>
parseHexNumber <|>
parseBinaryNumber <|>
parseOctalNumber <?>
"Unable to parse number"
parseRealNumber :: Parser LispVal
parseRealNumber = do
sign <- many (oneOf "-+")
num <- many digit
_ <- char '.'
frac <- many1 digit
let dec = if not (null num)
then num ++ "." ++ frac
else "0." ++ frac
f <- case (length sign) of
0 -> return $ Float $ fst $ head (Numeric.readFloat dec)
1 -> if sign == "-"
then return $ Float $ (*) (1.0) $ fst $ head (Numeric.readFloat dec)
else return $ Float $ fst $ head (Numeric.readFloat dec)
_ -> pzero
parseNumberExponent f
parseNumberExponent :: LispVal -> Parser LispVal
parseNumberExponent n = do
expnt <- many $ oneOf "Ee"
case (length expnt) of
0 -> return n
1 -> do
num <- try parseDecimalNumber
case num of
Number nexp -> buildResult n nexp
_ -> pzero
_ -> pzero
where
buildResult (Number num) nexp = return $ Float $ (fromIntegral num) * (10 ** (fromIntegral nexp))
buildResult (Float num) nexp = return $ Float $ num * (10 ** (fromIntegral nexp))
buildResult _ _ = pzero
parseRationalNumber :: Parser LispVal
parseRationalNumber = do
pnumerator <- parseDecimalNumber
case pnumerator of
Number n -> do
_ <- char '/'
sign <- many (oneOf "-")
num <- many1 digit
if (length sign) > 1
then pzero
else do
let pdenominator = read $ sign ++ num
if pdenominator == 0
then return $ Number 0
else return $ Rational $ n % pdenominator
_ -> pzero
parseComplexNumber :: Parser LispVal
parseComplexNumber = do
lispreal <- (try parseRealNumber <|> try parseRationalNumber <|> parseDecimalNumber)
let real = case lispreal of
Number n -> fromInteger n
Rational r -> fromRational r
Float f -> f
_ -> 0
_ <- char '+'
lispimag <- (try parseRealNumber <|> try parseRationalNumber <|> parseDecimalNumber)
let imag = case lispimag of
Number n -> fromInteger n
Rational r -> fromRational r
Float f -> f
_ -> 0
_ <- char 'i'
return $ Complex $ real :+ imag
parseEscapedChar :: forall st .
GenParser Char st Char
parseEscapedChar = do
_ <- char '\\'
c <- anyChar
case c of
'a' -> return '\a'
'b' -> return '\b'
'n' -> return '\n'
't' -> return '\t'
'r' -> return '\r'
'x' -> do
num <- many $ letter <|> digit
_ <- char ';'
parseHexScalar num
_ -> return c
parseHexScalar :: Monad m => String -> m Char
parseHexScalar num = do
let ns = Numeric.readHex num
case ns of
[] -> fail $ "Unable to parse hex value " ++ show num
_ -> return $ DC.chr $ fst $ head ns
parseString :: Parser LispVal
parseString = do
_ <- char '"'
x <- many (parseEscapedChar <|> noneOf "\"")
_ <- char '"'
return $ String x
parseVector :: Parser LispVal
parseVector = do
vals <- sepBy parseExpr whiteSpace
return $ Vector (listArray (0, (length vals 1)) vals)
parseByteVector :: Parser LispVal
parseByteVector = do
ns <- sepBy parseNumber whiteSpace
return $ ByteVector $ BS.pack $ map conv ns
where
conv (Number n) = fromInteger n :: Word8
conv _ = 0 :: Word8
parseHashTable :: Parser LispVal
parseHashTable = do
let f :: [(LispVal, LispVal)] -> [LispVal] -> Maybe [(LispVal, LispVal)]
f acc [] = Just acc
f acc (List [a, b] :ls) = f (acc ++ [(a, b)]) ls
f acc (DottedList [a] b :ls) = f (acc ++ [(a, b)]) ls
f _ (_:_) = Nothing
vals <- sepBy parseExpr whiteSpace
let mvals = f [] vals
case mvals of
Just m -> return $ HashTable $ Data.Map.fromList m
Nothing -> pzero
parseList :: Parser LispVal
parseList = liftM List $ sepBy parseExpr whiteSpace
parseDottedList :: Parser LispVal
parseDottedList = do
phead <- endBy parseExpr whiteSpace
case phead of
[] -> pzero
_ -> do
ptail <- dot >> parseExpr
case ptail of
DottedList ls l -> return $ DottedList (phead ++ ls) l
List (Atom "unquote" : _) -> return $ DottedList phead ptail
List ls -> return $ List $ phead ++ ls
_ -> return $ DottedList phead ptail
parseQuoted :: Parser LispVal
parseQuoted = do
_ <- lexeme $ char '\''
x <- parseExpr
return $ List [Atom "quote", x]
parseQuasiQuoted :: Parser LispVal
parseQuasiQuoted = do
_ <- lexeme $ char '`'
x <- parseExpr
return $ List [Atom "quasiquote", x]
parseUnquoted :: Parser LispVal
parseUnquoted = do
_ <- try (lexeme $ char ',')
x <- parseExpr
return $ List [Atom "unquote", x]
parseUnquoteSpliced :: Parser LispVal
parseUnquoteSpliced = do
_ <- try (lexeme $ string ",@")
x <- parseExpr
return $ List [Atom "unquote-splicing", x]
parseExpr :: Parser LispVal
parseExpr =
try (lexeme parseComplexNumber)
<|> try (lexeme parseRationalNumber)
<|> try (lexeme parseRealNumber)
<|> try (lexeme parseNumber)
<|> lexeme parseChar
<|> parseUnquoteSpliced
<|> do _ <- try (lexeme $ string "#(")
x <- parseVector
_ <- lexeme $ char ')'
return x
<|> do _ <- try (lexeme $ string "#u8(")
x <- parseByteVector
_ <- lexeme $ char ')'
return x
<|> try parseAtom
<|> lexeme parseString
<|> lexeme parseBool
<|> parseQuoted
<|> parseQuasiQuoted
<|> parseUnquoted
<|> try (parens parseList)
<|> parens parseDottedList
<|> try (brackets parseList)
<|> brackets parseDottedList
<?> "Expression"
mainParser :: Parser LispVal
mainParser = do
_ <- whiteSpace
parseExpr
readOrThrow :: Parser a -> String -> ThrowsError a
readOrThrow parser input = case parse parser "lisp" input of
Left err -> throwError $ Parser err
Right val -> return val
readExpr :: String -> ThrowsError LispVal
readExpr = readOrThrow mainParser
readExprList :: String -> ThrowsError [LispVal]
readExprList = readOrThrow (endBy mainParser whiteSpace)