module Text.Parse
(
TextParser
, Parse(..)
, parseByRead
, readByParse
, readsPrecByParsePrec
, word
, isWord
, literal
, optionalParens
, parens
, field
, constructors
, enumeration
, parseSigned
, parseInt
, parseDec
, parseOct
, parseHex
, parseFloat
, parseLitChar
, parseLitChar'
, module Text.ParserCombinators.Poly
, allAsString
) where
import Data.Char as Char (isSpace,toLower,isUpper,isDigit,isOctDigit
,isHexDigit,digitToInt,isAlpha,isAlphaNum,ord,chr)
import Data.List (intersperse)
import Data.Ratio
import Text.ParserCombinators.Poly
type TextParser a = Parser Char a
class Parse a where
parse :: TextParser a
parse = parsePrec 0
parsePrec :: Int -> TextParser a
parsePrec _ = optionalParens parse
parseList :: TextParser [a]
parseList = do { isWord "[]"; return [] }
`onFail`
do { isWord "["; isWord "]"; return [] }
`onFail`
bracketSep (isWord "[") (isWord ",") (isWord "]")
(optionalParens parse)
`adjustErr` ("Expected a list, but\n"++)
parseByRead :: Read a => String -> TextParser a
parseByRead name =
P (\s-> case reads s of
[] -> Failure s ("no parse, expected a "++name)
[(a,s')] -> Success s' a
_ -> Failure s ("ambiguous parse, expected a "++name)
)
readByParse :: TextParser a -> ReadS a
readByParse p = \inp->
case runParser p inp of
(Left err, rest) -> []
(Right val, rest) -> [(val,rest)]
readsPrecByParsePrec :: (Int -> TextParser a) -> Int -> ReadS a
readsPrecByParsePrec p = \prec inp->
case runParser (p prec) inp of
(Left err, rest) -> []
(Right val, rest) -> [(val,rest)]
word :: TextParser String
word = P p
where
p "" = Failure "" "end of input"
p (c:s) | isSpace c = p (dropWhile isSpace s)
p ('\'':s) = let (P lit) = parseLitChar' in fmap show (lit ('\'':s))
p ('"':s) = lexString "\"" s
where lexString acc ('"':s) = Success s (reverse ('"':acc))
lexString acc [] = Failure [] ("end of input in "
++"string literal "
++acc)
lexString acc s = let (P lit) = parseLitChar
in case lit s of
Failure a b -> Failure a b
Success t c -> lexString (c:acc) t
p ('0':'x':s) = Success t ('0':'x':ds) where (ds,t) = span isHexDigit s
p ('0':'X':s) = Success t ('0':'X':ds) where (ds,t) = span isHexDigit s
p ('0':'o':s) = Success t ('0':'o':ds) where (ds,t) = span isOctDigit s
p ('0':'O':s) = Success t ('0':'O':ds) where (ds,t) = span isOctDigit s
p (c:s) | isSingle c = Success s [c]
| isSym c = let (sym,t) = span isSym s in Success t (c:sym)
| isIdInit c = let (nam,t) = span isIdChar s in Success t (c:nam)
| isDigit c = let (ds,t) = span isDigit s in
lexFracExp (c:ds) t
| otherwise = Failure (c:s) ("Bad character: "++show c)
where isSingle c = c `elem` ",;()[]{}`"
isSym c = c `elem` "!@#$%&*+./<=>?\\^|:-~"
isIdInit c = isAlpha c || c == '_'
isIdChar c = isAlphaNum c || c `elem` "_'"
lexFracExp acc ('.':d:s) | isDigit d =
lexExp (acc++'.':d:ds) t
where (ds,t) = span isDigit s
lexFracExp acc s = lexExp acc s
lexExp acc (e:s) | e`elem`"eE" =
case s of
('+':d:t) | isDigit d ->
let (ds,u)=span isDigit t in
Success u (acc++"e+"++d:ds)
('-':d:t) | isDigit d ->
let (ds,u)=span isDigit t in
Success u (acc++"e-"++d:ds)
(d:t) |isDigit d ->
let (ds,u)=span isDigit t in
Success u (acc++"e"++d:ds)
_ -> Failure s ("missing +/-/digit "
++"after e in float "
++"literal: "
++show (acc++"e"++"..."))
lexExp acc s = Success s acc
oldword :: TextParser String
oldword = P (\s-> case lex s of
[] -> Failure s ("no input? (impossible)")
[("","")] -> Failure "" ("no input?")
[("",s')] -> Failure s ("lexing failed?")
((x,s'):_) -> Success s' x
)
isWord :: String -> TextParser String
isWord w = do { w' <- word
; if w'==w then return w else fail ("expected "++w++" got "++w')
}
literal :: String -> TextParser String
literal w = do { w' <- walk w
; if w'==w then return w else fail ("expected "++w++" got "++w')
}
where walk [] = return w
walk (c:cs) = do { x <- next
; if x==c then walk cs
else return []
}
optionalParens :: TextParser a -> TextParser a
optionalParens p = parens False p
parens :: Bool -> TextParser a -> TextParser a
parens True p = bracket (isWord "(") (isWord ")") (parens False p)
parens False p = parens True p `onFail` p
field :: Parse a => String -> TextParser a
field name = do { isWord name; commit $ do { isWord "="; parse } }
constructors :: [(String,TextParser a)] -> TextParser a
constructors cs = oneOf' (map cons cs)
where cons (name,p) =
( name
, do { isWord name
; p `adjustErrBad` (("got constructor, but within "
++name++",\n")++)
}
)
enumeration :: (Show a) => String -> [a] -> TextParser a
enumeration typ cs = oneOf (map (\c-> do { isWord (show c); return c }) cs)
`adjustErr`
(++("\n expected "++typ++" value ("++e++")"))
where e = concat (intersperse ", " (map show (init cs)))
++ ", or " ++ show (last cs)
parseSigned :: Real a => TextParser a -> TextParser a
parseSigned p = do '-' <- next; commit (fmap negate p)
`onFail`
do p
parseInt :: (Integral a) => String ->
a -> (Char -> Bool) -> (Char -> Int) ->
TextParser a
parseInt base radix isDigit digitToInt =
do cs <- many1 (satisfy isDigit)
return (foldl1 (\n d-> n*radix+d)
(map (fromIntegral.digitToInt) cs))
`adjustErr` (++("\nexpected one or more "++base++" digits"))
parseDec, parseOct, parseHex :: (Integral a) => TextParser a
parseDec = parseInt "decimal" 10 Char.isDigit Char.digitToInt
parseOct = parseInt "octal" 8 Char.isOctDigit Char.digitToInt
parseHex = parseInt "hex" 16 Char.isHexDigit Char.digitToInt
parseFloat :: (RealFrac a) => TextParser a
parseFloat = do ds <- many1 (satisfy isDigit)
frac <- (do '.' <- next
many (satisfy isDigit)
`adjustErrBad` (++"expected digit after .")
`onFail` return [] )
exp <- exponent `onFail` return 0
( return . fromRational . (* (10^^(exp - length frac)))
. (%1) . (\ (Right x)->x) . fst
. runParser parseDec ) (ds++frac)
`onFail`
do w <- many (satisfy (not.isSpace))
case map toLower w of
"nan" -> return (0/0)
"infinity" -> return (1/0)
_ -> fail "expected a floating point number"
where exponent = do 'e' <- fmap toLower next
commit (do '+' <- next; parseDec
`onFail`
parseSigned parseDec )
parseLitChar' :: TextParser Char
parseLitChar' = do '\'' <- next `adjustErr` (++"expected a literal char")
char <- parseLitChar
'\'' <- next `adjustErrBad` (++"literal char has no final '")
return char
parseLitChar :: TextParser Char
parseLitChar = do c <- next
char <- case c of
'\\' -> next >>= escape
'\'' -> fail "expected a literal char, got ''"
_ -> return c
return char
where
escape 'a' = return '\a'
escape 'b' = return '\b'
escape 'f' = return '\f'
escape 'n' = return '\n'
escape 'r' = return '\r'
escape 't' = return '\t'
escape 'v' = return '\v'
escape '\\' = return '\\'
escape '"' = return '"'
escape '\'' = return '\''
escape '^' = do ctrl <- next
if ctrl >= '@' && ctrl <= '_'
then return (chr (ord ctrl - ord '@'))
else fail ("literal char ctrl-escape malformed: \\^"
++[ctrl])
escape d | isDigit d
= fmap chr $ (reparse [d] >> parseDec)
escape 'o' = fmap chr $ parseOct
escape 'x' = fmap chr $ parseHex
escape c | isUpper c
= mnemonic c
escape c = fail ("unrecognised escape sequence in literal char: \\"++[c])
mnemonic 'A' = do 'C' <- next; 'K' <- next; return '\ACK'
`wrap` "'\\ACK'"
mnemonic 'B' = do 'E' <- next; 'L' <- next; return '\BEL'
`onFail`
do 'S' <- next; return '\BS'
`wrap` "'\\BEL' or '\\BS'"
mnemonic 'C' = do 'R' <- next; return '\CR'
`onFail`
do 'A' <- next; 'N' <- next; return '\CAN'
`wrap` "'\\CR' or '\\CAN'"
mnemonic 'D' = do 'E' <- next; 'L' <- next; return '\DEL'
`onFail`
do 'L' <- next; 'E' <- next; return '\DLE'
`onFail`
do 'C' <- next; ( do '1' <- next; return '\DC1'
`onFail`
do '2' <- next; return '\DC2'
`onFail`
do '3' <- next; return '\DC3'
`onFail`
do '4' <- next; return '\DC4' )
`wrap` "'\\DEL' or '\\DLE' or '\\DC[1..4]'"
mnemonic 'E' = do 'T' <- next; 'X' <- next; return '\ETX'
`onFail`
do 'O' <- next; 'T' <- next; return '\EOT'
`onFail`
do 'N' <- next; 'Q' <- next; return '\ENQ'
`onFail`
do 'T' <- next; 'B' <- next; return '\ETB'
`onFail`
do 'M' <- next; return '\EM'
`onFail`
do 'S' <- next; 'C' <- next; return '\ESC'
`wrap` "one of '\\ETX' '\\EOT' '\\ENQ' '\\ETB' '\\EM' or '\\ESC'"
mnemonic 'F' = do 'F' <- next; return '\FF'
`onFail`
do 'S' <- next; return '\FS'
`wrap` "'\\FF' or '\\FS'"
mnemonic 'G' = do 'S' <- next; return '\GS'
`wrap` "'\\GS'"
mnemonic 'H' = do 'T' <- next; return '\HT'
`wrap` "'\\HT'"
mnemonic 'L' = do 'F' <- next; return '\LF'
`wrap` "'\\LF'"
mnemonic 'N' = do 'U' <- next; 'L' <- next; return '\NUL'
`onFail`
do 'A' <- next; 'K' <- next; return '\NAK'
`wrap` "'\\NUL' or '\\NAK'"
mnemonic 'R' = do 'S' <- next; return '\RS'
`wrap` "'\\RS'"
mnemonic 'S' = do 'O' <- next; 'H' <- next; return '\SOH'
`onFail`
do 'O' <- next; return '\SO'
`onFail`
do 'T' <- next; 'X' <- next; return '\STX'
`onFail`
do 'I' <- next; return '\SI'
`onFail`
do 'Y' <- next; 'N' <- next; return '\SYN'
`onFail`
do 'U' <- next; 'B' <- next; return '\SUB'
`onFail`
do 'P' <- next; return '\SP'
`wrap` "'\\SOH' '\\SO' '\\STX' '\\SI' '\\SYN' '\\SUB' or '\\SP'"
mnemonic 'U' = do 'S' <- next; return '\US'
`wrap` "'\\US'"
mnemonic 'V' = do 'T' <- next; return '\VT'
`wrap` "'\\VT'"
wrap p s = p `onFail` fail ("expected literal char "++s)
instance Parse Int where
parse = fmap fromInteger $
do many (satisfy isSpace); parseSigned parseDec
instance Parse Integer where
parse = do many (satisfy isSpace); parseSigned parseDec
instance Parse Float where
parse = do many (satisfy isSpace); parseSigned parseFloat
instance Parse Double where
parse = do many (satisfy isSpace); parseSigned parseFloat
instance Parse Char where
parse = do many (satisfy isSpace); parseLitChar'
parseList = do { w <- word; if head w == '"' then return (init (tail w))
else fail "not a string" }
instance Parse Bool where
parse = enumeration "Bool" [False,True]
instance Parse Ordering where
parse = enumeration "Ordering" [LT,EQ,GT]
instance Parse () where
parse = P p
where p [] = Failure [] "no input: expected a ()"
p ('(':cs) = case dropWhile isSpace cs of
(')':s) -> Success s ()
_ -> Failure cs "Expected ) after ("
p (c:cs) | isSpace c = p cs
| otherwise = Failure (c:cs) ("Expected a (), got "++show c)
instance (Parse a, Parse b) => Parse (a,b) where
parse = do{ isWord "(" `adjustErr` ("Opening a 2-tuple\n"++)
; x <- parse `adjustErr` ("In 1st item of a 2-tuple\n"++)
; isWord "," `adjustErr` ("Separating a 2-tuple\n"++)
; y <- parse `adjustErr` ("In 2nd item of a 2-tuple\n"++)
; isWord ")" `adjustErr` ("Closing a 2-tuple\n"++)
; return (x,y) }
instance (Parse a, Parse b, Parse c) => Parse (a,b,c) where
parse = do{ isWord "(" `adjustErr` ("Opening a 3-tuple\n"++)
; x <- parse `adjustErr` ("In 1st item of a 3-tuple\n"++)
; isWord "," `adjustErr` ("Separating(1) a 3-tuple\n"++)
; y <- parse `adjustErr` ("In 2nd item of a 3-tuple\n"++)
; isWord "," `adjustErr` ("Separating(2) a 3-tuple\n"++)
; z <- parse `adjustErr` ("In 3rd item of a 3-tuple\n"++)
; isWord ")" `adjustErr` ("Closing a 3-tuple\n"++)
; return (x,y,z) }
instance Parse a => Parse (Maybe a) where
parsePrec p =
optionalParens (do { isWord "Nothing"; return Nothing })
`onFail`
parens (p>9) (do { isWord "Just"
; fmap Just $ parsePrec 10
`adjustErrBad` ("but within Just, "++) })
`adjustErr` (("expected a Maybe (Just or Nothing)\n"++).indent 2)
instance (Parse a, Parse b) => Parse (Either a b) where
parsePrec p =
parens (p>9) $
constructors [ ("Left", do { fmap Left $ parsePrec 10 } )
, ("Right", do { fmap Right $ parsePrec 10 } )
]
instance Parse a => Parse [a] where
parse = parseList
allAsString :: TextParser String
allAsString = P (\s-> Success [] s)