{- |
Module      :  Text/ParserCombinators/Parsec/Number.hs
Description :  portable number parsers
Copyright   :  (c) C. Maeder 2011
License     :  BSD

Maintainer  :  chr.maeder@web.de
Stability   :  provisional
Portability :  portable

adjusted and portable number parsers stolen from
Text.ParserCombinators.Parsec.Token

The basic top-level number parsers are 'decimal', 'nat', 'int', 'fractional',
'decimalFract', 'natFract', 'floating', 'decimalFloat', 'natFloat'.

`natFloat` parses numeric literals as defined for Haskell. All numbers are
unsigned, i.e. non-negative. Leading zeros are allowed. At least a single
digit is required. A decimal point must be preceded and followed by at least
one digit.

A result type @(Either Integer Double)@ can be converted to a final @Double@
using @(either fromInteger id)@ as is done for the parsers 'fractional2' and
'floating2'.

The parsers 'nat', 'natFract' and 'natFloat' parse hexadecimal and octal
integrals (beginning with @0x@, @0X@, @0o@ or @0O@) that are disallowed when
using 'decimal', 'decimalFract' and 'decimalFloat'.

The parsers 'decimalFract' and 'natFract' only allow a decimal point, whereas
'decimalFloat' and 'natFloat' also allow the exponent notation using @e@ or
@E@.

The parser 'fractional' requires a decimal point between at least two
digits and 'floating' requires either a decimal point or the exponent
notation using @e@ or @E@. (Both parsers do not return integral values and do
not support hexadecimal or octal values).

Signed numbers can be parsed using \"'Control.Monad.ap' 'sign'\" as is done
for the 'int' parser.

A couple of parsers have been added that take a @Bool@ argument, where @False@
does not require any digit following the decimal dot. The parsers
'fractional3' and 'floating3' allow even to start a number with the decimal
dot. Also parsers 'hexFract' and 'hexFloat' for hexadecimal fractions and
floats have been added.

Note that most top-level parsers succeed on a string like \"@1.0e-100@\", but
only the floating point parsers consume the whole string. The fractional
parsers stop before the exponent and the integral parsers before the decimal
point. You may wish to check for the end of a string using
'Text.ParserCombinators.Parsec.eof', i.e. \"@liftM2 const nat eof@\".

The returned values may be inaccurate. 'Int' may overflow. Fractional numbers
should be accurate as only one division is performed. Floating point numbers
with decimal exponents may be inaccurate due to using '**'. Rational numbers
are needed for correct conversions, but large positive or negative exponents
may be a problem and the class `RealFloat` is needed to check for minimal and
maximal exponents.
-}

module Text.ParserCombinators.Parsec.Number where

import Text.ParserCombinators.Parsec
import Data.Char (digitToInt)
import Control.Monad (liftM, ap)

-- * floats

-- | parse a decimal unsigned floating point number containing a dot, e or E
floating :: Floating f => CharParser st f
floating = do
  n <- decimal
  fractExponent n

{- | parse a floating point number possibly containing a decimal dot, e or E.
The @Bool@ is passed on to 'decFloat'. -}
floating2 :: Floating f => Bool -> CharParser st f
floating2 = liftM (either fromInteger id) . decFloat

{- | parse a floating point number possibly starting with a decimal dot.
Note, that a single decimal point or a number starting with @.E@ is illegal.
-}
floating3 :: Floating f => Bool -> CharParser st f
floating3 b = genFractAndExp 0 (fraction True) exponentFactor <|> floating2 b

{- | same as 'floating' but returns a non-negative integral wrapped by Left if
a fractional part and exponent is missing -}
decimalFloat :: (Integral i, Floating f) => CharParser st (Either i f)
decimalFloat = decFloat True

{- | same as 'decimalFloat' but with a @Bool@ argument that is passed on to
'fractExp': @True@ requires at least one digit after the decimal dot,
@False@ also accepts a dot without following digits -}
decFloat :: (Integral i, Floating f) => Bool -> CharParser st (Either i f)
decFloat b = do
  n <- decimal
  option (Left n) $ liftM Right $ fractExp (toInteger n) b

{- | parse a hexadecimal floating point number. The leading digits are read
by 'hexnum' (no @0x@ prefix is consumed here); the @Bool@ is passed on to
'hexFractExp'. -}
hexFloat :: (Integral i, Floating f) => Bool -> CharParser st (Either i f)
hexFloat b = do
  n <- hexnum
  option (Left n) $ liftM Right $ hexFractExp (toInteger n) b

-- | parse hexadecimal, octal or decimal integrals or 'floating'
natFloat :: (Integral i, Floating f) => CharParser st (Either i f)
natFloat = (char '0' >> zeroNumFloat) <|> decimalFloat

-- ** float parts

{- | parse any hexadecimal, octal, decimal or floating point number following
a zero -}
zeroNumFloat :: (Integral i, Floating f) => CharParser st (Either i f)
zeroNumFloat = liftM Left hexOrOct
  <|> decimalFloat
  <|> liftM Right (fractExponent 0)
  <|> return (Left 0)

-- | parse a floating point number given the number before a dot, e or E
fractExponent :: Floating f => Integer -> CharParser st f
fractExponent i = fractExp i True

-- | parse a hex floating point number given the number before a dot or p
hexFractExp :: Floating f => Integer -> Bool -> CharParser st f
hexFractExp i b = genFractExp i (hexFraction b) hexExponentFactor

-- | parse a floating point number given the number before a dot, e or E
fractExp :: Floating f => Integer -> Bool -> CharParser st f
fractExp i b = genFractExp i (fraction b) exponentFactor

{- | parse a floating point number given the number before the fraction and
exponent. Either a fraction (with an optional exponent) or an exponent alone
must follow. -}
genFractExp :: Floating f => Integer -> CharParser st f
  -> CharParser st (f -> f) -> CharParser st f
genFractExp i frac expo = case fromInteger i of
  f -> genFractAndExp f frac expo <|> liftM ($ f) expo

{- | parse a floating point number given the number before the fraction and
exponent that must follow the fraction. The exponent itself is optional and
defaults to 'id'. -}
genFractAndExp :: Floating f => f -> CharParser st f
  -> CharParser st (f -> f) -> CharParser st f
genFractAndExp f frac = ap (liftM (flip id . (f +)) frac) . option id

-- | parse a floating point exponent starting with e or E
exponentFactor :: Floating f => CharParser st (f -> f)
exponentFactor = oneOf "eE" >> extExponentFactor 10 <?> "exponent"

-- | parse a hexadecimal floating point exponent starting with p (IEEE 754)
hexExponentFactor :: Floating f => CharParser st (f -> f)
hexExponentFactor = char 'p' >> extExponentFactor 2 <?> "hex-exponent"

{- | parse a signed decimal and compute the exponent factor given a base.
For hexadecimal exponential notation (IEEE 754) the base is 2 and the
leading character a p. -}
extExponentFactor :: Floating f => Int -> CharParser st (f -> f)
extExponentFactor base =
  liftM (flip (*) . exponentValue base) (ap sign (decimal <?> "exponent"))

{- | compute the factor given by the number following e or E. This
implementation uses @**@ rather than @^@ for more efficiency for large
integers. -}
exponentValue :: Floating f => Int -> Integer -> f
exponentValue base = (fromIntegral base **) . fromInteger

-- * fractional numbers (with just a decimal point between digits)

-- | parse a fractional number containing a decimal dot
fractional :: Fractional f => CharParser st f
fractional = do
  n <- decimal
  fractFract n True

{- | parse a fractional number possibly containing a decimal dot.
The @Bool@ is passed on to 'decFract'. -}
fractional2 :: Fractional f => Bool -> CharParser st f
fractional2 = liftM (either fromInteger id) . decFract

-- | parse a fractional number possibly starting with a decimal dot
fractional3 :: Fractional f => Bool -> CharParser st f
fractional3 b = fractFract 0 True <|> fractional2 b

{- | a decimal fractional. Returns the integral wrapped by Left if no
fractional part follows; the @Bool@ is passed on to 'fractFract'. -}
decFract :: (Integral i, Fractional f) => Bool -> CharParser st (Either i f)
decFract b = do
  n <- decimal
  option (Left n) $ liftM Right $ fractFract (toInteger n) b

{- | a hexadecimal fractional. The leading digits are read by 'hexnum' (no
@0x@ prefix is consumed here); the @Bool@ is passed on to 'hexFraction'. -}
hexFract :: (Integral i, Fractional f) => Bool -> CharParser st (Either i f)
hexFract b = do
  n <- hexnum
  option (Left n) $ liftM Right $ genFractFract (toInteger n) $ hexFraction b

{- | same as 'fractional' but returns a non-negative integral wrapped by Left
if a fractional part is missing -}
decimalFract :: (Integral i, Fractional f) => CharParser st (Either i f)
decimalFract = decFract True

-- | parse hexadecimal, octal or decimal integrals or 'fractional'
natFract :: (Integral i, Fractional f) => CharParser st (Either i f)
natFract = (char '0' >> zeroNumFract) <|> decimalFract

{- | parse any hexadecimal, octal, decimal or fractional number following
a zero -}
zeroNumFract :: (Integral i, Fractional f) => CharParser st (Either i f)
zeroNumFract = liftM Left hexOrOct
  <|> decimalFract
  <|> liftM Right (fractFract 0 True)
  <|> return (Left 0)

-- ** fractional parts

-- | parse a fractional number given the number before the dot
fractFract :: Fractional f => Integer -> Bool -> CharParser st f
fractFract i = genFractFract i . fraction

{- | combine the given number before the dot with a parser for the fractional
part -}
genFractFract :: Fractional f => Integer -> CharParser st f -> CharParser st f
genFractFract i = liftM (fromInteger i +)

-- | parse a dot followed by decimal digits as fractional part
fraction :: Fractional f => Bool -> CharParser st f
fraction b = baseFraction b 10 digit

-- | parse a dot followed by hexadecimal digits as fractional part
hexFraction :: Fractional f => Bool -> CharParser st f
hexFraction b = baseFraction b 16 hexDigit

{- | parse a dot followed by base dependent digits as fractional part.
If the first argument is @True@ at least one digit must follow the dot
('many1'), otherwise the digit sequence may be empty ('many'). -}
baseFraction :: Fractional f => Bool -> Int -> CharParser st Char
  -> CharParser st f
baseFraction requireDigit base baseDigit = char '.' >>
  liftM (fractionValue base)
    ((if requireDigit then many1 else many) baseDigit <?> "fraction")
  <?> "fraction"

{- | compute the fraction given by a sequence of digits following the dot.
Only one division is performed and trailing zeros are ignored. -}
fractionValue :: Fractional f => Int -> String -> f
fractionValue base = uncurry (/)
  . foldl (\ (s, p) d ->
           (p * fromIntegral (digitToInt d) + s, p * fromIntegral base))
    (0, 1) . dropWhile (== '0') . reverse

-- * integers and naturals

{- | parse an optional 'sign' immediately followed by a 'nat'. Note, that in
Daan Leijen's code the sign was wrapped as lexeme in order to skip comments
and spaces in between. -}
int :: Integral i => CharParser st i
int = ap sign nat

-- | parse an optional plus or minus sign, returning 'negate' or 'id'
sign :: Num a => CharParser st (a -> a)
sign = (char '-' >> return negate) <|> (optional (char '+') >> return id)

{- | parse plain non-negative decimal numbers given by a non-empty sequence
of digits -}
decimal :: Integral i => CharParser st i
decimal = number 10 digit

-- | parse a binary number
binary :: Integral i => CharParser st i
binary = number 2 $ oneOf "01"

-- | parse non-negative hexadecimal, octal or decimal numbers
nat :: Integral i => CharParser st i
nat = zeroNumber <|> decimal

-- ** natural parts

-- | parse a 'nat' syntactically starting with a zero
zeroNumber :: Integral i => CharParser st i
zeroNumber =
  char '0' >> (hexOrOct <|> decimal <|> return 0) <?> ""

-- | hexadecimal or octal number
hexOrOct :: Integral i => CharParser st i
hexOrOct = hexadecimal <|> octal

-- | parse a hexadecimal number preceded by an x or X character
hexadecimal :: Integral i => CharParser st i
hexadecimal = oneOf "xX" >> hexnum

-- | parse a hexadecimal number
hexnum :: Integral i => CharParser st i
hexnum = number 16 hexDigit

-- | parse an octal number preceded by an o or O character
octal :: Integral i => CharParser st i
octal = oneOf "oO" >> number 8 octDigit

{- | parse a non-negative number given a base and a parser for the digits.
The computed value is forced with 'seq' before it is returned. -}
number :: Integral i => Int -> GenParser tok st Char -> GenParser tok st i
number base baseDigit = do
  n <- liftM (numberValue base) (many1 baseDigit)
  seq n (return n)

-- | compute the value from a string of digits using a base
numberValue :: Integral i => Int -> String -> i
numberValue base =
  foldl (\ x -> (fromIntegral base * x +) . fromIntegral . digitToInt) 0