{-| Stability: experimental The functions here parse numbers in a variety of formats. Examples: >>> readSciRational "-0.0e+3" -- result: Just ((0 % 1) .^ 0) >>> readSciRational "0.25e+2" -- result: Just ((25 % 1) .^ 0) >>> readSciRational "-1.0e-1" -- result: Just (((-1) % 1) .^ (-1)) >>> readSciRational "5.0e+20/6.e0" -- result: Just ((25 % 3) .^ 19) >>> readSciRational "0xfeedface" -- result: Just ((4277009102 % 1) .^ 0) >>> readSciRational "1e99999999" -- result: Just ((1 % 1) .^ 99999999) All the @'ReadP'@ parsers here can be safely used with @'P.gather'@ as they do not contain any occurences of @'P.readS_to_P'@. For writing lexers, a simple regular expression to detect numbers would be: > [-+]? [.]? [0-9] [-+/.0-9a-zA-Z]* Note that this is more lenient than what the grammar of @'readNumber'@ accepts. If Unicode is supported, one can also include: * @U+23E8@ (DECIMAL EXPONENT SYMBOL) * @U+2044@ (FRACTION SLASH) and @U+2215@ (DIVISION SLASH) * @U+2212@ (MINUS SIGN) -} module Data.SciRatio.Read ( -- * Simple parsers readNumber , readSciRational , readSign -- * @'ReadP'@ parsers , readNumberP , readSciRationalP , readScientificP , readDecimalP , readIntegerP , readUnsignedP , readHexP , readDecP , readOctP , readBinP -- * Character predicates , isBinDigit , isDecimalExponentSymbol , isFractionSlash ) where import Control.Monad (ap, mzero) import Data.Char (isDigit, isHexDigit, isOctDigit, toLower) import Data.SciRatio (SciRational) import Text.ParserCombinators.ReadP (ReadP, (<++)) import Text.Read.Lex (readIntP) import qualified Text.ParserCombinators.ReadP as P -- | Read a number (see @'readNumberP'@). readNumber :: Fractional a => String -> Maybe a readNumber = runReadP readNumberP -- | Read a number (see @'readNumberP'@). readSciRational :: String -> Maybe SciRational readSciRational = runReadP readSciRationalP -- | Interpret a sign: -- -- > sign = [-+] -- -- Note: @U+2212@ (MINUS SIGN) is also accepted. readSign :: Num a => Char -> Maybe a readSign c = case c of '+' -> Just 1 '-' -> Just (-1) '\x2212' -> Just (-1) _ -> Nothing -- | Read a rational number in scientific notation: -- -- > number = [0] [bB] [0-1]+ -- > | [0] [oO] [0-7]+ -- > | [0] [xX] [0-9a-fA-F]+ -- > | scientific ( fraction_slash scientific )? -- readNumberP :: Fractional a => ReadP a readNumberP = readUnsignedP' <++ readRatioP readScientificP -- | Read a number (see @'readNumberP'@). readSciRationalP :: ReadP SciRational readSciRationalP = readNumberP -- | Read a ratio of two numbers, each read with the given parser. readRatioP :: Fractional a => ReadP a -- ^ Parser for the numerator and denominator. -> ReadP a readRatioP p = fmap (/) p `ap` ((P.satisfy isFractionSlash >> p) <++ return 1) -- | Read a decimal fraction in scientific notation: -- -- > scientific = decimal (decimal_exponent_symbol sign? dec)? -- readScientificP :: Fractional a => ReadP a readScientificP = readSignedP $ do r <- readDecimalP e <- (P.satisfy isDecimalExponentSymbol >> readIntegerP) <++ return 0 return (r * pow10 e) where pow10 = (10 ^^) :: Fractional a => Integer -> a -- | Read a decimal fraction: -- -- > decimal = sign? ( [0-9]+ [.]? [0-9]* -- > | [.] [0-9]+ ) -- readDecimalP :: Fractional a => ReadP a readDecimalP = readSignedP $ do intPart <- P.munch isDigit string <- P.look case string of '.' : _ -> do _ <- P.get fracPart <- P.munch isDigit if length intPart + length fracPart == 0 then mzero else return $ readInt' intPart + readInt' fracPart / 10 ^^ length fracPart _ -> case intPart of "" -> mzero _ -> return $ readInt' intPart where readInt' "" = 0 readInt' s = fromInteger (read s) -- | Read a signed integer in base 10. -- -- > integer = sign? [0-9]+ -- readIntegerP :: Num a => ReadP a readIntegerP = readSignedP readDecP -- | Read an unsigned number in either binary (@0b@), octal (@0o@), decimal, -- or hexadecimal (@0x@) format: -- -- > unsigned = [0] [bB] [0-1]+ -- > | [0] [oO] [0-7]+ -- > | [0] [xX] [0-9a-fA-F]+ -- > | dec -- readUnsignedP :: Num a => ReadP a readUnsignedP = readUnsignedP' <++ readDecP -- | Read an unsigned number in either binary (@0b@), octal (@0o@), or -- hexadecimal (@0x@) format, but /not/ in decimal format. The prefix is -- not case-sensitive. readUnsignedP' :: Num a => ReadP a readUnsignedP' = do _ <- P.char '0' prefixChar <- P.get fromInteger `fmap` case toLower prefixChar of 'b' -> readBinP 'o' -> readOctP 'x' -> readHexP _ -> mzero -- | Read a number preceded by an optional sign. readSignedP :: Num a => ReadP a -> ReadP a readSignedP p = do sign <- (<++ return 1) $ do c <- P.get case readSign c of Just x -> return x Nothing -> mzero num <- p return (sign * num) -- | Read an unsigned integer in hexadecimal notation: -- -- > hex = [0-9A-Fa-f]+ -- readHexP :: Num a => ReadP a readHexP = readIntP 16 isHexDigit digitToInt where digitToInt c = case fromEnum c of c' | c <= '9' -> c' - fromEnum '0' | c <= 'F' -> c' - (fromEnum 'A' - 10) | otherwise -> c' - (fromEnum 'a' - 10) -- | Read an unsigned integer in base 10. -- -- > dec = [0-9]+ -- -- Note: Although similar functions exist in @'Text.Read.Lex'@, the versions -- here do not require @'Eq'@. readDecP :: Num a => ReadP a readDecP = readIntP 10 isDigit digitToInt where digitToInt c = fromEnum c - fromEnum '0' -- | Read an unsigned integer in octal notation: -- -- > oct = [0-7]+ -- readOctP :: Num a => ReadP a readOctP = readIntP 8 isOctDigit digitToInt where digitToInt c = fromEnum c - fromEnum '0' -- | Read an unsigned integer in binary notation: -- -- > bin = [01]+ -- readBinP :: Num a => ReadP a readBinP = readIntP 2 isBinDigit digitToInt where digitToInt c = fromEnum c - fromEnum '0' -- | Run a @'ReadP'@ parser to the very end. Only a single, unique parse is -- accepted and it must consume the entire input. runReadP :: ReadP a -> String -> Maybe a runReadP p s = case P.readP_to_S p s of [(x, [])] -> Just x _ -> Nothing -- | Whether the character is a binary digit: -- -- > bin_digit = [01] -- isBinDigit :: Char -> Bool isBinDigit c = fromEnum '0' <= x && x <= fromEnum '1' where x = fromEnum c -- | Whether the character can be used as an exponent symbol in scientific -- notation: -- -- > decimal_exponent_symbol = [eE] -- -- Note: @U+23E8@ (DECIMAL EXPONENT SYMBOL) is also accepted. isDecimalExponentSymbol :: Char -> Bool isDecimalExponentSymbol = (`elem` "eE\x23e8") -- | Whether the character is a fraction slash: -- -- > fraction_slash = [/] -- -- Note: @U+2044@ (FRACTION SLASH) and @U+2215@ (DIVISION SLASH) are also -- accepted. isFractionSlash :: Char -> Bool isFractionSlash = (`elem` "/\x2044\x2215")