{-# LANGUAGE CPP #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE TupleSections #-} -- | -- Module : Text.Numerals.Algorithm -- Description : A module that contains functions to construct algorithmic conversions from numbers to words. -- Maintainer : hapytexeu+gh@gmail.com -- Stability : experimental -- Portability : POSIX -- -- A module that contains data types and functions to automatically convert a number to words. It has tooling for a 'NumeralsAlgorithm' -- as well as a 'HighNumberAlgorithm' that is used to generate a 'ShortScale' or 'LongScale'. module Text.Numerals.Algorithm ( -- * Data types for number algorithms NumeralsAlgorithm, numeralsAlgorithm, -- * Large number algorithms HighNumberAlgorithm (ShortScale, LongScale), shortScale, longScale, shortScaleTitle, longScaleTitle, valueSplit', -- * Conversion to a 'NumberSegment' toSegments, toSegmentLow, toSegmentMid, toSegmentHigh, -- * Segment compression compressSegments, ) where import Control.DeepSeq (NFData) import Data.Data (Data) import Data.Default.Class (Default (def)) import Data.Foldable (toList) import Data.List (sortOn) #if __GLASGOW_HASKELL__ < 803 import Data.Semigroup((<>)) #endif import Data.Text (Text, cons, toTitle) import Data.Vector (Vector, fromList, (!), (!?)) import qualified Data.Vector as V import GHC.Generics (Generic) import Test.QuickCheck (oneof) import Test.QuickCheck.Arbitrary (Arbitrary (arbitrary, shrink)) import Text.Numerals.Class ( ClockText, FreeMergerFunction, FreeNumberToWords, FreeValueSplitter, MNumberSegment, MergerFunction, NumToWord (toCardinal, toOrdinal, toShortOrdinal, toTimeText'), NumberSegment (NumberSegment), NumberSegmenting, ValueSplit (valueSplit), ValueSplitter, toClockSegment, toDaySegment, ) import Text.Numerals.Internal (_genText, _iLogFloor, _shrinkText, _thousand) import Text.Numerals.Prefix (latinPrefixes) -- | A data type for algorithmic number to word conversions. Most western -- languages /likely/ can work with this data type. data NumeralsAlgorithm = NumeralsAlgorithm { -- | The word used as prefix to denote negative numbers. minusWord :: Text, -- | The word used to denote /one/ in the language. oneWord :: Text, -- | A 'Vector' of small numbers, the first item is the word for /two/ and each successor is the word for the next number. lowWords :: Vector Text, -- | A list of 2-tuples where the first item contains the value, and the second the corresponding word, the values are ordered in descending value order. midWords :: [(Integer, Text)], -- | A function that is used to generate words for large values (greater than or equal to one /million/), often constructed with the /short scale/ or /long scale/. highWords :: FreeValueSplitter, -- | A function that specifies how to merge words based on the grammar of that specific language. mergeFunction :: FreeMergerFunction, -- | A function to conver the /cardinal/ form of a number in an /ordinal/ one. ordinize :: Text -> Text, -- | A function that converts a number to its /short ordinal/ form. shortOrdinal :: FreeNumberToWords, -- | A function that converts the clock segment and day segment to a /Text/ that describes the time of the day in words. clockText :: ClockText } instance NumToWord NumeralsAlgorithm where toCardinal NumeralsAlgorithm {minusWord = _minusWord, oneWord = _oneWord, lowWords = _lowWords, midWords = _midWords, highWords = _highWords, mergeFunction = _mergeFunction} = cardinal where cardinal i | i < 0 = _minusWord <> cons ' ' (go (-j)) | otherwise = go j where go = compressSegments _oneWord _mergeFunction . toSegments _lowWords _midWords _highWords j = fromIntegral i :: Integer toOrdinal na@NumeralsAlgorithm {ordinize = _ordinize} = _ordinize . toCardinal na toShortOrdinal = shortOrdinal toTimeText' alg h m = clockText alg (toClockSegment m) (toDaySegment h) h m _toNumberScale :: (Integral i, Integral j) => i -> (j, i) _toNumberScale i = (l, k) where ~(_, l, k) = _iLogFloor _thousand i -- | A data type used for to map larger numbers to words. This data type -- supports the /short scale/ and /long scale/ with /Latin/ prefixes, and -- custom suffixes. The 'Default' value is the /short scale/ with /illion/ -- as suffix. This is used in /English/ for large numbers. data HighNumberAlgorithm = ShortScale Text | LongScale Text Text deriving (Data, Eq, Generic, Ord, Read, Show) instance NFData HighNumberAlgorithm instance Arbitrary HighNumberAlgorithm where arbitrary = oneof [ShortScale <$> _genText, LongScale <$> _genText <*> _genText] shrink (ShortScale t) = ShortScale <$> _shrinkText t shrink (LongScale ta tb) = ((`LongScale` tb) <$> _shrinkText ta) <> (LongScale ta <$> _shrinkText tb) instance Default HighNumberAlgorithm where def = ShortScale "illion" -- | Construct a 'FreeValueSplitter' function for the given suffix for a /short scale/. shortScale :: Text -> FreeValueSplitter shortScale = valueSplit . ShortScale -- | Construct a 'FreeValueSplitter' function for the given suffixes for a /long scale/. longScale :: Text -> Text -> FreeValueSplitter longScale suf1 = valueSplit . LongScale suf1 -- | Construct a 'FreeValueSplitter' function for the given suffix for a /short scale/, the names are written in /title case/. shortScaleTitle :: Text -> FreeValueSplitter shortScaleTitle = valueSplit' toTitle . ShortScale -- | Construct a 'FreeValueSplitter' function for the given suffixes for a /long scale/, the names are written in /title case/. longScaleTitle :: Text -> Text -> FreeValueSplitter longScaleTitle suf1 = valueSplit' toTitle . LongScale suf1 _highWithSuffix :: Text -> Int -> Maybe Text _highWithSuffix suf = fmap (<> suf) . (latinPrefixes !?) _highToText :: HighNumberAlgorithm -> Int -> Maybe Text _highToText (ShortScale suf) j = _highWithSuffix suf j _highToText (LongScale suf1 suf2) j | even j = _highWithSuffix suf1 k | otherwise = _highWithSuffix suf2 k where k = div j 2 -- | Generate a /value splitter/ for a 'HighNumberAlgorithm' but where the result -- is post-processed by a function. valueSplit' :: -- | The post-processing function. (Text -> Text) -> -- | The 'HighNumberAlgorithm' that is used. HighNumberAlgorithm -> -- | The 'FreeValueSplitter' result. FreeValueSplitter valueSplit' f vs i = (m,) . f <$> _highToText vs (j - 2) where ~(j, m) = _toNumberScale i instance ValueSplit HighNumberAlgorithm where valueSplit = valueSplit' id -- | A /smart constructor/ for the 'NumeralsAlgorithm' type. This constructor -- allows one to use an arbitrary 'Foldable' type for the low words and mid -- words. It will also order the midwords accordingly. numeralsAlgorithm :: (Foldable f, Foldable g) => Text -> Text -> Text -> f Text -> g (Integer, Text) -> FreeValueSplitter -> FreeMergerFunction -> (Text -> Text) -> FreeNumberToWords -> ClockText -> NumeralsAlgorithm numeralsAlgorithm minus zero one _lowWords _midWords = NumeralsAlgorithm minus one (fromList (zero : one : toList _lowWords)) (sortOn (negate . fst) (toList _midWords)) _maybeSegment :: Integral i => (i -> NumberSegment i) -> i -> MNumberSegment i _maybeSegment f = go where go 0 = Nothing go i = Just (f i) -- | Convert the given number to a 'NumberSegment' with the given 'Vector' of -- low numbers. Mid words and large numbers are not taken into account. This -- is often the next step after the 'toSegmentMid'. toSegmentLow :: Integral i => -- | A 'Vector' of low words. Vector Text -> -- | The function that maps the number to the 'NumberSegment'. NumberSegmenting i toSegmentLow vs = go where go i | i >= nvs = NumberSegment (Just (go dv)) nvs lv (tl md) | otherwise = NumberSegment Nothing i (vs ! fromIntegral i) Nothing where (dv, md) = divMod i nvs lv = V.last vs nvs = fromIntegral (V.length vs) - 1 tl = _maybeSegment go _splitRecurse :: Integral i => (i -> NumberSegment i) -> (i -> NumberSegment i) -> i -> Text -> i -> NumberSegment i _splitRecurse f g im v j = NumberSegment hd im v (_maybeSegment g md) where hd | dv == 1 = Nothing | otherwise = Just (f dv) ~(dv, md) = divMod j im -- | Convert the given number to a 'NumberSegment' with the given 'Vector' of -- low numbers, and the /sorted/ list of mid numbers. Large numbers are not -- taken into account. This is often the next step after the 'toSegmentHigh'. toSegmentMid :: Integral i => -- | A 'Vector' of low words. Vector Text -> -- | The list of name and the names of these numbers in /descending/ order for the mid words. [(Integer, Text)] -> -- | The function that maps the number to the 'NumberSegment'. NumberSegmenting i toSegmentMid lows = go where go [] n = toSegmentLow lows n go ma@((m, v) : ms) n | im > n = goms n | otherwise = _splitRecurse (go ma) goms im v n where im = fromIntegral m goms = go ms -- | Convert the given number to a 'NumberSegment' with the given 'Vector' of -- low numbers, the /sorted/ list of mid numbers, and a 'FreeValueSplitter' for -- large numbers. toSegmentHigh :: Integral i => -- | A 'Vector' of low words. Vector Text -> -- | The list of name and the names of these numbers in /descending/ order for the mid words. [(Integer, Text)] -> -- | The 'ValueSplitter' used for large numbers, likely a splitter from a /short scale/ or /long scale/. ValueSplitter i -> -- | The function that maps the number to the 'NumberSegment'. NumberSegmenting i toSegmentHigh lows mids highs = go where go v | Just (i, t) <- highs v = _splitRecurse go go i t v | otherwise = toSegmentMid lows mids v -- | Convert the given number to a 'NumberSegment' with the given 'Vector' of -- low numbers, the /sorted/ list of mid numbers, and a 'FreeValueSplitter' for -- large numbers. toSegments :: Integral i => -- | A 'Vector' of low words. Vector Text -> -- | The list of name and the names of these numbers in /descending/ order for the mid words. [(Integer, Text)] -> -- | The 'ValueSplitter' used for large numbers, likely a splitter from a /short scale/ or /long scale/. ValueSplitter i -> -- | The function that maps the number to the 'NumberSegment'. NumberSegmenting i toSegments = toSegmentHigh -- | Use the given 'MergerFunction' to compress the 'NumberSegment' to a single -- 'Text' object that represents the given number. compressSegments :: Integral i => -- | The value used for /one/ in the specific language. Text -> -- | The 'MergerFunction' for the specific language that implements the grammar rules how to merge values. MergerFunction i -> -- | The given 'NumberSegment' value to turn into a 'Text' object. NumberSegment i -> -- | The 'Text' object that contains the name of the number stored in the 'NumberSegment'. Text compressSegments one' merger = snd . go where go (NumberSegment dv' i t md') = _mergeTail md' (dvi * i, merger dvi i dv t) where (dvi, dv) = _unwrap dv' _unwrap = maybe (1, one') go _mergeTail Nothing r = r _mergeTail (Just md') (vi, v) = (vi + mdi, merger vi mdi v md) where (mdi, md) = go md'