Copyright	(c) 2010 Bryan O'Sullivan
License	BSD-style
Maintainer	bos@serpentine.com
Stability	experimental
Portability	GHC
Safe Haskell	Safe-Inferred
Language	Haskell98

Data.Text.ICU

Contents

Data representation
Types
Locales
Boundary analysis
Case mapping
Iteration
Normalization
- Checks for normalization
String comparison
- Normalization-sensitive string comparison
- Locale-sensitive string collation
Regular expressions
Spoof checking
Calendars
- Construction
- Operations on calendars
Number formatting
Date formatting

Description

Commonly used functions for Unicode, implemented as bindings to the International Components for Unicode (ICU) libraries.

This module contains only the most commonly used types and functions. Other modules in this package expose richer interfaces.

Synopsis

data LocaleName
- = Root
- | Locale String
- | Current
availableLocales :: IO [String]
data Breaker a
data Break a
brkPrefix :: Break a -> Text
brkBreak :: Break a -> Text
brkSuffix :: Break a -> Text
brkStatus :: Break a -> a
data Line
- = Soft
- | Hard
data Word
- = Uncategorized
- | Number
- | Letter
- | Kana
- | Ideograph
breakCharacter :: LocaleName -> Breaker ()
breakLine :: LocaleName -> Breaker Line
breakSentence :: LocaleName -> Breaker ()
breakWord :: LocaleName -> Breaker Word
breaks :: Breaker a -> Text -> [Break a]
breaksRight :: Breaker a -> Text -> [Break a]
toCaseFold :: Bool -> Text -> Text
toLower :: LocaleName -> Text -> Text
toUpper :: LocaleName -> Text -> Text
data CharIterator
fromString :: String -> CharIterator
fromText :: Text -> CharIterator
fromUtf8 :: ByteString -> CharIterator
nfc :: Text -> Text
nfd :: Text -> Text
nfkc :: Text -> Text
nfkd :: Text -> Text
nfkcCasefold :: Text -> Text
quickCheck :: NormalizationMode -> Text -> Maybe Bool
isNormalized :: NormalizationMode -> Text -> Bool
data CompareOption
- = InputIsFCD
- | CompareIgnoreCase
- | FoldCaseExcludeSpecialI
compareUnicode :: [CompareOption] -> Text -> Text -> Ordering
data Collator
collator :: LocaleName -> Collator
collatorWith :: LocaleName -> [Attribute] -> Collator
collatorFromRules :: Text -> Either ParseError Collator
collatorFromRulesWith :: Text -> [Attribute] -> Either ParseError Collator
collate :: Collator -> Text -> Text -> Ordering
collateIter :: Collator -> CharIterator -> CharIterator -> Ordering
sortKey :: Collator -> Text -> ByteString
uca :: Collator
data MatchOption
- = CaseInsensitive
- | Comments
- | DotAll
- | Literal
- | Multiline
- | HaskellLines
- | UnicodeWord
- | ErrorOnUnknownEscapes
- | WorkLimit Int
- | StackLimit Int
data ParseError
data Match
data Regex
class Regular r
regex :: [MatchOption] -> Text -> Regex
regex' :: [MatchOption] -> Text -> Either ParseError Regex
pattern :: Regular r => r -> Text
find :: Regex -> Text -> Maybe Match
findAll :: Regex -> Text -> [Match]
groupCount :: Regular r => r -> Int
unfold :: (Int -> Match -> Maybe Text) -> Match -> [Text]
span :: Match -> Text
group :: Int -> Match -> Maybe Text
prefix :: Int -> Match -> Maybe Text
suffix :: Int -> Match -> Maybe Text
data Spoof
data SpoofParams = SpoofParams {
- spoofChecks :: Maybe [SpoofCheck]
- level :: Maybe RestrictionLevel
- locales :: Maybe [String]
}
data SpoofCheck
- = SingleScriptConfusable
- | MixedScriptConfusable
- | WholeScriptConfusable
- | AnyCase
- | RestrictionLevel
- | Invisible
- | CharLimit
- | MixedNumbers
- | AllChecks
- | AuxInfo
data RestrictionLevel
- = ASCII
- | SingleScriptRestrictive
- | HighlyRestrictive
- | ModeratelyRestrictive
- | MinimallyRestrictive
- | Unrestrictive
data SpoofCheckResult
- = CheckOK
- | CheckFailed [SpoofCheck]
- | CheckFailedWithRestrictionLevel {
  - failedChecks :: [SpoofCheck]
  - failedLevel :: RestrictionLevel
  }
spoof :: Spoof
spoofWithParams :: SpoofParams -> Spoof
spoofFromSource :: (ByteString, ByteString) -> SpoofParams -> Spoof
spoofFromSerialized :: ByteString -> SpoofParams -> Spoof
areConfusable :: Spoof -> Text -> Text -> SpoofCheckResult
spoofCheck :: Spoof -> Text -> SpoofCheckResult
getSkeleton :: Spoof -> Maybe SkeletonTypeOverride -> Text -> Text
getChecks :: Spoof -> [SpoofCheck]
getAllowedLocales :: Spoof -> [String]
getRestrictionLevel :: Spoof -> Maybe RestrictionLevel
serialize :: Spoof -> ByteString
data Calendar
data CalendarType
- = TraditionalCalendarType
- | DefaultCalendarType
- | GregorianCalendarType
data SystemTimeZoneType
- = AnyTimeZone
- | CanonicalTimeZone
- | CanonicalLocationTimeZone
data CalendarField
- = Era
- | Year
- | Month
- | WeekOfYear
- | WeekOfMonth
- | DayOfMonth
- | DayOfYear
- | DayOfWeek
- | DayOfWeekInMonth
- | AmPm
- | Hour
- | HourOfDay
- | Minute
- | Second
- | Millisecond
- | ZoneOffset
- | DstOffset
- | YearWoY
- | DoWLocal
- | ExtendedYear
- | JulianDay
- | MillisecondsInDay
- | IsLeapMonth
calendar :: Text -> LocaleName -> CalendarType -> IO Calendar
roll :: Calendar -> [(CalendarField, Int)] -> Calendar
add :: Calendar -> [(CalendarField, Int)] -> Calendar
set1 :: Calendar -> CalendarField -> Int -> Calendar
set :: Calendar -> [(CalendarField, Int)] -> Calendar
get :: Calendar -> CalendarField -> Int
data NumberFormatter
numberFormatter :: Text -> LocaleName -> IO NumberFormatter
formatIntegral :: Integral a => NumberFormatter -> a -> Text
formatIntegral' :: Integral a => Text -> LocaleName -> a -> Text
formatDouble :: NumberFormatter -> Double -> Text
formatDouble' :: Text -> LocaleName -> Double -> Text
data DateFormatter
data FormatStyle
- = FullFormatStyle
- | LongFormatStyle
- | MediumFormatStyle
- | ShortFormatStyle
- | DefaultFormatStyle
- | RelativeFormatStyle
- | NoFormatStyle
data DateFormatSymbolType
- = Eras
- | Months
- | ShortMonths
- | Weekdays
- | ShortWeekdays
- | AmPms
- | LocalizedChars
- | EraNames
- | NarrowMonths
- | NarrowWeekdays
- | StandaloneMonths
- | StandaloneWeekdays
- | StandaoneShortWeekdays
- | StandaloneNarrowWeekdays
- | Quarters
- | ShortQuarters
- | StandaloneQuarters
- | ShorterWeekdays
- | StandaloneShorterWeekdays
- | CyclicYearsWide
- | CyclicYearsAbbreviated
- | CyclicYearsNarrow
- | ZodiacNamesWide
- | ZodiacNamesAbbreviated
- | ZodiacNamesNarrow
standardDateFormatter :: FormatStyle -> FormatStyle -> LocaleName -> Text -> IO DateFormatter
patternDateFormatter :: Text -> LocaleName -> Text -> IO DateFormatter
dateSymbols :: DateFormatter -> DateFormatSymbolType -> [Text]
formatCalendar :: DateFormatter -> Calendar -> Text

Data representation

The Haskell Text type is implemented as an array in the Haskell heap. This means that its location is not pinned; it may be copied during a garbage collection pass. ICU, on the other hand, works with strings that are allocated in the normal system heap and have a fixed address.

To accommodate this need, these bindings use the functions from Data.Text.Foreign to copy data between the Haskell heap and the system heap. The copied strings are still managed automatically, but the need to duplicate data does add some performance and memory overhead.

Types

data LocaleName Source #

The name of a locale.

Constructors

Root	The root locale. For a description of resource bundles and the root resource, see http://userguide.icu-project.org/locale/resources.
Locale String	A specific locale.
Current	The program's current locale.

Instances

Instances details

IsString LocaleName Source #
Instance details Defined in Data.Text.ICU.Internal Methods fromString :: String -> LocaleName #
Read LocaleName Source #
Instance details Defined in Data.Text.ICU.Internal Methods readsPrec :: Int -> ReadS LocaleName # readList :: ReadS [LocaleName] # readPrec :: ReadPrec LocaleName # readListPrec :: ReadPrec [LocaleName] #
Show LocaleName Source #
Instance details Defined in Data.Text.ICU.Internal Methods showsPrec :: Int -> LocaleName -> ShowS # show :: LocaleName -> String # showList :: [LocaleName] -> ShowS #
NFData LocaleName Source #
Instance details Defined in Data.Text.ICU.Internal Methods rnf :: LocaleName -> () #
Eq LocaleName Source #
Instance details Defined in Data.Text.ICU.Internal Methods (==) :: LocaleName -> LocaleName -> Bool # (/=) :: LocaleName -> LocaleName -> Bool #
Ord LocaleName Source #
Instance details Defined in Data.Text.ICU.Internal Methods compare :: LocaleName -> LocaleName -> Ordering # (<) :: LocaleName -> LocaleName -> Bool # (<=) :: LocaleName -> LocaleName -> Bool # (>) :: LocaleName -> LocaleName -> Bool # (>=) :: LocaleName -> LocaleName -> Bool # max :: LocaleName -> LocaleName -> LocaleName # min :: LocaleName -> LocaleName -> LocaleName #

Locales

availableLocales :: IO [String] Source #

Get the available default locales, i.e. locales that return data when passed to ICU APIs, but not including legacy or alias locales.

Boundary analysis

Text boundary analysis is the process of locating linguistic boundaries while formatting and handling text. Examples of this process include:

Locating appropriate points to word-wrap text to fit within specific margins while displaying or printing.
Counting characters, words, sentences, or paragraphs.
Making a list of the unique words in a document.
Figuring out if a given range of text contains only whole words.
Capitalizing the first letter of each word.
Locating a particular unit of the text (For example, finding the third word in the document).

The Breaker type was designed to support these kinds of tasks.

For the impure boundary analysis API (which is richer, but less easy to use than the pure API), see the Data.Text.ICU.Break module. The impure API supports some uses that may be less efficient via the pure API, including:

Locating the beginning of a word that the user has selected.
Determining how far to move the text cursor when the user hits an arrow key (Some characters require more than one position in the text store and some characters in the text store do not display at all).

data Breaker a Source #

A boundary analyser.

data Break a Source #

A break in a string.

Instances

Instances details

Show a => Show (Break a) Source #
Instance details Defined in Data.Text.ICU.Break.Pure Methods showsPrec :: Int -> Break a -> ShowS # show :: Break a -> String # showList :: [Break a] -> ShowS #
NFData a => NFData (Break a) Source #
Instance details Defined in Data.Text.ICU.Break.Pure Methods rnf :: Break a -> () #
Eq a => Eq (Break a) Source #
Instance details Defined in Data.Text.ICU.Break.Pure Methods (==) :: Break a -> Break a -> Bool # (/=) :: Break a -> Break a -> Bool #

brkPrefix :: Break a -> Text Source #

Prefix of the current break.

brkBreak :: Break a -> Text Source #

Text of the current break.

brkSuffix :: Break a -> Text Source #

Suffix of the current break.

brkStatus :: Break a -> a Source #

Status of the current break (only meaningful if Line or Word).

data Line Source #

Line break status.

Constructors

Soft	A soft line break is a position at which a line break is acceptable, but not required.
Hard

Instances

Instances details

Enum Line Source #
Instance details Defined in Data.Text.ICU.Break Methods succ :: Line -> Line # pred :: Line -> Line # toEnum :: Int -> Line # fromEnum :: Line -> Int # enumFrom :: Line -> [Line] # enumFromThen :: Line -> Line -> [Line] # enumFromTo :: Line -> Line -> [Line] # enumFromThenTo :: Line -> Line -> Line -> [Line] #
Show Line Source #
Instance details Defined in Data.Text.ICU.Break Methods showsPrec :: Int -> Line -> ShowS # show :: Line -> String # showList :: [Line] -> ShowS #
NFData Line Source #
Instance details Defined in Data.Text.ICU.Break Methods rnf :: Line -> () #
Eq Line Source #
Instance details Defined in Data.Text.ICU.Break Methods (==) :: Line -> Line -> Bool # (/=) :: Line -> Line -> Bool #

data Word Source #

Word break status.

Constructors

Uncategorized	A "word" that does not fit into another category. Includes spaces and most punctuation.
Number	A word that appears to be a number.
Letter	A word containing letters, excluding hiragana, katakana or ideographic characters.
Kana	A word containing kana characters.
Ideograph	A word containing ideographic characters.

Instances

Instances details

Enum Word Source #
Instance details Defined in Data.Text.ICU.Break Methods succ :: Word -> Word # pred :: Word -> Word # toEnum :: Int -> Word # fromEnum :: Word -> Int # enumFrom :: Word -> [Word] # enumFromThen :: Word -> Word -> [Word] # enumFromTo :: Word -> Word -> [Word] # enumFromThenTo :: Word -> Word -> Word -> [Word] #
Show Word Source #
Instance details Defined in Data.Text.ICU.Break Methods showsPrec :: Int -> Word -> ShowS # show :: Word -> String # showList :: [Word] -> ShowS #
NFData Word Source #
Instance details Defined in Data.Text.ICU.Break Methods rnf :: Word -> () #
Eq Word Source #
Instance details Defined in Data.Text.ICU.Break Methods (==) :: Word -> Word -> Bool # (/=) :: Word -> Word -> Bool #

breakCharacter :: LocaleName -> Breaker () Source #

Break a string on character boundaries.

Character boundary analysis identifies the boundaries of "Extended Grapheme Clusters", which are groupings of codepoints that should be treated as character-like units for many text operations. Please see Unicode Standard Annex #29, Unicode Text Segmentation, http://www.unicode.org/reports/tr29/ for additional information on grapheme clusters and guidelines on their use.

breakLine :: LocaleName -> Breaker Line Source #

Break a string on line boundaries.

Line boundary analysis determines where a text string can be broken when line wrapping. The mechanism correctly handles punctuation and hyphenated words.

breakSentence :: LocaleName -> Breaker () Source #

Break a string on sentence boundaries.

Sentence boundary analysis allows selection with correct interpretation of periods within numbers and abbreviations, and trailing punctuation marks such as quotation marks and parentheses.

breakWord :: LocaleName -> Breaker Word Source #

Break a string on word boundaries.

Word boundary analysis is used by search and replace functions, as well as within text editing applications that allow the user to select words with a double click. Word selection provides correct interpretation of punctuation marks within and following words. Characters that are not part of a word, such as symbols or punctuation marks, have word breaks on both sides.

breaks :: Breaker a -> Text -> [Break a] Source #

Return a list of all breaks in a string, from left to right.

breaksRight :: Breaker a -> Text -> [Break a] Source #

Return a list of all breaks in a string, from right to left.

Case mapping

toCaseFold Source #

Arguments

:: Bool	Whether to include or exclude mappings for dotted and dotless I and i that are marked with `I` in `CaseFolding.txt`.
-> Text
-> Text

Case-fold the characters in a string.

Case folding is locale independent and not context sensitive, but there is an option for treating the letter I specially for Turkic languages. The result may be longer or shorter than the original.

toLower :: LocaleName -> Text -> Text Source #

Lowercase the characters in a string.

Casing is locale dependent and context sensitive. The result may be longer or shorter than the original.

toUpper :: LocaleName -> Text -> Text Source #

Uppercase the characters in a string.

Casing is locale dependent and context sensitive. The result may be longer or shorter than the original.

Iteration

data CharIterator Source #

A type that supports efficient iteration over Unicode characters.

As an example of where this may be useful, a function using this type may be able to iterate over a UTF-8 ByteString directly, rather than first copying and converting it to an intermediate form. This type also allows e.g. comparison between Text and ByteString, with minimal overhead.

Instances

Instances details

Show CharIterator Source #
Instance details Defined in Data.Text.ICU.Internal Methods showsPrec :: Int -> CharIterator -> ShowS # show :: CharIterator -> String # showList :: [CharIterator] -> ShowS #
Eq CharIterator Source #
Instance details Defined in Data.Text.ICU.Iterator Methods (==) :: CharIterator -> CharIterator -> Bool # (/=) :: CharIterator -> CharIterator -> Bool #
Ord CharIterator Source #
Instance details Defined in Data.Text.ICU.Iterator Methods compare :: CharIterator -> CharIterator -> Ordering # (<) :: CharIterator -> CharIterator -> Bool # (<=) :: CharIterator -> CharIterator -> Bool # (>) :: CharIterator -> CharIterator -> Bool # (>=) :: CharIterator -> CharIterator -> Bool # max :: CharIterator -> CharIterator -> CharIterator # min :: CharIterator -> CharIterator -> CharIterator #

fromString :: String -> CharIterator Source #

Construct a CharIterator from a Unicode string.

fromText :: Text -> CharIterator Source #

Construct a CharIterator from a Unicode string.

fromUtf8 :: ByteString -> CharIterator Source #

Construct a CharIterator from a Unicode string encoded as a UTF-8 ByteString. The validity of the encoded string is *not* checked.

Normalization

See module Normalization2 for the full interface which provides some compatibility with the former API.

nfc :: Text -> Text Source #

Create an NFC normalizer and apply this to the given text.

Let's have a look at a concrete example that contains the letter a with an acute accent twice. First as a combination of two codepoints and second as a canonical composite or precomposed character. Both look exactly the same but one character consists of two and one of only one codepoint. A bytewise comparison does not give equality of these.

>>> import Data.Text
>>> let t = pack "a\x301á"
>>> t
"a\769\225"
>>> putStr t
áá
pack "a\x301" == pack "á"
False

But now lets apply some normalization functions and see how these characters coincide afterwards in two different ways:

>>> nfc t
"\225\225"
>>> nfd t
"a\769a\769"

That is exactly what compareUnicode' does:

>>> pack "a\x301" `compareUnicode'` pack "á"

nfd :: Text -> Text Source #

Create an NFD normalizer and apply this to the given text.

nfkc :: Text -> Text Source #

Create an NFKC normalizer and apply this to the given text.

nfkd :: Text -> Text Source #

Create an NFC normalizer and apply this to the given text.

nfkcCasefold :: Text -> Text Source #

Create an NFKCCasefold normalizer and apply this to the given text.

Checks for normalization

quickCheck :: NormalizationMode -> Text -> Maybe Bool Source #

isNormalized :: NormalizationMode -> Text -> Bool Source #

String comparison

Normalization-sensitive string comparison

data CompareOption Source #

Options to compare.

Constructors

InputIsFCD	The caller knows that both strings fulfill the `FCD` conditions. If not set, `compare` will `quickCheck` for `FCD` and normalize if necessary.
CompareIgnoreCase	Compare strings case-insensitively using case folding, instead of case-sensitively. If set, then the following case folding options are used.
FoldCaseExcludeSpecialI	When case folding, exclude the special I character. For use with Turkic (Turkish/Azerbaijani) text data.

Instances

Instances details

Enum CompareOption Source #
Instance details Defined in Data.Text.ICU.Normalize2 Methods succ :: CompareOption -> CompareOption # pred :: CompareOption -> CompareOption # toEnum :: Int -> CompareOption # fromEnum :: CompareOption -> Int # enumFrom :: CompareOption -> [CompareOption] # enumFromThen :: CompareOption -> CompareOption -> [CompareOption] # enumFromTo :: CompareOption -> CompareOption -> [CompareOption] # enumFromThenTo :: CompareOption -> CompareOption -> CompareOption -> [CompareOption] #
Show CompareOption Source #
Instance details Defined in Data.Text.ICU.Normalize2 Methods showsPrec :: Int -> CompareOption -> ShowS # show :: CompareOption -> String # showList :: [CompareOption] -> ShowS #
Eq CompareOption Source #
Instance details Defined in Data.Text.ICU.Normalize2 Methods (==) :: CompareOption -> CompareOption -> Bool # (/=) :: CompareOption -> CompareOption -> Bool #

compareUnicode :: [CompareOption] -> Text -> Text -> Ordering Source #

Compare two strings for canonical equivalence. Further options include case-insensitive comparison and codepoint order (as opposed to code unit order).

Canonical equivalence between two strings is defined as their normalized forms (NFD or NFC) being identical. This function compares strings incrementally instead of normalizing (and optionally case-folding) both strings entirely, improving performance significantly.

Bulk normalization is only necessary if the strings do not fulfill the FCD conditions. Only in this case, and only if the strings are relatively long, is memory allocated temporarily. For FCD strings and short non-FCD strings there is no memory allocation.

Locale-sensitive string collation

For the impure collation API (which is richer, but less easy to use than the pure API), see the Data.Text.ICU.Collate module.

data Collator Source #

String collator type.

collator :: LocaleName -> Collator Source #

Create an immutable Collator for comparing strings.

If Root is passed as the locale, UCA collation rules will be used.

collatorWith :: LocaleName -> [Attribute] -> Collator Source #

Create an immutable Collator with the given Attributes.

collatorFromRules :: Text -> Either ParseError Collator Source #

Create an immutable Collator from the given collation rules.

collatorFromRulesWith :: Text -> [Attribute] -> Either ParseError Collator Source #

Create an immutable Collator from the given collation rules with the given Attributes.

collate :: Collator -> Text -> Text -> Ordering Source #

Compare two strings.

collateIter :: Collator -> CharIterator -> CharIterator -> Ordering Source #

Compare two CharIterators.

If either iterator was constructed from a ByteString, it does not need to be copied or converted beforehand, so this function can be quite cheap.

sortKey :: Collator -> Text -> ByteString Source #

Create a key for sorting the Text using the given Collator. The result of comparing two ByteStrings that have been transformed with sortKey will be the same as the result of collate on the two untransformed Texts.

uca :: Collator Source #

A Collator that uses the Unicode Collation Algorithm (UCA).

Regular expressions

data MatchOption Source #

Options for controlling matching behaviour.

Constructors

CaseInsensitive	Enable case insensitive matching.
Comments	Allow comments and white space within patterns.
DotAll	If set, `'.'` matches line terminators. Otherwise `'.'` matching stops at line end.
Literal	If set, treat the entire pattern as a literal string. Metacharacters or escape sequences in the input sequence will be given no special meaning. The option `CaseInsensitive` retains its meanings on matching when used in conjunction with this option. Other options become superfluous.
Multiline	Control behaviour of `'$'` and `'^'`. If set, recognize line terminators within string, Otherwise, match only at start and end of input string.
HaskellLines	Haskell-only line endings. When this mode is enabled, only `'\n'` is recognized as a line ending in the behavior of `'.'`, `'^'`, and `'$'`.
UnicodeWord	Unicode word boundaries. If set, `'\\b'` uses the Unicode TR 29 definition of word boundaries. Warning: Unicode word boundaries are quite different from traditional regular expression word boundaries. See http://unicode.org/reports/tr29/#Word_Boundaries.
ErrorOnUnknownEscapes	Throw an error on unrecognized backslash escapes. If set, fail with an error on patterns that contain backslash-escaped ASCII letters without a known special meaning. If this flag is not set, these escaped letters represent themselves.
WorkLimit Int	Set a processing limit for match operations. Some patterns, when matching certain strings, can run in exponential time. For practical purposes, the match operation may appear to be in an infinite loop. When a limit is set a match operation will fail with an error if the limit is exceeded. The units of the limit are steps of the match engine. Correspondence with actual processor time will depend on the speed of the processor and the details of the specific pattern, but will typically be on the order of milliseconds. By default, the matching time is not limited.
StackLimit Int	Set the amount of heap storage available for use by the match backtracking stack. ICU uses a backtracking regular expression engine, with the backtrack stack maintained on the heap. This function sets the limit to the amount of memory that can be used for this purpose. A backtracking stack overflow will result in an error from the match operation that caused it. A limit is desirable because a malicious or poorly designed pattern can use excessive memory, potentially crashing the process. A limit is enabled by default.

Instances

Instances details

Show MatchOption Source #
Instance details Defined in Data.Text.ICU.Regex.Internal Methods showsPrec :: Int -> MatchOption -> ShowS # show :: MatchOption -> String # showList :: [MatchOption] -> ShowS #
Eq MatchOption Source #
Instance details Defined in Data.Text.ICU.Regex.Internal Methods (==) :: MatchOption -> MatchOption -> Bool # (/=) :: MatchOption -> MatchOption -> Bool #

data ParseError Source #

Detailed information about parsing errors. Used by ICU parsing engines that parse long rules, patterns, or programs, where the text being parsed is long enough that more information than an ICUError is needed to localize the error.

Instances

Instances details

Exception ParseError Source #
Instance details Defined in Data.Text.ICU.Error.Internal Methods toException :: ParseError -> SomeException # fromException :: SomeException -> Maybe ParseError # displayException :: ParseError -> String #
Show ParseError Source #
Instance details Defined in Data.Text.ICU.Error.Internal Methods showsPrec :: Int -> ParseError -> ShowS # show :: ParseError -> String # showList :: [ParseError] -> ShowS #
NFData ParseError Source #
Instance details Defined in Data.Text.ICU.Error.Internal Methods rnf :: ParseError -> () #

data Match Source #

A match for a regular expression.

Instances

Instances details

Show Match Source #
Instance details Defined in Data.Text.ICU.Regex.Pure Methods showsPrec :: Int -> Match -> ShowS # show :: Match -> String # showList :: [Match] -> ShowS #
Regular Match Source #
Instance details Defined in Data.Text.ICU.Regex.Pure Methods regRe :: Match -> Regex regFp :: Match -> ForeignPtr URegularExpression

data Regex Source #

A compiled regular expression.

Regex values are usually constructed using the regex or regex' functions. This type is also an instance of IsString, so if you have the OverloadedStrings language extension enabled, you can construct a Regex by simply writing the pattern in quotes (though this does not allow you to specify any Options).

Instances

Instances details

IsString Regex Source #
Instance details Defined in Data.Text.ICU.Regex.Pure Methods fromString :: String -> Regex #
Show Regex Source #
Instance details Defined in Data.Text.ICU.Regex.Pure Methods showsPrec :: Int -> Regex -> ShowS # show :: Regex -> String # showList :: [Regex] -> ShowS #
Regular Regex Source #
Instance details Defined in Data.Text.ICU.Regex.Pure Methods regRe :: Regex -> Regex0 regFp :: Regex -> ForeignPtr URegularExpression

class Regular r Source #

A typeclass for functions common to both Match and Regex types.

Minimal complete definition

regRe

Instances

Instances details

Regular Match Source #
Instance details Defined in Data.Text.ICU.Regex.Pure Methods regRe :: Match -> Regex regFp :: Match -> ForeignPtr URegularExpression
Regular Regex Source #
Instance details Defined in Data.Text.ICU.Regex.Pure Methods regRe :: Regex -> Regex0 regFp :: Regex -> ForeignPtr URegularExpression

Construction

regex :: [MatchOption] -> Text -> Regex Source #

Compile a regular expression with the given options. This function throws a ParseError if the pattern is invalid, so it is best for use when the pattern is statically known.

regex' :: [MatchOption] -> Text -> Either ParseError Regex Source #

Compile a regular expression with the given options. This is safest to use when the pattern is constructed at run time.

Inspection

pattern :: Regular r => r -> Text Source #

Return the source form of the pattern used to construct this regular expression or match.

Searching

find :: Regex -> Text -> Maybe Match Source #

Find the first match for the regular expression in the given text.

findAll :: Regex -> Text -> [Match] Source #

Lazily find all matches for the regular expression in the given text.

Match groups

Capturing groups are numbered starting from zero. Group zero is always the entire matching text. Groups greater than zero contain the text matching each capturing group in a regular expression.

groupCount :: Regular r => r -> Int Source #

Return the number of capturing groups in this regular expression or match's pattern.

unfold :: (Int -> Match -> Maybe Text) -> Match -> [Text] Source #

A combinator for returning a list of all capturing groups on a Match.

span :: Match -> Text Source #

Return the span of text between the end of the previous match and the beginning of the current match.

group :: Int -> Match -> Maybe Text Source #

Return the nth capturing group in a match, or Nothing if n is out of bounds.

prefix :: Int -> Match -> Maybe Text Source #

Return the prefix of the nth capturing group in a match (the text from the start of the string to the start of the match), or Nothing if n is out of bounds.

suffix :: Int -> Match -> Maybe Text Source #

Return the suffix of the nth capturing group in a match (the text from the end of the match to the end of the string), or Nothing if n is out of bounds.

Spoof checking

The Spoof type performs security checks on visually confusable (spoof) strings. For the impure spoof checking API (which is richer, but less easy to use than the pure API), see the Data.Text.ICU.Spoof module.

See UTR #36 and UTS #39 for detailed information about the underlying algorithms and databases used by these functions.

data Spoof Source #

Spoof checker type.

data SpoofParams Source #

Constructors

SpoofParams

Used to configure a Spoof checker via spoofWithParams.

Fields

spoofChecks :: Maybe [SpoofCheck]
Optional SpoofChecks to perform on a string. By default, performs all checks except CharLimit.
level :: Maybe RestrictionLevel
Optional SpoofCheck to which characters in the string will be limited. By default, uses HighlyRestrictive.
locales :: Maybe [String]
Optional locale(s) whose scripts will be used to limit the set of allowed characters in a string. If set, automatically enables the CharLimit spoof check.

Instances

Instances details

Show SpoofParams Source #
Instance details Defined in Data.Text.ICU.Spoof.Pure Methods showsPrec :: Int -> SpoofParams -> ShowS # show :: SpoofParams -> String # showList :: [SpoofParams] -> ShowS #
Eq SpoofParams Source #
Instance details Defined in Data.Text.ICU.Spoof.Pure Methods (==) :: SpoofParams -> SpoofParams -> Bool # (/=) :: SpoofParams -> SpoofParams -> Bool #

data SpoofCheck Source #

Constructors

SingleScriptConfusable	Makes `areConfusable` report if both identifiers are both from the same script and are visually confusable. Does not affect `spoofCheck`.
MixedScriptConfusable	Makes `areConfusable` report if both identifiers are visually confusable and at least one identifier contains characters from more than one script. Makes `spoofCheck` report if the identifier contains multiple scripts, and is confusable with some other identifier in a single script.
WholeScriptConfusable	Makes `areConfusable` report if each identifier is of a different single script, and the identifiers are visually confusable.
AnyCase	By default, spoof checks assume the strings have been processed through `toCaseFold` and only check lower-case identifiers. If this is set, spoof checks will check both upper and lower case identifiers.
RestrictionLevel	Checks that identifiers are no looser than the specified level passed to `setRestrictionLevel`.
Invisible	Checks the identifier for the presence of invisible characters, such as zero-width spaces, or character sequences that are likely not to display, such as multiple occurrences of the same non-spacing mark.
CharLimit	Checks whether the identifier contains only characters from a specified set (for example, via `setAllowedLocales`).
MixedNumbers	Checks that the identifier contains numbers from only a single script.
AllChecks	Enables all checks.
AuxInfo	Enables returning a `SpoofCheck` in the `SpoofCheckResult`.

Instances

Instances details

Bounded SpoofCheck Source #
Instance details Defined in Data.Text.ICU.Spoof Methods minBound :: SpoofCheck # maxBound :: SpoofCheck #
Enum SpoofCheck Source #
Instance details Defined in Data.Text.ICU.Spoof Methods succ :: SpoofCheck -> SpoofCheck # pred :: SpoofCheck -> SpoofCheck # toEnum :: Int -> SpoofCheck # fromEnum :: SpoofCheck -> Int # enumFrom :: SpoofCheck -> [SpoofCheck] # enumFromThen :: SpoofCheck -> SpoofCheck -> [SpoofCheck] # enumFromTo :: SpoofCheck -> SpoofCheck -> [SpoofCheck] # enumFromThenTo :: SpoofCheck -> SpoofCheck -> SpoofCheck -> [SpoofCheck] #
Show SpoofCheck Source #
Instance details Defined in Data.Text.ICU.Spoof Methods showsPrec :: Int -> SpoofCheck -> ShowS # show :: SpoofCheck -> String # showList :: [SpoofCheck] -> ShowS #
Eq SpoofCheck Source #
Instance details Defined in Data.Text.ICU.Spoof Methods (==) :: SpoofCheck -> SpoofCheck -> Bool # (/=) :: SpoofCheck -> SpoofCheck -> Bool #

data RestrictionLevel Source #

Constructors

ASCII	Checks that the string contains only Unicode values in the range ߝ inclusive.
SingleScriptRestrictive	Checks that the string contains only characters from a single script.
HighlyRestrictive	Checks that the string contains only characters from a single script, or from the combinations (Latin + Han + Hiragana + Katakana), (Latin + Han + Bopomofo), or (Latin + Han + Hangul).
ModeratelyRestrictive	Checks that the string contains only characters from the combinations (Latin + Cyrillic + Greek + Cherokee), (Latin + Han + Hiragana + Katakana), (Latin + Han + Bopomofo), or (Latin + Han + Hangul).
MinimallyRestrictive	Allows arbitrary mixtures of scripts.
Unrestrictive	Allows any valid identifiers, including characters outside of the Identifier Profile.

Instances

Instances details

Bounded RestrictionLevel Source #
Instance details Defined in Data.Text.ICU.Spoof Methods minBound :: RestrictionLevel # maxBound :: RestrictionLevel #
Enum RestrictionLevel Source #
Instance details Defined in Data.Text.ICU.Spoof Methods succ :: RestrictionLevel -> RestrictionLevel # pred :: RestrictionLevel -> RestrictionLevel # toEnum :: Int -> RestrictionLevel # fromEnum :: RestrictionLevel -> Int # enumFrom :: RestrictionLevel -> [RestrictionLevel] # enumFromThen :: RestrictionLevel -> RestrictionLevel -> [RestrictionLevel] # enumFromTo :: RestrictionLevel -> RestrictionLevel -> [RestrictionLevel] # enumFromThenTo :: RestrictionLevel -> RestrictionLevel -> RestrictionLevel -> [RestrictionLevel] #
Show RestrictionLevel Source #
Instance details Defined in Data.Text.ICU.Spoof Methods showsPrec :: Int -> RestrictionLevel -> ShowS # show :: RestrictionLevel -> String # showList :: [RestrictionLevel] -> ShowS #
Eq RestrictionLevel Source #
Instance details Defined in Data.Text.ICU.Spoof Methods (==) :: RestrictionLevel -> RestrictionLevel -> Bool # (/=) :: RestrictionLevel -> RestrictionLevel -> Bool #

data SpoofCheckResult Source #

Constructors

CheckOK	The string passed all configured spoof checks.
CheckFailed [SpoofCheck]	The string failed one or more spoof checks.
CheckFailedWithRestrictionLevel	The string failed one or more spoof checks, and failed to pass the configured restriction level.
Fields failedChecks :: [SpoofCheck] The spoof checks which the string failed. failedLevel :: RestrictionLevel The restriction level which the string failed to pass.

Instances

Instances details

Show SpoofCheckResult Source #
Instance details Defined in Data.Text.ICU.Spoof Methods showsPrec :: Int -> SpoofCheckResult -> ShowS # show :: SpoofCheckResult -> String # showList :: [SpoofCheckResult] -> ShowS #
Eq SpoofCheckResult Source #
Instance details Defined in Data.Text.ICU.Spoof Methods (==) :: SpoofCheckResult -> SpoofCheckResult -> Bool # (/=) :: SpoofCheckResult -> SpoofCheckResult -> Bool #

Construction

spoof :: Spoof Source #

Open an immutable Spoof checker with default options (all SpoofChecks except CharLimit).

spoofWithParams :: SpoofParams -> Spoof Source #

Open an immutable Spoof checker with specific SpoofParams to control its behavior.

spoofFromSource :: (ByteString, ByteString) -> SpoofParams -> Spoof Source #

Open a immutable Spoof checker with specific SpoofParams to control its behavior and custom rules given the UTF-8 encoded contents of the confusables.txt and confusablesWholeScript.txt files as described in Unicode UAX #39.

spoofFromSerialized :: ByteString -> SpoofParams -> Spoof Source #

Create an immutable spoof checker with specific SpoofParams to control its behavior and custom rules previously returned by serialize.

String checking

areConfusable :: Spoof -> Text -> Text -> SpoofCheckResult Source #

Check two strings for confusability.

spoofCheck :: Spoof -> Text -> SpoofCheckResult Source #

Check a string for spoofing issues.

getSkeleton :: Spoof -> Maybe SkeletonTypeOverride -> Text -> Text Source #

Generates re-usable "skeleton" strings which can be used (via Unicode equality) to check if an identifier is confusable with some large set of existing identifiers.

If you cache the returned strings in storage, you must invalidate your cache any time the underlying confusables database changes (i.e., on ICU upgrade).

By default, assumes all input strings have been passed through toCaseFold and are lower-case. To change this, pass SkeletonAnyCase.

By default, builds skeletons which catch visually confusable characters across multiple scripts. Pass SkeletonSingleScript to override that behavior and build skeletons which catch visually confusable characters across single scripts.

Configuration

getChecks :: Spoof -> [SpoofCheck] Source #

Gets the checks currently configured in the spoof checker.

getAllowedLocales :: Spoof -> [String] Source #

Gets the locales whose scripts are currently allowed by the spoof checker. (We don't use LocaleName since the root and default locales have no meaning here.)

getRestrictionLevel :: Spoof -> Maybe RestrictionLevel Source #

Gets the restriction level currently configured in the spoof checker, if present.

Persistence

serialize :: Spoof -> ByteString Source #

Serializes the rules in this spoof checker to a byte array, suitable for re-use by spoofFromSerialized.

Only includes any data provided to openFromSource. Does not include any other state or configuration.

Calendars

data Calendar Source #

Instances

Instances details

Show Calendar Source #
Instance details Defined in Data.Text.ICU.Calendar Methods showsPrec :: Int -> Calendar -> ShowS # show :: Calendar -> String # showList :: [Calendar] -> ShowS #

data CalendarType Source #

Constructors

TraditionalCalendarType
DefaultCalendarType
GregorianCalendarType

Instances

Instances details

Read CalendarType Source #
Instance details Defined in Data.Text.ICU.Calendar Methods readsPrec :: Int -> ReadS CalendarType # readList :: ReadS [CalendarType] # readPrec :: ReadPrec CalendarType # readListPrec :: ReadPrec [CalendarType] #
Show CalendarType Source #
Instance details Defined in Data.Text.ICU.Calendar Methods showsPrec :: Int -> CalendarType -> ShowS # show :: CalendarType -> String # showList :: [CalendarType] -> ShowS #
Eq CalendarType Source #
Instance details Defined in Data.Text.ICU.Calendar Methods (==) :: CalendarType -> CalendarType -> Bool # (/=) :: CalendarType -> CalendarType -> Bool #

data SystemTimeZoneType Source #

Constructors

AnyTimeZone
CanonicalTimeZone
CanonicalLocationTimeZone

Instances

Instances details

Read SystemTimeZoneType Source #
Instance details Defined in Data.Text.ICU.Calendar Methods readsPrec :: Int -> ReadS SystemTimeZoneType # readList :: ReadS [SystemTimeZoneType] # readPrec :: ReadPrec SystemTimeZoneType # readListPrec :: ReadPrec [SystemTimeZoneType] #
Show SystemTimeZoneType Source #
Instance details Defined in Data.Text.ICU.Calendar Methods showsPrec :: Int -> SystemTimeZoneType -> ShowS # show :: SystemTimeZoneType -> String # showList :: [SystemTimeZoneType] -> ShowS #
Eq SystemTimeZoneType Source #
Instance details Defined in Data.Text.ICU.Calendar Methods (==) :: SystemTimeZoneType -> SystemTimeZoneType -> Bool # (/=) :: SystemTimeZoneType -> SystemTimeZoneType -> Bool #

data CalendarField Source #

All the fields that comprise a Calendar.

Constructors

Era	Field indicating the era, e.g., AD or BC in the Gregorian (Julian) calendar. This is a calendar-specific value.
Year	Field indicating the year. This is a calendar-specific value.
Month	Field indicating the month. This is a calendar-specific value. The first month of the year is JANUARY; the last depends on the number of months in a year. Note: Calendar month is 0-based.
WeekOfYear	Field indicating the week number within the current year. The first week of the year, as defined by UCAL_FIRST_DAY_OF_WEEK and UCAL_MINIMAL_DAYS_IN_FIRST_WEEK attributes, has value 1. Subclasses define the value of UCAL_WEEK_OF_YEAR for days before the first week of the year.
WeekOfMonth	Field indicating the week number within the current month. The first week of the month, as defined by UCAL_FIRST_DAY_OF_WEEK and UCAL_MINIMAL_DAYS_IN_FIRST_WEEK attributes, has value 1. Subclasses define the value of WEEK_OF_MONTH for days before the first week of the month.
DayOfMonth	Field indicating the day of the month. This is a synonym for DAY_OF_MONTH. The first day of the month has value 1.
DayOfYear	Field indicating the day number within the current year. The first day of the year has value 1.
DayOfWeek	Field indicating the day of the week. This field takes values SUNDAY, MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, and SATURDAY. Note: Calendar day-of-week is 1-based. Clients who create locale resources for the field of first-day-of-week should be aware of this. For instance, in US locale, first-day-of-week is set to 1, i.e., UCAL_SUNDAY.
DayOfWeekInMonth	Field indicating the ordinal number of the day of the week within the current month. Together with the DAY_OF_WEEK field, this uniquely specifies a day within a month. Unlike WEEK_OF_MONTH and WEEK_OF_YEAR, this field's value does not depend on getFirstDayOfWeek() or getMinimalDaysInFirstWeek(). DAY_OF_MONTH 1 through 7 always correspond to DAY_OF_WEEK_IN_MONTH 1; 8 through 15 correspond to DAY_OF_WEEK_IN_MONTH 2, and so on. DAY_OF_WEEK_IN_MONTH 0 indicates the week before DAY_OF_WEEK_IN_MONTH 1. Negative values count back from the end of the month, so the last Sunday of a month is specified as DAY_OF_WEEK = SUNDAY, DAY_OF_WEEK_IN_MONTH = -1. Because negative values count backward they will usually be aligned differently within the month than positive values. For example, if a month has 31 days, DAY_OF_WEEK_IN_MONTH -1 will overlap DAY_OF_WEEK_IN_MONTH 5 and the end of 4.
AmPm	Field indicating whether the HOUR is before or after noon. E.g., at 10:04:15.250 PM the AM_PM is PM.
Hour	Field indicating the hour of the morning or afternoon. HOUR is used for the 12-hour clock. E.g., at 10:04:15.250 PM the HOUR is 10.
HourOfDay	Field indicating the hour of the day. HOUR_OF_DAY is used for the 24-hour clock. E.g., at 10:04:15.250 PM the HOUR_OF_DAY is 22.
Minute	Field indicating the minute within the hour. E.g., at 10:04:15.250 PM the UCAL_MINUTE is 4.
Second	Field indicating the second within the minute. E.g., at 10:04:15.250 PM the UCAL_SECOND is 15.
Millisecond	Field indicating the millisecond within the second. E.g., at 10:04:15.250 PM the UCAL_MILLISECOND is 250.
ZoneOffset	Field indicating the raw offset from GMT in milliseconds.
DstOffset	Field indicating the daylight savings offset in milliseconds.
YearWoY	Field indicating the extended year corresponding to the UCAL_WEEK_OF_YEAR field. This may be one greater or less than the value of UCAL_EXTENDED_YEAR.
DoWLocal	Field indicating the localized day of week. This will be a value from 1 to 7 inclusive, with 1 being the localized first day of the week.
ExtendedYear	Year of this calendar system, encompassing all supra-year fields. For example, in Gregorian/Julian calendars, positive Extended Year values indicate years AD, 1 BC = 0 extended, 2 BC = -1 extended, and so on.
JulianDay	Field indicating the modified Julian day number. This is different from the conventional Julian day number in two regards. First, it demarcates days at local zone midnight, rather than noon GMT. Second, it is a local number; that is, it depends on the local time zone. It can be thought of as a single number that encompasses all the date-related fields.
MillisecondsInDay	Ranges from 0 to 23:59:59.999 (regardless of DST). This field behaves exactly like a composite of all time-related fields, not including the zone fields. As such, it also reflects discontinuities of those fields on DST transition days. On a day of DST onset, it will jump forward. On a day of DST cessation, it will jump backward. This reflects the fact that it must be combined with the DST_OFFSET field to obtain a unique local time value.
IsLeapMonth	Whether or not the current month is a leap month (0 or 1). See the Chinese calendar for an example of this.

Instances

Instances details

Read CalendarField Source #
Instance details Defined in Data.Text.ICU.Calendar Methods readsPrec :: Int -> ReadS CalendarField # readList :: ReadS [CalendarField] # readPrec :: ReadPrec CalendarField # readListPrec :: ReadPrec [CalendarField] #
Show CalendarField Source #
Instance details Defined in Data.Text.ICU.Calendar Methods showsPrec :: Int -> CalendarField -> ShowS # show :: CalendarField -> String # showList :: [CalendarField] -> ShowS #
Eq CalendarField Source #
Instance details Defined in Data.Text.ICU.Calendar Methods (==) :: CalendarField -> CalendarField -> Bool # (/=) :: CalendarField -> CalendarField -> Bool #

Construction

calendar :: Text -> LocaleName -> CalendarType -> IO Calendar Source #

Open a Calendar.

A Calendar may be used to convert a millisecond value to a year, month, and day.

Note: When unknown TimeZone ID is specified or if the TimeZone ID specified is "Etc/Unknown", the Calendar returned by the function is initialized with GMT zone with TimeZone ID UCAL_UNKNOWN_ZONE_ID ("EtcUnknown") without any errorswarnings. If you want to check if a TimeZone ID is valid prior to this function, use ucal_getCanonicalTimeZoneID.

>>> import qualified Data.Text as T
>>> c <- calendar (T.pack "CET") (Locale "de_DE") TraditionalCalendarType
>>> show c
2021-10-12 17:37:43

Operations on calendars

roll Source #

Arguments

:: Calendar
-> [(CalendarField, Int)]	The field and the signed amount to add to this field. If the amount causes the value to exceed to maximum or minimum values for that field, the field is pinned to a permissible value.
-> Calendar

Add a specified signed amount to a particular field in a Calendar.

See rollField for further details.

>>> import qualified Data.Text as T
>>> c1 <- calendar (T.pack "CET") (Locale "de_DE") TraditionalCalendarType
>>> show c1
2021-10-12 17:53:26
>>> let c2 = roll c1 [(Hour, 2)]
>>> show c2
2021-10-12 19:53:26
>>> let c3 = roll c1 [(Hour, 12)]
>>> show c3
2021-10-12 17:53:26
>>> let c4 = add c1 [(Hour, 12)]
>>> show c4
2021-10-13 5:53:26

add Source #

Arguments

:: Calendar	The `Calendar` to which to add.
-> [(CalendarField, Int)]	Field type and the signed amount to add to field. If the amount causes the value to exceed to maximum or minimum values for that field, other fields are modified to preserve the magnitude of the change.
-> Calendar

Add a specified signed amount to a particular field in a Calendar.

See addField for further details and see rollField for examples and differences compared to rolling.

set1 :: Calendar -> CalendarField -> Int -> Calendar Source #

Set the value of one field of a calendar to a certain value. All fields are represented as 32-bit integers.

set :: Calendar -> [(CalendarField, Int)] -> Calendar Source #

Set the value of a list of fields of a calendar to certain values. All fields are represented as 32-bit integers.

get :: Calendar -> CalendarField -> Int Source #

Get the value of a specific field in the calendar.

Number formatting

data NumberFormatter Source #

numberFormatter :: Text -> LocaleName -> IO NumberFormatter Source #

Create a new NumberFormatter.

See https://unicode-org.github.io/icu/userguide/format_parse/numbers/skeletons.html for how to specify the number skeletons. And use availableLocales in order to find the allowed locale names. These usuallly look like "en", "de", "de_AT" etc. See formatIntegral and formatDouble for some examples.

formatIntegral :: Integral a => NumberFormatter -> a -> Text Source #

Format an integral number.

See https://unicode-org.github.io/icu/userguide/format_parse/numbers/skeletons.html for how to specify the number skeletons.

>>> import Data.Text
>>> nf <- numberFormatter (pack "precision-integer") (Locale "de")
>>> formatIntegral nf 12345
"12.345"
>>> nf2 <- numberFormatter (pack "precision-integer") (Locale "fr")
>>> formatIntegral nf2 12345
"12\8239\&345"

formatIntegral' :: Integral a => Text -> LocaleName -> a -> Text Source #

Create a number formatter and apply it to an integral number.

formatDouble :: NumberFormatter -> Double -> Text Source #

Format a Double.

See https://unicode-org.github.io/icu/userguide/format_parse/numbers/skeletons.html for how to specify the number skeletons.

>>> import Data.Text
>>> nf3 <- numberFormatter (pack "precision-currency-cash") (Locale "it")
>>> formatDouble nf3 12345.6789
"12.345,68"

formatDouble' :: Text -> LocaleName -> Double -> Text Source #

Create a number formatter and apply it to a Double.

Date formatting

data DateFormatter Source #

This is an abstract data type holding a reference to the ICU date format object. Create a DateFormatter with either standardDateFormatter or patternDateFormatter and use it in order to format Calendar objects with the function formatCalendar.

data FormatStyle Source #

The possible date/time format styles.

Constructors

FullFormatStyle	Full style, such as Tuesday, April 12, 1952 AD or 3:30:42pm PST
LongFormatStyle	Long style, such as January 12, 1952 or 3:30:32pm
MediumFormatStyle	Medium style, such as Jan. 12, 1952
ShortFormatStyle	Short style, such as 121352 or 3:30pm
DefaultFormatStyle	Default style
RelativeFormatStyle	Relative style: ICU currently provides limited support for formatting dates using a “relative” style, specified using RELATIVE_SHORT, RELATIVE_MEDIUM, RELATIVE_LONG or RELATIVE_FULL. As currently implemented, relative date formatting only affects the formatting of dates within a limited range of calendar days before or after the current date, based on the CLDR type="day"/relative data: For example, in English, “Yesterday”, “Today”, and “Tomorrow”. Within this range, the specific relative style currently makes no difference. Outside of this range, relative dates are formatted using the corresponding non-relative style (SHORT, MEDIUM, etc.). Relative time styles are not currently supported, and behave just like the corresponding non-relative style.
NoFormatStyle	No style.

Instances

Instances details

Enum FormatStyle Source #
Instance details Defined in Data.Text.ICU.DateFormatter Methods succ :: FormatStyle -> FormatStyle # pred :: FormatStyle -> FormatStyle # toEnum :: Int -> FormatStyle # fromEnum :: FormatStyle -> Int # enumFrom :: FormatStyle -> [FormatStyle] # enumFromThen :: FormatStyle -> FormatStyle -> [FormatStyle] # enumFromTo :: FormatStyle -> FormatStyle -> [FormatStyle] # enumFromThenTo :: FormatStyle -> FormatStyle -> FormatStyle -> [FormatStyle] #
Show FormatStyle Source #
Instance details Defined in Data.Text.ICU.DateFormatter Methods showsPrec :: Int -> FormatStyle -> ShowS # show :: FormatStyle -> String # showList :: [FormatStyle] -> ShowS #
Eq FormatStyle Source #
Instance details Defined in Data.Text.ICU.DateFormatter Methods (==) :: FormatStyle -> FormatStyle -> Bool # (/=) :: FormatStyle -> FormatStyle -> Bool #

data DateFormatSymbolType Source #

The possible types of date format symbols.

Constructors

Eras	The era names, for example AD.
Months	The month names, for example February.
ShortMonths	The short month names, for example Feb.
Weekdays	The CLDR-style format "wide" weekday names, for example Monday.
ShortWeekdays	The CLDR-style format "abbreviated" (not "short") weekday names, for example "Mon." For the CLDR-style format "short" weekday names, use UDAT_SHORTER_WEEKDAYS.
AmPms	The AM/PM names, for example AM.
LocalizedChars	The localized characters.
EraNames	The long era names, for example Anno Domini.
NarrowMonths	The narrow month names, for example F.
NarrowWeekdays	The CLDR-style format "narrow" weekday names, for example M.
StandaloneMonths	Standalone context versions of months.
StandaloneWeekdays	The CLDR-style stand-alone "wide" weekday names.
StandaoneShortWeekdays	The CLDR-style stand-alone "abbreviated" (not "short") weekday names. For the CLDR-style stand-alone "short" weekday names, use UDAT_STANDALONE_SHORTER_WEEKDAYS.
StandaloneNarrowWeekdays	The CLDR-style stand-alone "narrow" weekday names.
Quarters	The quarters, for example 1st Quarter.
ShortQuarters	The short quarter names, for example Q1.
StandaloneQuarters	Standalone context versions of quarters.
ShorterWeekdays	The CLDR-style short weekday names, e.g. Su, Mo", etc. These are named SHORTER to contrast with the constants using SHORT above, which actually get the CLDR-style abbreviated versions of the corresponding names.
StandaloneShorterWeekdays	Standalone version of UDAT_SHORTER_WEEKDAYS.
CyclicYearsWide	Cyclic year names (only supported for some calendars, and only for FORMAT usage; udat_setSymbols not supported for UDAT_CYCLIC_YEARS_WIDE)
CyclicYearsAbbreviated	Cyclic year names (only supported for some calendars, and only for FORMAT usage)
CyclicYearsNarrow	Cyclic year names (only supported for some calendars, and only for FORMAT usage; udat_setSymbols not supported for UDAT_CYCLIC_YEARS_NARROW)
ZodiacNamesWide	Calendar zodiac names (only supported for some calendars, and only for FORMAT usage; udat_setSymbols not supported for UDAT_ZODIAC_NAMES_WIDE)
ZodiacNamesAbbreviated	Calendar zodiac names (only supported for some calendars, and only for FORMAT usage)
ZodiacNamesNarrow	Calendar zodiac names (only supported for some calendars, and only for FORMAT usage; udat_setSymbols not supported for UDAT_ZODIAC_NAMES_NARROW)

standardDateFormatter :: FormatStyle -> FormatStyle -> LocaleName -> Text -> IO DateFormatter Source #

Create a new DateFormatter from the standard styles.

>>> import Data.Text
>>> dfDe <- standardDateFormatter LongFormatStyle LongFormatStyle (Locale "de_DE") (pack "CET")

patternDateFormatter :: Text -> LocaleName -> Text -> IO DateFormatter Source #

Create a new DateFormatter using a custom pattern as described at https://unicode-org.github.io/icu/userguide/format_parse/datetime/#datetime-format-syntax. For examples the pattern "yyyy.MM.dd G at HH:mm:ss zzz" produces “1996.07.10 AD at 15:08:56 PDT” in English for the PDT time zone.

A date pattern is a string of characters, where specific strings of characters are replaced with date and time data from a calendar when formatting or used to generate data for a calendar when parsing.

The Date Field Symbol Table contains the characters used in patterns to show the appropriate formats for a given locale, such as yyyy for the year. Characters may be used multiple times. For example, if y is used for the year, "yy" might produce “99”, whereas "yyyy" produces “1999”. For most numerical fields, the number of characters specifies the field width. For example, if h is the hour, "h" might produce “5”, but "hh" produces “05”. For some characters, the count specifies whether an abbreviated or full form should be used, but may have other choices, as given below.

Two single quotes represents a literal single quote, either inside or outside single quotes. Text within single quotes is not interpreted in any way (except for two adjacent single quotes). Otherwise all ASCII letter from a to z and A to Z are reserved as syntax characters, and require quoting if they are to represent literal characters. In addition, certain ASCII punctuation characters may become variable in the future (eg : being interpreted as the time separator and / as a date separator, and replaced by respective locale-sensitive characters in display).

“Stand-alone” values refer to those designed to stand on their own independently, as opposed to being with other formatted values. “2nd quarter” would use the wide stand-alone format "qqqq", whereas “2nd quarter 2007” would use the regular format "QQQQ yyyy". For more information about format and stand-alone forms, see CLDR Calendar Elements.

The pattern characters used in the Date Field Symbol Table are defined by CLDR; for more information see CLDR Date Field Symbol Table.

👉 Note that the examples may not reflect current CLDR data.

dateSymbols :: DateFormatter -> DateFormatSymbolType -> [Text] Source #

Get relevant date related symbols, e.g. month and weekday names.

>>> import Data.Text
>>> dfDe <- standardDateFormatter LongFormatStyle LongFormatStyle (Locale "de_DE") (pack "CET")
>>> dateSymbols dfDe Months
["Januar","Februar","M\228rz","April","Mai","Juni","Juli","August","September","Oktober","November","Dezember"]
>>> dfAt <- standardDateFormatter LongFormatStyle LongFormatStyle (Locale "de_AT") (pack "CET")
>>> dateSymbols dfAt Months
["J\228nner","Februar","M\228rz","April","Mai","Juni","Juli","August","September","Oktober","November","Dezember"]

formatCalendar :: DateFormatter -> Calendar -> Text Source #

Format a Calendar using a DateFormatter.

>>> import Data.Text
>>> dfDe <- standardDateFormatter LongFormatStyle LongFormatStyle (Locale "de_DE") (pack "CET")
>>> c <- calendar (pack "CET") (Locale "de_DE") TraditionalCalendarType
>>> formatCalendar dfDe c
"13. Oktober 2021 um 12:44:09 GMT+2"