Safe Haskell	Safe
Language	Haskell2010

Data.Char

Contents

Characters and strings
Character classification
- Subranges
- Unicode general categories
Case conversion
Single digit characters
Numeric representations
String representations

Synopsis

data Char
type String = [Char]
isControl :: Char -> Bool
isSpace :: Char -> Bool
isLower :: Char -> Bool
isUpper :: Char -> Bool
isAlpha :: Char -> Bool
isAlphaNum :: Char -> Bool
isPrint :: Char -> Bool
isDigit :: Char -> Bool
isOctDigit :: Char -> Bool
isHexDigit :: Char -> Bool
isLetter :: Char -> Bool
isMark :: Char -> Bool
isNumber :: Char -> Bool
isPunctuation :: Char -> Bool
isSymbol :: Char -> Bool
isSeparator :: Char -> Bool
isAscii :: Char -> Bool
isLatin1 :: Char -> Bool
isAsciiUpper :: Char -> Bool
isAsciiLower :: Char -> Bool
data GeneralCategory
    = UppercaseLetter
    | LowercaseLetter
    | TitlecaseLetter
    | ModifierLetter
    | OtherLetter
    | NonSpacingMark
    | SpacingCombiningMark
    | EnclosingMark
    | DecimalNumber
    | LetterNumber
    | OtherNumber
    | ConnectorPunctuation
    | DashPunctuation
    | OpenPunctuation
    | ClosePunctuation
    | InitialQuote
    | FinalQuote
    | OtherPunctuation
    | MathSymbol
    | CurrencySymbol
    | ModifierSymbol
    | OtherSymbol
    | Space
    | LineSeparator
    | ParagraphSeparator
    | Control
    | Format
    | Surrogate
    | PrivateUse
    | NotAssigned
generalCategory :: Char -> GeneralCategory
toUpper :: Char -> Char
toLower :: Char -> Char
toTitle :: Char -> Char
digitToInt :: Char -> Int
intToDigit :: Int -> Char
ord :: Char -> Int
chr :: Int -> Char
showLitChar :: Char -> ShowS
lexLitChar :: ReadS String
readLitChar :: ReadS Char

Characters and strings

data Char #

The character type Char is an enumeration whose values represent Unicode (or equivalently ISO/IEC 10646) code points (i.e. characters, see http://www.unicode.org/ for details). This set extends the ISO 8859-1 (Latin-1) character set (the first 256 characters), which is itself an extension of the ASCII character set (the first 128 characters). A character literal in Haskell has type Char.

To convert a Char to or from the corresponding Int value defined by Unicode, use fromEnum and toEnum from the Enum class respectively (or equivalently ord and chr).

Instances

Bounded Char	Since: base-2.1
Instance details Defined in GHC.Enum Methods minBound :: Char # maxBound :: Char #
Enum Char	Since: base-2.1
Instance details Defined in GHC.Enum Methods succ :: Char -> Char # pred :: Char -> Char # toEnum :: Int -> Char # fromEnum :: Char -> Int # enumFrom :: Char -> [Char] # enumFromThen :: Char -> Char -> [Char] # enumFromTo :: Char -> Char -> [Char] # enumFromThenTo :: Char -> Char -> Char -> [Char] #
Eq Char
Instance details Defined in GHC.Classes Methods (==) :: Char -> Char -> Bool # (/=) :: Char -> Char -> Bool #
Ord Char
Instance details Defined in GHC.Classes Methods compare :: Char -> Char -> Ordering # (<) :: Char -> Char -> Bool # (<=) :: Char -> Char -> Bool # (>) :: Char -> Char -> Bool # (>=) :: Char -> Char -> Bool # max :: Char -> Char -> Char # min :: Char -> Char -> Char #
Read Char	Since: base-2.1
Instance details Defined in GHC.Read Methods readsPrec :: Int -> ReadS Char # readList :: ReadS [Char] # readPrec :: ReadPrec Char # readListPrec :: ReadPrec [Char] #
Show Char	Since: base-2.1
Instance details Defined in GHC.Show Methods showsPrec :: Int -> Char -> ShowS # show :: Char -> String # showList :: [Char] -> ShowS #
Ix Char	Since: base-2.1
Instance details Defined in GHC.Arr Methods range :: (Char, Char) -> [Char] # index :: (Char, Char) -> Char -> Int # unsafeIndex :: (Char, Char) -> Char -> Int inRange :: (Char, Char) -> Char -> Bool # rangeSize :: (Char, Char) -> Int # unsafeRangeSize :: (Char, Char) -> Int
Storable Char	Since: base-2.1
Instance details Defined in Foreign.Storable Methods sizeOf :: Char -> Int # alignment :: Char -> Int # peekElemOff :: Ptr Char -> Int -> IO Char # pokeElemOff :: Ptr Char -> Int -> Char -> IO () # peekByteOff :: Ptr b -> Int -> IO Char # pokeByteOff :: Ptr b -> Int -> Char -> IO () # peek :: Ptr Char -> IO Char # poke :: Ptr Char -> Char -> IO () #
Generic1 (URec Char :: k -> Type)
Instance details Defined in GHC.Generics Associated Types type Rep1 (URec Char) :: k -> Type # Methods from1 :: URec Char a -> Rep1 (URec Char) a # to1 :: Rep1 (URec Char) a -> URec Char a #
Functor (URec Char :: Type -> Type)	Since: base-4.9.0.0
Instance details Defined in GHC.Generics Methods fmap :: (a -> b) -> URec Char a -> URec Char b # (<$) :: a -> URec Char b -> URec Char a #
Foldable (URec Char :: Type -> Type)	Since: base-4.9.0.0
Instance details Defined in Data.Foldable Methods fold :: Monoid m => URec Char m -> m # foldMap :: Monoid m => (a -> m) -> URec Char a -> m # foldr :: (a -> b -> b) -> b -> URec Char a -> b # foldr' :: (a -> b -> b) -> b -> URec Char a -> b # foldl :: (b -> a -> b) -> b -> URec Char a -> b # foldl' :: (b -> a -> b) -> b -> URec Char a -> b # foldr1 :: (a -> a -> a) -> URec Char a -> a # foldl1 :: (a -> a -> a) -> URec Char a -> a # toList :: URec Char a -> [a] # null :: URec Char a -> Bool # length :: URec Char a -> Int # elem :: Eq a => a -> URec Char a -> Bool # maximum :: Ord a => URec Char a -> a # minimum :: Ord a => URec Char a -> a # sum :: Num a => URec Char a -> a # product :: Num a => URec Char a -> a #
Traversable (URec Char :: Type -> Type)	Since: base-4.9.0.0
Instance details Defined in Data.Traversable Methods traverse :: Applicative f => (a -> f b) -> URec Char a -> f (URec Char b) # sequenceA :: Applicative f => URec Char (f a) -> f (URec Char a) # mapM :: Monad m => (a -> m b) -> URec Char a -> m (URec Char b) # sequence :: Monad m => URec Char (m a) -> m (URec Char a) #
Eq (URec Char p)	Since: base-4.9.0.0
Instance details Defined in GHC.Generics Methods (==) :: URec Char p -> URec Char p -> Bool # (/=) :: URec Char p -> URec Char p -> Bool #
Ord (URec Char p)	Since: base-4.9.0.0
Instance details Defined in GHC.Generics Methods compare :: URec Char p -> URec Char p -> Ordering # (<) :: URec Char p -> URec Char p -> Bool # (<=) :: URec Char p -> URec Char p -> Bool # (>) :: URec Char p -> URec Char p -> Bool # (>=) :: URec Char p -> URec Char p -> Bool # max :: URec Char p -> URec Char p -> URec Char p # min :: URec Char p -> URec Char p -> URec Char p #
Show (URec Char p)	Since: base-4.9.0.0
Instance details Defined in GHC.Generics Methods showsPrec :: Int -> URec Char p -> ShowS # show :: URec Char p -> String # showList :: [URec Char p] -> ShowS #
Generic (URec Char p)
Instance details Defined in GHC.Generics Associated Types type Rep (URec Char p) :: Type -> Type # Methods from :: URec Char p -> Rep (URec Char p) x # to :: Rep (URec Char p) x -> URec Char p #
data URec Char (p :: k)	Used for marking occurrences of `Char#` Since: base-4.9.0.0
Instance details Defined in GHC.Generics data URec Char (p :: k) = UChar { uChar# :: Char# }
type Rep1 (URec Char :: k -> Type)	Since: base-4.9.0.0
Instance details Defined in GHC.Generics type Rep1 (URec Char :: k -> Type) = D1 (MetaData "URec" "GHC.Generics" "base" False) (C1 (MetaCons "UChar" PrefixI True) (S1 (MetaSel (Just "uChar#") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (UChar :: k -> Type)))
type Rep (URec Char p)	Since: base-4.9.0.0
Instance details Defined in GHC.Generics type Rep (URec Char p) = D1 (MetaData "URec" "GHC.Generics" "base" False) (C1 (MetaCons "UChar" PrefixI True) (S1 (MetaSel (Just "uChar#") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (UChar :: Type -> Type)))

type String = [Char] #

A String is a list of characters. String constants in Haskell are values of type String.

Character classification

Unicode characters are divided into letters, numbers, marks, punctuation, symbols, separators (including spaces) and others (including control characters).

isControl :: Char -> Bool #

Selects control characters, which are the non-printing characters of the Latin-1 subset of Unicode.

isSpace :: Char -> Bool #

Returns True for any Unicode space character, and the control characters \t, \n, \r, \f, \v.

isLower :: Char -> Bool #

Selects lower-case alphabetic Unicode characters (letters).

isUpper :: Char -> Bool #

Selects upper-case or title-case alphabetic Unicode characters (letters). Title case is used by a small number of letter ligatures like the single-character form of Lj.

isAlpha :: Char -> Bool #

Selects alphabetic Unicode characters (lower-case, upper-case and title-case letters, plus letters of caseless scripts and modifier letters). This function is equivalent to isLetter.

isAlphaNum :: Char -> Bool #

Selects alphabetic or numeric Unicode characters.

Note that numeric digits outside the ASCII range, as well as numeric characters which aren't digits, are selected by this function but not by isDigit. Such characters may be part of identifiers but are not used by the printer and reader to represent numbers.

isPrint :: Char -> Bool #

Selects printable Unicode characters (letters, numbers, marks, punctuation, symbols and spaces).

isDigit :: Char -> Bool #

Selects ASCII digits, i.e. '0'..'9'.

isOctDigit :: Char -> Bool #

Selects ASCII octal digits, i.e. '0'..'7'.

isHexDigit :: Char -> Bool #

Selects ASCII hexadecimal digits, i.e. '0'..'9', 'a'..'f', 'A'..'F'.

isLetter :: Char -> Bool #

Selects alphabetic Unicode characters (lower-case, upper-case and title-case letters, plus letters of caseless scripts and modifier letters). This function is equivalent to isAlpha.

This function returns True if its argument has one of the following GeneralCategory values, or False otherwise:

- UppercaseLetter
- LowercaseLetter
- TitlecaseLetter
- ModifierLetter
- OtherLetter

These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Letter".

Examples

Expand

Basic usage:

>>> isLetter 'a'
True
>>> isLetter 'A'
True
>>> isLetter 'λ'
True
>>> isLetter '0'
False
>>> isLetter '%'
False
>>> isLetter '♥'
False
>>> isLetter '\31'
False

Ensure that isLetter and isAlpha are equivalent.

>>> let chars = [(chr 0)..]
>>> let letters = map isLetter chars
>>> let alphas = map isAlpha chars
>>> letters == alphas
True

isMark :: Char -> Bool #

Selects Unicode mark characters, for example accents and the like, which combine with preceding characters.

This function returns True if its argument has one of the following GeneralCategory values, or False otherwise:

- NonSpacingMark
- SpacingCombiningMark
- EnclosingMark

These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Mark".

Examples

Expand

Basic usage:

>>> isMark 'a'
False
>>> isMark '0'
False

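Combining marks such as accent characters usually need to follow another character before they become printable. The string below is the letter 'o' followed by the combining grave accent U+0300, written with an explicit escape so the two code points are visible:

>>> map isMark "o\768"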
[False,True]

Puns are not necessarily supported:

>>> isMark '✓'
False

isNumber :: Char -> Bool #

Selects Unicode numeric characters, including digits from various scripts, Roman numerals, et cetera.

This function returns True if its argument has one of the following GeneralCategory values, or False otherwise:

- DecimalNumber
- LetterNumber
- OtherNumber

These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Number".

Examples

Expand

Basic usage:

>>> isNumber 'a'
False
>>> isNumber '%'
False
>>> isNumber '3'
True

ASCII '0' through '9' are all numbers:

>>> and $ map isNumber ['0'..'9']
True

Unicode Roman numerals are "numbers" as well:

>>> isNumber 'Ⅸ'
True

isPunctuation :: Char -> Bool #

Selects Unicode punctuation characters, including various kinds of connectors, brackets and quotes.

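This function returns True if its argument has one of the following GeneralCategory values, or False otherwise:

- ConnectorPunctuation
- DashPunctuation
- OpenPunctuation
- ClosePunctuation
- InitialQuote
- FinalQuote
- OtherPunctuation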

These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Punctuation".

Examples

Expand

Basic usage:

>>> isPunctuation 'a'
False
>>> isPunctuation '7'
False
>>> isPunctuation '♥'
False
>>> isPunctuation '"'
True
>>> isPunctuation '?'
True
>>> isPunctuation '—'
True

isSymbol :: Char -> Bool #

Selects Unicode symbol characters, including mathematical and currency symbols.

This function returns True if its argument has one of the following GeneralCategory values, or False otherwise:

- MathSymbol
- CurrencySymbol
- ModifierSymbol
- OtherSymbol

These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Symbol".

Examples

Expand

Basic usage:

>>> isSymbol 'a'
False
>>> isSymbol '6'
False
>>> isSymbol '='
True

The definition of "math symbol" may be a little counter-intuitive depending on one's background:

>>> isSymbol '+'
True
>>> isSymbol '-'
False

isSeparator :: Char -> Bool #

Selects Unicode space and separator characters.

This function returns True if its argument has one of the following GeneralCategory values, or False otherwise:

- Space
- LineSeparator
- ParagraphSeparator

These classes are defined in the Unicode Character Database, part of the Unicode standard. The same document defines what is and is not a "Separator".

Examples

Expand

Basic usage:

>>> isSeparator 'a'
False
>>> isSeparator '6'
False
>>> isSeparator ' '
True

Warning: newlines and tab characters are not considered separators.

>>> isSeparator '\n'
False
>>> isSeparator '\t'
False

But some more exotic characters are (like HTML's &nbsp;, the non-breaking space U+00A0):

>>> isSeparator '\160'
True

Subranges

isAscii :: Char -> Bool #

Selects the first 128 characters of the Unicode character set, corresponding to the ASCII character set.

isLatin1 :: Char -> Bool #

Selects the first 256 characters of the Unicode character set, corresponding to the ISO 8859-1 (Latin-1) character set.

isAsciiUpper :: Char -> Bool #

Selects ASCII upper-case letters, i.e. characters satisfying both isAscii and isUpper.

isAsciiLower :: Char -> Bool #

Selects ASCII lower-case letters, i.e. characters satisfying both isAscii and isLower.

Unicode general categories

data GeneralCategory #

Unicode General Categories (column 2 of the UnicodeData table) in the order they are listed in the Unicode standard (the Unicode Character Database, in particular).

Examples

Expand

Basic usage:

>>> :t OtherLetter
OtherLetter :: GeneralCategory

Eq instance:

>>> UppercaseLetter == UppercaseLetter
True
>>> UppercaseLetter == LowercaseLetter
False

Ord instance:

>>> NonSpacingMark <= MathSymbol
True

Enum instance:

>>> enumFromTo ModifierLetter SpacingCombiningMark
[ModifierLetter,OtherLetter,NonSpacingMark,SpacingCombiningMark]

Read instance:

>>> read "DashPunctuation" :: GeneralCategory
DashPunctuation
>>> read "17" :: GeneralCategory
*** Exception: Prelude.read: no parse

Show instance:

>>> show EnclosingMark
"EnclosingMark"

Bounded instance:

>>> minBound :: GeneralCategory
UppercaseLetter
>>> maxBound :: GeneralCategory
NotAssigned

Ix instance:

>>> import Data.Ix ( index )
>>> index (OtherLetter,Control) FinalQuote
12
>>> index (OtherLetter,Control) Format
*** Exception: Error in array index

Constructors

UppercaseLetter	Lu: Letter, Uppercase
LowercaseLetter	Ll: Letter, Lowercase
TitlecaseLetter	Lt: Letter, Titlecase
ModifierLetter	Lm: Letter, Modifier
OtherLetter	Lo: Letter, Other
NonSpacingMark	Mn: Mark, Non-Spacing
SpacingCombiningMark	Mc: Mark, Spacing Combining
EnclosingMark	Me: Mark, Enclosing
DecimalNumber	Nd: Number, Decimal
LetterNumber	Nl: Number, Letter
OtherNumber	No: Number, Other
ConnectorPunctuation	Pc: Punctuation, Connector
DashPunctuation	Pd: Punctuation, Dash
OpenPunctuation	Ps: Punctuation, Open
ClosePunctuation	Pe: Punctuation, Close
InitialQuote	Pi: Punctuation, Initial quote
FinalQuote	Pf: Punctuation, Final quote
OtherPunctuation	Po: Punctuation, Other
MathSymbol	Sm: Symbol, Math
CurrencySymbol	Sc: Symbol, Currency
ModifierSymbol	Sk: Symbol, Modifier
OtherSymbol	So: Symbol, Other
Space	Zs: Separator, Space
LineSeparator	Zl: Separator, Line
ParagraphSeparator	Zp: Separator, Paragraph
Control	Cc: Other, Control
Format	Cf: Other, Format
Surrogate	Cs: Other, Surrogate
PrivateUse	Co: Other, Private Use
NotAssigned	Cn: Other, Not Assigned

Instances

Bounded GeneralCategory	Since: base-2.1
Instance details Defined in GHC.Unicode Methods minBound :: GeneralCategory # maxBound :: GeneralCategory #
Enum GeneralCategory	Since: base-2.1
Instance details Defined in GHC.Unicode Methods succ :: GeneralCategory -> GeneralCategory # pred :: GeneralCategory -> GeneralCategory # toEnum :: Int -> GeneralCategory # fromEnum :: GeneralCategory -> Int # enumFrom :: GeneralCategory -> [GeneralCategory] # enumFromThen :: GeneralCategory -> GeneralCategory -> [GeneralCategory] # enumFromTo :: GeneralCategory -> GeneralCategory -> [GeneralCategory] # enumFromThenTo :: GeneralCategory -> GeneralCategory -> GeneralCategory -> [GeneralCategory] #
Eq GeneralCategory	Since: base-2.1
Instance details Defined in GHC.Unicode Methods (==) :: GeneralCategory -> GeneralCategory -> Bool # (/=) :: GeneralCategory -> GeneralCategory -> Bool #
Ord GeneralCategory	Since: base-2.1
Instance details Defined in GHC.Unicode Methods compare :: GeneralCategory -> GeneralCategory -> Ordering # (<) :: GeneralCategory -> GeneralCategory -> Bool # (<=) :: GeneralCategory -> GeneralCategory -> Bool # (>) :: GeneralCategory -> GeneralCategory -> Bool # (>=) :: GeneralCategory -> GeneralCategory -> Bool # max :: GeneralCategory -> GeneralCategory -> GeneralCategory # min :: GeneralCategory -> GeneralCategory -> GeneralCategory #
Read GeneralCategory	Since: base-2.1
Instance details Defined in GHC.Read Methods readsPrec :: Int -> ReadS GeneralCategory # readList :: ReadS [GeneralCategory] # readPrec :: ReadPrec GeneralCategory # readListPrec :: ReadPrec [GeneralCategory] #
Show GeneralCategory	Since: base-2.1
Instance details Defined in GHC.Unicode Methods showsPrec :: Int -> GeneralCategory -> ShowS # show :: GeneralCategory -> String # showList :: [GeneralCategory] -> ShowS #
Ix GeneralCategory	Since: base-2.1
Instance details Defined in GHC.Unicode Methods range :: (GeneralCategory, GeneralCategory) -> [GeneralCategory] # index :: (GeneralCategory, GeneralCategory) -> GeneralCategory -> Int # unsafeIndex :: (GeneralCategory, GeneralCategory) -> GeneralCategory -> Int inRange :: (GeneralCategory, GeneralCategory) -> GeneralCategory -> Bool # rangeSize :: (GeneralCategory, GeneralCategory) -> Int # unsafeRangeSize :: (GeneralCategory, GeneralCategory) -> Int

generalCategory :: Char -> GeneralCategory #

The Unicode general category of the character. This relies on the Enum instance of GeneralCategory, which must remain in the same order as the categories are presented in the Unicode standard.

Examples

Expand

Basic usage:

>>> generalCategory 'a'
LowercaseLetter
>>> generalCategory 'A'
UppercaseLetter
>>> generalCategory '0'
DecimalNumber
>>> generalCategory '%'
OtherPunctuation
>>> generalCategory '♥'
OtherSymbol
>>> generalCategory '\31'
Control
>>> generalCategory ' '
Space

Case conversion

toUpper :: Char -> Char #

Convert a letter to the corresponding upper-case letter, if any. Any other character is returned unchanged.

toLower :: Char -> Char #

Convert a letter to the corresponding lower-case letter, if any. Any other character is returned unchanged.

toTitle :: Char -> Char #

Convert a letter to the corresponding title-case or upper-case letter, if any. (Title case differs from upper case only for a small number of ligature letters.) Any other character is returned unchanged.

Single digit characters

digitToInt :: Char -> Int #

Convert a single digit Char to the corresponding Int. This function fails unless its argument satisfies isHexDigit, but recognises both upper- and lower-case hexadecimal digits (that is, '0'..'9', 'a'..'f', 'A'..'F').

Examples

Expand

Characters '0' through '9' are converted properly to 0..9:

>>> map digitToInt ['0'..'9']
[0,1,2,3,4,5,6,7,8,9]

Both upper- and lower-case 'A' through 'F' are converted as well, to 10..15.

>>> map digitToInt ['a'..'f']
[10,11,12,13,14,15]
>>> map digitToInt ['A'..'F']
[10,11,12,13,14,15]

Anything else throws an exception:

>>> digitToInt 'G'
*** Exception: Char.digitToInt: not a digit 'G'
>>> digitToInt '♥'
*** Exception: Char.digitToInt: not a digit '\9829'

intToDigit :: Int -> Char #

Convert an Int in the range 0..15 to the corresponding single digit Char. This function fails on other inputs, and generates lower-case hexadecimal digits.

Numeric representations

ord :: Char -> Int #

The fromEnum method restricted to the type Char.

chr :: Int -> Char #

The toEnum method restricted to the type Char.

String representations

showLitChar :: Char -> ShowS #

Convert a character to a string using only printable characters, using Haskell source-language escape conventions. For example:

showLitChar '\n' s  =  "\\n" ++ s

lexLitChar :: ReadS String #

Read a string representation of a character, using Haskell source-language escape conventions. For example:

lexLitChar  "\\nHello"  =  [("\\n", "Hello")]

readLitChar :: ReadS Char #

Read a string representation of a character, using Haskell source-language escape conventions, and convert it to the character that it encodes. For example:

readLitChar "\\nHello"  =  [('\n', "Hello")]