zenacy-unicode-1.0.0: Unicode utilities for Haskell

Safe HaskellNone
LanguageHaskell2010

Zenacy.Unicode

Description

Tools to check and prepare data to be parsed as valid Unicode.

The following example converts dubious data to text.

textDecode :: ByteString -> Text
textDecode b =
  case bomStrip b of
    (Nothing, s)           -> T.decodeUtf8 $ unicodeCleanUTF8 s -- Assume UTF8
    (Just BOM_UTF8, s)     -> T.decodeUtf8 $ unicodeCleanUTF8 s
    (Just BOM_UTF16_BE, s) -> T.decodeUtf16BE s
    (Just BOM_UTF16_LE, s) -> T.decodeUtf16LE s
    (Just BOM_UTF32_BE, s) -> T.decodeUtf32BE s
    (Just BOM_UTF32_LE, s) -> T.decodeUtf32LE s
Synopsis

Documentation

data BOM Source #

Defines the Unicode byte order mark (BOM).

Instances
Eq BOM Source # 
Instance details

Defined in Zenacy.Unicode

Methods

(==) :: BOM -> BOM -> Bool #

(/=) :: BOM -> BOM -> Bool #

Ord BOM Source # 
Instance details

Defined in Zenacy.Unicode

Methods

compare :: BOM -> BOM -> Ordering #

(<) :: BOM -> BOM -> Bool #

(<=) :: BOM -> BOM -> Bool #

(>) :: BOM -> BOM -> Bool #

(>=) :: BOM -> BOM -> Bool #

max :: BOM -> BOM -> BOM #

min :: BOM -> BOM -> BOM #

Show BOM Source # 
Instance details

Defined in Zenacy.Unicode

Methods

showsPrec :: Int -> BOM -> ShowS #

show :: BOM -> String #

showList :: [BOM] -> ShowS #

bomStrings :: [(BOM, ByteString)] Source #

Defines the byte order mark signatures.

bomStrip :: ByteString -> (Maybe BOM, ByteString) Source #

Removes the BOM from the start of a byte string, returning the BOM that was found (if any) together with the remaining bytes.

unicodeCleanUTF8 :: ByteString -> ByteString Source #

Removes bad characters and nulls from a UTF-8 byte string.