Safe Haskell	None
Language	Haskell2010

Zenacy.Unicode

Description

Tools to check and prepare data to be parsed as valid Unicode.

The following is an example of converting dubious data to text.

textDecode :: ByteString -> Text
textDecode b =
  case bomStrip b of
    (Nothing, s)           -> T.decodeUtf8 $ unicodeCleanUTF8 s -- Assume UTF8
    (Just BOM_UTF8, s)     -> T.decodeUtf8 $ unicodeCleanUTF8 s
    (Just BOM_UTF16_BE, s) -> T.decodeUtf16BE s
    (Just BOM_UTF16_LE, s) -> T.decodeUtf16LE s
    (Just BOM_UTF32_BE, s) -> T.decodeUtf32BE s
    (Just BOM_UTF32_LE, s) -> T.decodeUtf32LE s

Synopsis

data BOM
  = BOM_UTF8
  | BOM_UTF16_BE
  | BOM_UTF16_LE
  | BOM_UTF32_BE
  | BOM_UTF32_LE
bomStrings :: [(BOM, ByteString)]
bomStrip :: ByteString -> (Maybe BOM, ByteString)
unicodeCleanUTF8 :: ByteString -> ByteString

Documentation

data BOM Source #

Defines the Unicode byte order mark.

Constructors

BOM_UTF8
BOM_UTF16_BE
BOM_UTF16_LE
BOM_UTF32_BE
BOM_UTF32_LE

Instances

Eq BOM Source #
Instance details Defined in Zenacy.Unicode Methods (==) :: BOM -> BOM -> Bool # (/=) :: BOM -> BOM -> Bool #
Ord BOM Source #
Instance details Defined in Zenacy.Unicode Methods compare :: BOM -> BOM -> Ordering # (<) :: BOM -> BOM -> Bool # (<=) :: BOM -> BOM -> Bool # (>) :: BOM -> BOM -> Bool # (>=) :: BOM -> BOM -> Bool # max :: BOM -> BOM -> BOM # min :: BOM -> BOM -> BOM #
Show BOM Source #
Instance details Defined in Zenacy.Unicode Methods showsPrec :: Int -> BOM -> ShowS # show :: BOM -> String # showList :: [BOM] -> ShowS #

bomStrings :: [(BOM, ByteString)] Source #

Defines the byte order mark signatures.

bomStrip :: ByteString -> (Maybe BOM, ByteString) Source #

Removes the BOM from the start of a byte string, returning the detected BOM (if any) together with the remaining bytes.

unicodeCleanUTF8 :: ByteString -> ByteString Source #

Removes bad characters and nulls from a UTF-8 byte string.