-- | -- module: Data.Text.Encoding.Locale -- copyright: © kudah 2013 -- license: BSD3 -- -- maintainer: kudahkukarek@gmail.com -- stability: experimental -- portability: GHC-only -- -- This module provides functions to encode and decode 'Data.Text.Text' to/from -- 'Data.ByteString.ByteString' using 'System.IO.TextEncoding' -- -- For performance, Text\'s native encoding functions are used if the conditions -- are right (LF NewlineMode and UTF encoding). {-# LANGUAGE CPP #-} #ifdef TRUSTWORTHY {-# LANGUAGE Trustworthy #-} #endif module Data.Text.Encoding.Locale (decodeLocale ,encodeLocale ,decodeLocale' ,encodeLocale' ,decodeFromHandle ,encodeFromHandle ) where import Import import qualified Data.ByteString as B import qualified Data.Text as T import qualified Data.Text.IO as TIO import qualified Data.Text.Encoding as TE import System.IO import Data.ByteString.Handle import Data.Maybe handleDecoder :: Maybe TextEncoding -> Maybe NewlineMode -> B.ByteString -> IO T.Text handleDecoder menc mnlmode = \bs -> do h <- readHandle False (fromStrict bs) whenJust' menc $ hSetEncoding h whenJust' mnlmode $ hSetNewlineMode h TIO.hGetContents h handleEncoder :: Maybe TextEncoding -> Maybe NewlineMode -> T.Text -> IO B.ByteString handleEncoder menc mnlmode = \t -> do (res, ()) <- writeHandle False $ \h -> do whenJust' menc $ hSetEncoding h whenJust' mnlmode $ hSetNewlineMode h TIO.hPutStr h t return (toStrict res) chooseDecoder :: Maybe TextEncoding -> Maybe NewlineMode -> B.ByteString -> IO T.Text chooseDecoder menc mnlmode = \bs -> do if inputNL nlmode == LF -- decode* functions won't convert line ends then do enc <- maybe getLocale' return menc case show enc of 'U':'T':'F':'-':s -> case s of '8':[] -> return (TE.decodeUtf8 bs) ('1':'6':x:'E':_) | 'L' == x -> return (TE.decodeUtf16LE bs) | 'B' == x -> return (TE.decodeUtf16BE bs) ('3':'2':x:'E':_) | 'L' == x -> return (TE.decodeUtf32LE bs) | 'B' == x -> return (TE.decodeUtf32BE bs) _ -> fallback bs _ -> fallback bs else fallback bs where nlmode = fromMaybe nativeNewlineMode mnlmode fallback = handleDecoder menc mnlmode chooseEncoder :: Maybe TextEncoding -> Maybe NewlineMode -> T.Text -> IO B.ByteString chooseEncoder menc mnlmode = \bs -> if outputNL nlmode == LF -- encode* functions won't convert line ends then do enc <- maybe getLocale' return menc case show enc of 'U':'T':'F':'-':s -> case s of '8':[] -> return (TE.encodeUtf8 bs) ('1':'6':x:'E':_) | 'L' == x -> return (TE.encodeUtf16LE bs) | 'B' == x -> return (TE.encodeUtf16BE bs) ('3':'2':x:'E':_) | 'L' == x -> return (TE.encodeUtf32LE bs) | 'B' == x -> return (TE.encodeUtf32BE bs) _ -> fallback bs _ -> fallback bs else fallback bs where nlmode = fromMaybe nativeNewlineMode mnlmode fallback = handleEncoder menc mnlmode -- | Decode 'B.ByteString' to 'T.Text' using current locale decodeLocale :: B.ByteString -> IO T.Text decodeLocale = chooseDecoder Nothing Nothing -- | Encode 'T.Text' to 'B.ByteString' using current locale encodeLocale :: T.Text -> IO B.ByteString encodeLocale = chooseEncoder Nothing Nothing -- | Decode 'B.ByteString' to 'T.Text' using supplied 'TextEncoding' and 'NewlineMode' decodeLocale' :: TextEncoding -> NewlineMode -> B.ByteString -> IO T.Text decodeLocale' enc nlmode = chooseDecoder (Just enc) (Just nlmode) -- | Encode 'T.Text' to 'B.ByteString' using supplied 'TextEncoding' and 'NewlineMode' encodeLocale' :: TextEncoding -> NewlineMode -> T.Text -> IO B.ByteString encodeLocale' enc nlmode = chooseEncoder (Just enc) (Just nlmode) -- | Decode 'B.ByteString' to 'T.Text' using 'Handle's 'TextEncoding' and 'NewlineMode' decodeFromHandle :: Handle -> B.ByteString -> IO T.Text decodeFromHandle h bs = do (enc, nlmode) <- hGetEncAndNlMode' h decodeLocale' enc nlmode bs -- | Encode 'T.Text' to 'B.ByteString' using 'Handle's 'TextEncoding' and 'NewlineMode' encodeFromHandle :: Handle -> T.Text -> IO B.ByteString encodeFromHandle h t = do (enc, nlmode) <- hGetEncAndNlMode' h encodeLocale' enc nlmode t