{- SPDX-FileCopyrightText: 2020 Serokell <https://serokell.io/> - - SPDX-License-Identifier: MPL-2.0 -} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE ViewPatterns #-} -- | Internal functions that implement encoding selection logic. module System.IO.Utf8.Internal ( EncodingAction (..) , chooseBestEncPure , chooseBestEnc ) where import Data.List (isSuffixOf) import GHC.IO.Encoding (TextEncoding, textEncodingName, utf8) import qualified System.IO as IO -- | What to do with the encoding of the handle. -- -- We return new encoding as string to simplify testing, because, -- it turns out, when you make an encoding with some name, the -- resulting encoding can have a different name. -- -- The second constructor also contains the old encoding as a proof -- that the encoding is set to a new one only if there was a previous -- one in the first place. You probably think I am crazy, but I am not, -- it is just the simplest way to make the @hSetBestUtf8Enc@ easy to write. data EncodingAction = Keep -- ^ Do nothing. | ChangeFromTo TextEncoding String -- ^ Change the first encoding to the second. -- | Pure version of 'chooseBestEnc'. -- -- This function is not actually used in the library. It exists only -- for documentation purposes to demonstrate the logic. -- It is also used in tests to verify that the logic implemented is -- indeed this. chooseBestEncPure :: Bool -- ^ Is a terminal device? -> Maybe String -- ^ Previous encoding name. -> Maybe String -- Never touch handles in binary mode. chooseBestEncPure _ Nothing = Nothing -- Already UTF-8, that's cool. chooseBestEncPure _ (Just "UTF-8") = Nothing -- Handle in text mode that is not a terminal. chooseBestEncPure False _ = Just "UTF-8" chooseBestEncPure True (Just name) -- Be idempotent. | "//TRANSLIT" `isSuffixOf` name = Nothing | otherwise = Just $ name ++ "//TRANSLIT" -- | Choose the best encoding for a file handle. -- -- This function implements the same logic as 'chooseBestEncPure', -- but in a way that is more optimal in terms of IO queries. In -- particular: -- -- 1. It receives both a handle and its current encoding, because the -- calling function will, most likely, need to know the current encoding -- (e.g. to be able to restore it), so we avoid repeating the query. -- 2. It first checks for the cases where it doesn't care whether the device -- is a terminal or not, so the query will be made only if really necessary. chooseBestEnc :: IO.Handle -- ^ Handle to choose encoding for. -> (IO.Handle -> IO Bool) -- ^ hIsTerminalDevice. -> Maybe TextEncoding -- ^ Current encoding. -> IO EncodingAction chooseBestEnc _ _ Nothing = pure Keep chooseBestEnc h hIsTerm (Just enc) = case textEncodingName enc of "UTF-8" -> pure Keep name -- XXX: The first branch is actually never used, because the encoding -- loses the @//TRANSLIT@ suffix after it is being created. -- TODO: Find a way to detect that the encoding is already trasliterating. | "//TRANSLIT" `isSuffixOf` name -> pure Keep | otherwise -> hIsTerm h >>= \case False -> pure $ ChangeFromTo enc (textEncodingName utf8) True -> pure $ ChangeFromTo enc (name ++ "//TRANSLIT")