-- | Text IO using the UTF8 character encoding.

module Agda.Utils.IO.UTF8
  ( ReadException
  , readTextFile
  , Agda.Utils.IO.UTF8.readFile
  , Agda.Utils.IO.UTF8.writeFile
  , writeTextToFile
  ) where

import Control.Exception
import Data.Text.Lazy (Text)
import qualified Data.Text.Lazy as T
import qualified Data.Text.Lazy.Encoding as T
import qualified Data.Text.Lazy.IO as T
import qualified Data.ByteString.Lazy as B
import qualified System.IO as IO
import qualified System.IO.Error as E

-- | Converts many character sequences which may be interpreted as
-- line or paragraph separators into '\n'.

convertLineEndings :: Text -> Text
convertLineEndings :: Text -> Text
convertLineEndings = (Char -> Char) -> Text -> Text
T.map Char -> Char
convert forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
convertNLCR
  where
  -- Replaces NL CR with NL.
  convertNLCR :: Text -> Text
convertNLCR = Text -> Text -> Text -> Text
T.replace Text
"\n\x000D" Text
"\n"

  -- ASCII:
  convert :: Char -> Char
convert Char
'\x000D' = Char
'\n'  -- CR  (Carriage return)
  convert Char
'\x000C' = Char
'\n'  -- FF  (Form feed)
  -- Unicode:
  convert Char
'\x0085' = Char
'\n'  -- NEXT LINE
  convert Char
'\x2028' = Char
'\n'  -- LINE SEPARATOR
  convert Char
'\x2029' = Char
'\n'  -- PARAGRAPH SEPARATOR
  -- Not a line ending (or '\x000A'):
  convert Char
c        = Char
c

-- | A kind of exception that can be thrown by 'readTextFile' and
-- 'readFile'.
newtype ReadException
  = DecodingError FilePath
    -- ^ Decoding failed for the given file.
  deriving Int -> ReadException -> ShowS
[ReadException] -> ShowS
ReadException -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [ReadException] -> ShowS
$cshowList :: [ReadException] -> ShowS
show :: ReadException -> String
$cshow :: ReadException -> String
showsPrec :: Int -> ReadException -> ShowS
$cshowsPrec :: Int -> ReadException -> ShowS
Show

instance Exception ReadException where
  displayException :: ReadException -> String
displayException (DecodingError String
file) =
    String
"Failed to read " forall a. [a] -> [a] -> [a]
++ String
file forall a. [a] -> [a] -> [a]
++ String
".\n" forall a. [a] -> [a] -> [a]
++
    String
"Please ensure that this file uses the UTF-8 character encoding."

-- | Reads a UTF8-encoded text file and converts many character
-- sequences which may be interpreted as line or paragraph separators
-- into '\n'.
--
-- If the file cannot be decoded, then a 'ReadException' is raised.

readTextFile :: FilePath -> IO Text
readTextFile :: String -> IO Text
readTextFile String
file = do
  Either UnicodeException Text
s <- ByteString -> Either UnicodeException Text
T.decodeUtf8' forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> IO ByteString
B.readFile String
file
  case Either UnicodeException Text
s of
    Right Text
s -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Text
convertLineEndings Text
s
    Left UnicodeException
_  -> forall a e. Exception e => e -> a
throw forall a b. (a -> b) -> a -> b
$ String -> ReadException
DecodingError String
file

-- | Reads a UTF8-encoded text file and converts many character
-- sequences which may be interpreted as line or paragraph separators
-- into '\n'.
--
-- If the file cannot be decoded, then a 'ReadException' is raised.

readFile :: FilePath -> IO String
readFile :: String -> IO String
readFile String
f = do
  Text
s <- String -> IO Text
readTextFile String
f
  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> String
T.unpack Text
s

-- | Writes a UTF8-encoded text file. The native convention for line
-- endings is used.

writeFile :: FilePath -> String -> IO ()
writeFile :: String -> String -> IO ()
writeFile String
file String
s = forall r. String -> IOMode -> (Handle -> IO r) -> IO r
IO.withFile String
file IOMode
IO.WriteMode forall a b. (a -> b) -> a -> b
$ \Handle
h -> do
  Handle -> TextEncoding -> IO ()
IO.hSetEncoding Handle
h TextEncoding
IO.utf8
  Handle -> String -> IO ()
IO.hPutStr Handle
h String
s

-- | Writes a UTF8-encoded text file. The native convention for line
-- endings is used.

writeTextToFile :: FilePath -> Text -> IO ()
writeTextToFile :: String -> Text -> IO ()
writeTextToFile String
file Text
s = forall r. String -> IOMode -> (Handle -> IO r) -> IO r
IO.withFile String
file IOMode
IO.WriteMode forall a b. (a -> b) -> a -> b
$ \Handle
h -> do
  Handle -> TextEncoding -> IO ()
IO.hSetEncoding Handle
h TextEncoding
IO.utf8
  Handle -> Text -> IO ()
T.hPutStr Handle
h Text
s