{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE NoImplicitPrelude
           , BangPatterns
           , NondecreasingIndentation
           , UnboxedTuples
           , MagicHash
  #-}
{-# OPTIONS_GHC  -funbox-strict-fields #-}

-----------------------------------------------------------------------------
-- |
-- Module      :  GHC.IO.Encoding.Latin1
-- Copyright   :  (c) The University of Glasgow, 2009
-- License     :  see libraries/base/LICENSE
-- 
-- Maintainer  :  libraries@haskell.org
-- Stability   :  internal
-- Portability :  non-portable
--
-- Single-byte encodings that map directly to Unicode code points.
--
-- Portions Copyright   : (c) Tom Harper 2008-2009,
--                        (c) Bryan O'Sullivan 2009,
--                        (c) Duncan Coutts 2009
--
-----------------------------------------------------------------------------

module GHC.IO.Encoding.Latin1 (
  latin1, mkLatin1,
  latin1_checked, mkLatin1_checked,
  ascii, mkAscii,
  latin1_decode,
  ascii_decode,
  latin1_encode,
  latin1_checked_encode,
  ascii_encode,
  ) where

import GHC.Base
import GHC.Real
import GHC.Num
-- import GHC.IO
import GHC.IO.Buffer
import GHC.IO.Encoding.Failure
import GHC.IO.Encoding.Types

-- -----------------------------------------------------------------------------
-- Latin1

latin1 :: TextEncoding
latin1 = mkLatin1 ErrorOnCodingFailure

-- | @since 4.4.0.0
mkLatin1 :: CodingFailureMode -> TextEncoding
mkLatin1 cfm = TextEncoding { textEncodingName = "ISO-8859-1",
                              mkTextDecoder = latin1_DF cfm,
                              mkTextEncoder = latin1_EF cfm }

latin1_DF :: CodingFailureMode -> IO (TextDecoder ())
latin1_DF cfm =
  return (BufferCodec# {
             encode#   = latin1_decode,
             recover#  = recoverDecode# cfm,
             close#    = return (),
             getState# = return (),
             setState# = const $ return ()
          })

latin1_EF :: CodingFailureMode -> IO (TextEncoder ())
latin1_EF cfm =
  return (BufferCodec# {
             encode#   = latin1_encode,
             recover#  = recoverEncode# cfm,
             close#    = return (),
             getState# = return (),
             setState# = const $ return ()
          })

latin1_checked :: TextEncoding
latin1_checked = mkLatin1_checked ErrorOnCodingFailure

-- | @since 4.4.0.0
mkLatin1_checked :: CodingFailureMode -> TextEncoding
mkLatin1_checked cfm = TextEncoding { textEncodingName = "ISO-8859-1",
                                      mkTextDecoder = latin1_DF cfm,
                                      mkTextEncoder = latin1_checked_EF cfm }

latin1_checked_EF :: CodingFailureMode -> IO (TextEncoder ())
latin1_checked_EF cfm =
  return (BufferCodec# {
             encode#   = latin1_checked_encode,
             recover#  = recoverEncode# cfm,
             close#    = return (),
             getState# = return (),
             setState# = const $ return ()
          })

-- -----------------------------------------------------------------------------
-- ASCII

-- | @since 4.9.0.0
ascii :: TextEncoding
ascii = mkAscii ErrorOnCodingFailure

-- | @since 4.9.0.0
mkAscii :: CodingFailureMode -> TextEncoding
mkAscii cfm = TextEncoding { textEncodingName = "ASCII",
                             mkTextDecoder = ascii_DF cfm,
                             mkTextEncoder = ascii_EF cfm }

ascii_DF :: CodingFailureMode -> IO (TextDecoder ())
ascii_DF cfm =
  return (BufferCodec# {
             encode#   = ascii_decode,
             recover#  = recoverDecode# cfm,
             close#    = return (),
             getState# = return (),
             setState# = const $ return ()
          })

ascii_EF :: CodingFailureMode -> IO (TextEncoder ())
ascii_EF cfm =
  return (BufferCodec# {
             encode#   = ascii_encode,
             recover#  = recoverEncode# cfm,
             close#    = return (),
             getState# = return (),
             setState# = const $ return ()
          })


-- -----------------------------------------------------------------------------
-- The actual decoders and encoders

-- TODO: Eliminate code duplication between the checked and unchecked
-- versions of the decoder or encoder (but don't change the Core!)

latin1_decode :: DecodeBuffer#
latin1_decode 
  input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
  output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
  st
 = let 
       loop :: Int -> Int -> DecodingBuffer#
       loop !ir !ow st0
         | ow >= os = done OutputUnderflow ir ow st0
         | ir >= iw = done InputUnderflow ir ow st0
         | otherwise = do
              let !(# st1, c0 #) = unIO (readWord8Buf iraw ir) st0
                  !(# st2, ow' #) = unIO (writeCharBuf oraw ow (unsafeChr (fromIntegral c0))) st1
              loop (ir+1) ow' st2

       -- lambda-lifted, to avoid thunks being built in the inner-loop:
       {-# NOINLINE done #-}
       done :: CodingProgress -> Int -> Int -> DecodingBuffer#
       done why !ir !ow st' =
         let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
             !ro = output{ bufR=ow }
         in  (# st', why, ri, ro #)
    in
    loop ir0 ow0 st

ascii_decode :: DecodeBuffer#
ascii_decode
  input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
  output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
  st
 = let
       loop :: Int -> Int -> DecodingBuffer#
       loop !ir !ow st0
         | ow >= os = done OutputUnderflow ir ow st0
         | ir >= iw = done InputUnderflow ir ow st0
         | otherwise = do
              let !(# st1, c0 #) = unIO (readWord8Buf iraw ir) st0
              if c0 > 0x7f then invalid st1 else do
                let !(# st2, ow' #) = unIO (writeCharBuf oraw ow (unsafeChr (fromIntegral c0))) st1
                loop (ir+1) ow' st2
         where
           invalid :: DecodingBuffer#
           invalid st' = done InvalidSequence ir ow st'

       -- lambda-lifted, to avoid thunks being built in the inner-loop:
       {-# NOINLINE done #-}
       done :: CodingProgress -> Int -> Int -> DecodingBuffer#
       done why !ir !ow st' = 
         let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
             !ro = output{ bufR=ow }
         in  (# st', why, ri, ro #)
    in
    loop ir0 ow0 st

latin1_encode :: EncodeBuffer#
latin1_encode
  input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
  output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
  st
 = let
      {-# NOINLINE done #-}
      done :: CodingProgress -> Int -> Int -> EncodingBuffer#
      done why !ir !ow st' = 
        let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
            !ro = output{ bufR=ow }
        in  (# st', why, ri, ro #)
      loop :: Int -> Int -> EncodingBuffer#
      loop !ir !ow st0
        | ow >= os = done OutputUnderflow ir ow st0
        | ir >= iw = done InputUnderflow ir ow st0
        | otherwise = do
           let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
               !(# st2, () #) = unIO (writeWord8Buf oraw ow (fromIntegral (ord c))) st1
           loop ir' (ow+1) st2
    in
    loop ir0 ow0 st

latin1_checked_encode :: EncodeBuffer#
latin1_checked_encode input output
 = single_byte_checked_encode 0xff input output

ascii_encode :: EncodeBuffer#
ascii_encode input output
 = single_byte_checked_encode 0x7f input output

single_byte_checked_encode :: Int -> EncodeBuffer#
single_byte_checked_encode max_legal_char
  input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
  output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
  st
 = let
      {-# NOINLINE done #-}
      done :: CodingProgress -> Int -> Int -> EncodingBuffer#
      done why !ir !ow st' = 
        let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
            !ro = output{ bufR=ow }
        in  (# st', why, ri, ro #)
      loop :: Int -> Int -> EncodingBuffer#
      loop !ir !ow st0
        | ow >= os = done OutputUnderflow ir ow st0
        | ir >= iw = done InputUnderflow ir ow st0
        | otherwise = do
           let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
           if ord c > max_legal_char then invalid st1 else do
             let !(# st2, () #) = unIO (writeWord8Buf oraw ow (fromIntegral (ord c))) st1
             loop ir' (ow+1) st2
        where
          invalid :: EncodingBuffer#
          invalid st' = done InvalidSequence ir ow st'
    in
    loop ir0 ow0 st
{-# INLINE single_byte_checked_encode #-}