{-# OPTIONS_HADDOCK show-extensions #-}
{-# OPTIONS_GHC -Wno-deprecations #-} --FIXME: remove when Text16 is removed
{-# LANGUAGE CPP,
             MagicHash,
             TypeFamilies,
             UnboxedTuples,
             StandaloneKindSignatures #-}
{-|
Module      : Parsley.Internal.Backend.Machine.InputRep
Description : Internal representations of input and miscellaneous operations.
License     : BSD-3-Clause
Maintainer  : Jamie Willis
Stability   : experimental

This module contains the translation from user input type to the
underlying parsley representation of it, as well as some miscellaneous
functions for working with specific types of input (these do not appear
in the rest of the machinery, but in "Parsley.Internal.Backend.Machine.InputOps"
and potentially the generated code).

@since 1.0.0.0
-}
module Parsley.Internal.Backend.Machine.InputRep (
    -- * Representation Type-Families
    DynRep, StaRep, RepKind,
    -- * @Int#@ Operations
    PartialStaOffset(..), dynOffset,
    intSame, intLess, intAdd, intSubNonNeg, min#, max#,
    -- * @Offwith@ Operations
    OffWith, offWithShiftRight,
    PartialStaOffWith(..), staOffWith,
    -- * @LazyByteString@ Operations
    UnpackedLazyByteString, emptyUnpackedLazyByteString,
    byteStringShiftRight, byteStringShiftLeft,
    -- * @Stream@ Operations
    dropStream,
    -- * @Text@ Operations
    StaText(..), PartialStaText(..), staText, offsetText, textShiftRight, textShiftLeft,
    -- * Crucial Exposed Functions
    {- |
    These functions must be exposed, since they can appear
    in the generated code.
    -}
    byteStringNext,
    textShiftRight#,  textShiftLeft#, byteStringShiftRight#, byteStringShiftLeft#,
    -- * Re-exports
    module Parsley.Internal.Core.InputTypes
  ) where

import Data.Array.Unboxed                (UArray)
import Data.ByteString.Internal          (ByteString(..))
import Data.Kind                         (Type)
import Data.Text.Internal                (Text(..))
import Data.Text.Unsafe                  (iter_, reverseIter_)
import GHC.Exts                          (Int(..), Char(..), TYPE, RuntimeRep(..), (==#), (<#), (+#), (-#), isTrue#)
#if __GLASGOW_HASKELL__ > 900
import GHC.Exts                          (LiftedRep)
#endif
import GHC.ForeignPtr                    (ForeignPtr(..), ForeignPtrContents)
import GHC.Prim                          (Int#, Addr#, nullAddr#, readWord8OffAddr#, word2Int#, chr#, touch#, realWorld#)
#if __GLASGOW_HASKELL__ > 900
import GHC.Prim                          (word8ToWord#)
#else
import GHC.Prim                          (Word#)
#endif
import Parsley.Internal.Common.Utils     (Code)
import Parsley.Internal.Core.InputTypes  (Text16(..), CharList(..), Stream(..))

import qualified Data.ByteString.Lazy.Internal as Lazy (ByteString(..))
import qualified Data.Text.Array               as Text (Array)

#if __GLASGOW_HASKELL__ <= 900
{-# INLINE word8ToWord# #-}
word8ToWord# :: Word# -> Word#
word8ToWord# x = x
#endif

{- Representation Types -}
{-|
This allows types like @String@ and @Stream@ to be manipulated
more efficiently by packaging them along with an offset which can
be used for quicker comparisons.

@since 1.0.0.0
-}
type OffWith ts = (# Int#, ts #)

data PartialStaOffWith ts = StaOW !PartialStaOffset !(Code ts) | DynOW !(Code (OffWith ts))

staOffWith :: PartialStaOffWith ts -> (PartialStaOffset -> Code ts -> Code a) -> Code a
staOffWith (StaOW po qts) k = k po qts
staOffWith (DynOW qots) k = [|| let !(# o, cs #) = $$qots in $$(k (StaO [||o||] 0) [||cs||]) ||]

{-|
This type unpacks /lazy/ `Lazy.ByteString`s for efficiency.

@since 1.0.0.0
-}
type UnpackedLazyByteString = (#
    Int#,
    Addr#,
    ForeignPtrContents,
    Int#,
    Int#,
    Lazy.ByteString
  #)

data PartialStaText = StaT {-# UNPACK #-} !StaText | DynT !(Code Text)

staText :: PartialStaText -> (StaText -> Code a) -> Code a
staText (StaT t) k = k t
staText (DynT qt) k = [||
    let !t@(Text arr off unconsumed) = $$qt
    in $$(k (StaText [||t||] [||arr||] [||off||] [||unconsumed||]))
  ||]

data StaText = StaText {
  origText       :: !(Code Text),
  arrText        :: !(Code Text.Array),
  offText        :: !(Code Int),
  unconsumedText :: !(Code Int)
}

data PartialStaOffset = StaO !(Code Int#) {-# UNPACK #-} !Int

dynOffset :: PartialStaOffset -> Code Int#
dynOffset (StaO qi 0) = qi
dynOffset (StaO qi n)
 | n > 0 = let !(I# n#) = n in [||$$qi +# n#||]
 | otherwise = let !(I# m#) = negate n in [||$$qi -# m#||]

{-|
Initialises an `UnpackedLazyByteString` with a specified offset.
This offset varies as each lazy chunk is consumed.

@since 1.0.0.0
-}
emptyUnpackedLazyByteString :: Code Int# -> Code UnpackedLazyByteString
emptyUnpackedLazyByteString qi# = [|| (# $$(qi#), nullAddr#, error "nullForeignPtr", 0#, 0#, Lazy.Empty #) ||]

{- Representation Mappings -}
-- NOTE: When a new input type is added here, it needs an Input instance in Parsley.Backend.Machine
{-|
The representation type of an input `Rep`, does not have to be a lifted type. To match a
representation of an input with the correct kind, this type family must be used.

@since 1.0.0.0
-}
type RepKind :: Type -> RuntimeRep
type family RepKind input where
  RepKind String = 'TupleRep '[IntRep, LiftedRep]
  RepKind (UArray Int Char) = IntRep
  RepKind Text16 = LiftedRep
  RepKind ByteString = IntRep
  RepKind Text = LiftedRep
  RepKind Lazy.ByteString = 'TupleRep '[IntRep, AddrRep, LiftedRep, IntRep, IntRep, LiftedRep]
  RepKind CharList = 'TupleRep '[IntRep, LiftedRep]
  RepKind Stream = 'TupleRep '[IntRep, LiftedRep]

{-|
This type family relates a user input type with the underlying parsley
representation, which is significantly more efficient to work with.
Most parts of the machine work with `Rep`.

@since 1.0.0.0
-}
type DynRep :: forall (rep :: Type) -> TYPE (RepKind rep)
type family DynRep input where
  DynRep String = (# Int#, String #)
  DynRep (UArray Int Char) = Int#
  DynRep Text16 = Text
  DynRep ByteString = Int#
  DynRep Text = Text
  DynRep Lazy.ByteString = UnpackedLazyByteString
  DynRep CharList = (# Int#, String #)
  DynRep Stream = (# Int#, Stream #)

type family StaRep input where
  StaRep String = PartialStaOffWith String
  StaRep (UArray Int Char) = PartialStaOffset
  StaRep Text16 = PartialStaText
  StaRep ByteString = PartialStaOffset
  StaRep Text = PartialStaText
  StaRep Lazy.ByteString = Code UnpackedLazyByteString --TODO: could refine
  StaRep CharList = PartialStaOffWith String
  StaRep Stream = PartialStaOffWith Stream

{- Generic Representation Operations -}
{-|
Verifies that two `Int#`s are equal.

@since 1.0.0.0
-}
intSame :: Code Int# -> Code Int# -> Code Bool
intSame qi# qj# = [||isTrue# ($$(qi#) ==# $$(qj#))||]

{-|
Is the first argument is less than the second?

@since 2.3.0.0
-}
intLess :: Code Int# -> Code Int# -> Code a -> Code a -> Code a
intLess qi# qj# yes no = [||
    case $$(qi#) <# $$(qj#) of
      1# -> $$yes
      0# -> $$no
  ||]

intAdd ::  PartialStaOffset -> Int -> PartialStaOffset
intAdd (StaO qo n) i = StaO qo (n + i)

intSubNonNeg ::  PartialStaOffset -> Int -> PartialStaOffset
intSubNonNeg (StaO qo n) i
  | n >= i = StaO qo (n - i)
  | otherwise = let !(I# m#) = negate (n - i) in StaO [||max# ($$qo -# m#) 0#||] 0

{-|
Extracts the offset from `Text`.

@since 1.0.0.0
-}
-- FIXME: not accurate? this can be slow without consequence
offsetText :: PartialStaText -> Code Int
offsetText pt = staText pt offText

{-|
Shifts an `OffWith` to the right, taking care to also drop tokens from the
companion input.

@since 1.0.0.0
-}
offWithShiftRight :: Code (Int -> ts -> ts)        -- ^ A @drop@ function for underlying input.
                  -> PartialStaOffset -> Code ts   -- ^ The `OffWith` to shift.
                  -> Int                           -- ^ How much to shift by.
                  -> (PartialStaOffset, Code ts)
offWithShiftRight _ po qts 0 = (intAdd po 0, qts)
offWithShiftRight drop po qts n = (intAdd po n, [|| $$drop n $$qts ||])

{-|
Drops tokens off of a `Stream`.

@since 1.0.0.0
-}
dropStream :: Int -> Stream -> Stream
dropStream 0 cs = cs
dropStream n (_ :> cs) = dropStream (n-1) cs

{-|
Drops tokens off of `Text`.

@since 2.3.0.0
-}
textShiftRight :: Code Text -> Int# -> Code Text
textShiftRight t 0# = t
textShiftRight t n = [||textShiftRight# $$t n||]

{-# INLINABLE textShiftRight# #-}
textShiftRight# :: Text -> Int# -> Text
textShiftRight# (Text arr off unconsumed) i = go i arr off unconsumed
  where
    go 0# !arr !off !unconsumed = Text arr off unconsumed
    go n !arr !off !unconsumed
      | unconsumed > 0 = let !d = iter_ (Text arr off unconsumed) 0
                         in go (n -# 1#) arr (off + d) (unconsumed - d)
      | otherwise = Text arr off 0

{-|
Rewinds input consumption on `Text` where the input is still available (i.e. in the same chunk).

@since 2.3.0.0
-}
textShiftLeft :: Code Text -> Int# -> Code Text
textShiftLeft t 0# = t
textShiftLeft t n = [||textShiftLeft# $$t n||]

textShiftLeft# :: Text -> Int# -> Text
textShiftLeft# (Text arr off unconsumed) i = go i off unconsumed
  where
    go 0# off' unconsumed' = Text arr off' unconsumed'
    go n off' unconsumed'
      | off' > 0 = let !d = reverseIter_ (Text arr off' unconsumed') 0 in go (n -# 1#) (off' + d) (unconsumed' - d)
      | otherwise = Text arr 0 unconsumed'

{-# INLINE emptyUnpackedLazyByteString' #-}
emptyUnpackedLazyByteString' :: Int# -> UnpackedLazyByteString
emptyUnpackedLazyByteString' i# = (# i#, nullAddr#, error "nullForeignPtr", 0#, 0#, Lazy.Empty #)

{-# INLINABLE byteStringNext #-}
byteStringNext :: UnpackedLazyByteString -> (# Char, UnpackedLazyByteString #)
byteStringNext (# i#, addr#, final, off#, size#, cs #) =
  case readWord8OffAddr# addr# off# realWorld# of
    (# s', x #) -> case touch# final s' of
      _ -> (# C# (chr# (word2Int# (word8ToWord# x))),
          if I# size# /= 1 then (# i# +# 1#, addr#, final, off# +# 1#, size# -# 1#, cs #)
          else case cs of
            Lazy.Chunk (PS (ForeignPtr addr'# final') (I# off'#) (I# size'#)) cs' ->
              (# i# +# 1#, addr'#, final', off'#, size'#, cs' #)
            Lazy.Empty -> emptyUnpackedLazyByteString' (i# +# 1#)
        #)

{-|
Drops tokens off of a lazy `Lazy.ByteString`.

@since 2.3.0.0
-}
byteStringShiftRight :: Code UnpackedLazyByteString -> Int# -> Code UnpackedLazyByteString
byteStringShiftRight t 0# = t
byteStringShiftRight t n = [||byteStringShiftRight# $$t n||]

{-# INLINABLE byteStringShiftRight# #-}
byteStringShiftRight# :: UnpackedLazyByteString -> Int# -> UnpackedLazyByteString
byteStringShiftRight# (# i#, addr#, final, off#, size#, cs #) j#
  | isTrue# (j# <# size#)  = (# i# +# j#, addr#, final, off# +# j#, size# -# j#, cs #)
  | otherwise = case cs of
    Lazy.Chunk (PS (ForeignPtr addr'# final') (I# off'#) (I# size'#)) cs' -> byteStringShiftRight# (# i# +# size#, addr'#, final', off'#, size'#, cs' #) (j# -# size#)
    Lazy.Empty -> emptyUnpackedLazyByteString' (i# +# size#)

{-|
Rewinds input consumption on a lazy `Lazy.ByteString` if input is still available (within the same chunk).

@since 2.3.0.0
-}
byteStringShiftLeft :: Code UnpackedLazyByteString -> Int# -> Code UnpackedLazyByteString
byteStringShiftLeft t 0# = t
byteStringShiftLeft t n = [||byteStringShiftLeft# $$t n||]

{-# INLINABLE byteStringShiftLeft# #-}
byteStringShiftLeft# :: UnpackedLazyByteString -> Int# -> UnpackedLazyByteString
byteStringShiftLeft# (# i#, addr#, final, off#, size#, cs #) j# =
  let d# = min# off# j#
  in (# i# -# d#, addr#, final, off# -# d#, size# +# d#, cs #)

{-|
Finds the minimum of two `Int#` values.

@since 1.0.0.0
-}
{-# INLINABLE min# #-}
min# :: Int# -> Int# -> Int#
min# i# j# = case i# <# j# of
  0# -> j#
  _  -> i#

{-|
Finds the maximum of two `Int#` values.

@since 1.0.0.0
-}
{-# INLINABLE max# #-}
max# :: Int# -> Int# -> Int#
max# i# j# = case i# <# j# of
  0# -> i#
  _  -> j#