{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE CPP #-}
module Basement.UTF8.Base
where
import GHC.ST (ST, runST)
import GHC.Types
import GHC.Word
import GHC.Prim
import GHC.Exts (build)
import Basement.Compat.Base
import Basement.Numerical.Additive
import Basement.Compat.Bifunctor
import Basement.NormalForm
import Basement.Types.OffsetSize
import Basement.PrimType
import Basement.Monad
import Basement.FinalPtr
import Basement.UTF8.Helper
import Basement.UTF8.Types
import qualified Basement.Alg.UTF8 as UTF8
import Basement.UArray (UArray)
import Basement.Block (MutableBlock)
import qualified Basement.Block.Mutable as BLK
import qualified Basement.UArray as Vec
import qualified Basement.UArray as C
import qualified Basement.UArray.Mutable as MVec
import Basement.UArray.Base as Vec (offset, pureST, onBackend, ValidRange(..), offsetsValidRange)
import GHC.CString (unpackCString#, unpackCStringUtf8#)
import Data.Data
import Basement.Compat.ExtList as List
import Basement.Compat.Semigroup (Semigroup)
newtype String = String (UArray Word8)
deriving (Typeable, Semigroup, Monoid, Eq, Ord)
newtype MutableString st = MutableString (MVec.MUArray Word8 st)
deriving (Typeable)
instance Show String where
show = show . sToList
instance IsString String where
fromString = sFromList
instance IsList String where
type Item String = Char
fromList = sFromList
toList = sToList
instance Data String where
toConstr s = mkConstr stringType (show s) [] Prefix
dataTypeOf _ = stringType
gunfold _ _ = error "gunfold"
instance NormalForm String where
toNormalForm (String ba) = toNormalForm ba
stringType :: DataType
stringType = mkNoRepType "Foundation.String"
size :: String -> CountOf Word8
size (String ba) = Vec.length ba
sToList :: String -> [Char]
sToList (String arr) = Vec.onBackend onBA onAddr arr
where
(Vec.ValidRange !start !end) = Vec.offsetsValidRange arr
onBA ba@(BLK.Block _) = loop start
where
loop !idx
| idx == end = []
| otherwise = let !(Step c idx') = UTF8.next ba idx in c : loop idx'
onAddr fptr ptr@(Ptr _) = pureST (loop start)
where
loop !idx
| idx == end = []
| otherwise = let !(Step c idx') = UTF8.next ptr idx in c : loop idx'
{-# NOINLINE sToList #-}
sToListStream (String arr) k z = Vec.onBackend onBA onAddr arr
where
(Vec.ValidRange !start !end) = Vec.offsetsValidRange arr
onBA ba@(BLK.Block _) = loop start
where
loop !idx
| idx == end = z
| otherwise = let !(Step c idx') = UTF8.next ba idx in c `k` loop idx'
onAddr fptr ptr@(Ptr _) = pureST (loop start)
where
loop !idx
| idx == end = z
| otherwise = let !(Step c idx') = UTF8.next ptr idx in c `k` loop idx'
{-# RULES "String sToList" [~1] forall s . sToList s = build (\ k z -> sToListStream s k z) #-}
{-# RULES "String toList" [~1] forall s . toList s = build (\ k z -> sToListStream s k z) #-}
{-# RULES "String sFromList" forall s . sFromList (unpackCString# s) = fromModified s #-}
{-# RULES "String sFromList" forall s . sFromList (unpackCStringUtf8# s) = fromModified s #-}
fromModified :: Addr# -> String
fromModified addr = countAndCopy 0 0
where
countAndCopy :: CountOf Word8 -> Offset Word8 -> String
countAndCopy count ofs =
case primAddrIndex addr ofs of
0x00 -> runST $ do
mb <- MVec.newNative_ count (copy count)
String <$> Vec.unsafeFreeze mb
0xC0 -> case primAddrIndex addr (ofs+1) of
0x80 -> countAndCopy (count+1) (ofs+2)
_ -> countAndCopy (count+2) (ofs+2)
_ -> countAndCopy (count+1) (ofs+1)
copy :: CountOf Word8 -> MutableBlock Word8 st -> ST st ()
copy count mba = loop 0 0
where loop o i
| o .==# count = pure ()
| otherwise =
case primAddrIndex addr i of
0xC0 -> case primAddrIndex addr (i+1) of
0x80 -> BLK.unsafeWrite mba o 0x00 >> loop (o+1) (i+2)
b2 -> BLK.unsafeWrite mba o 0xC0 >> BLK.unsafeWrite mba (o+1) b2 >> loop (o+2) (i+2)
b1 -> BLK.unsafeWrite mba o b1 >> loop (o+1) (i+1)
sFromList :: [Char] -> String
sFromList l = runST (new bytes >>= startCopy)
where
!bytes = List.sum $ fmap (charToBytes . fromEnum) l
startCopy :: MutableString (PrimState (ST st)) -> ST st String
startCopy ms = loop 0 l
where
loop _ [] = freeze ms
loop idx (c:xs) = write ms idx c >>= \idx' -> loop idx' xs
{-# INLINE [0] sFromList #-}
next :: String -> Offset8 -> Step
next (String array) !n = Vec.onBackend nextBA nextAddr array
where
!start = Vec.offset array
reoffset (Step a ofs) = Step a (ofs `offsetSub` start)
nextBA ba@(BLK.Block _) = reoffset (UTF8.next ba (start + n))
nextAddr _ ptr@(Ptr _) = pureST $ reoffset (UTF8.next ptr (start + n))
prev :: String -> Offset8 -> StepBack
prev (String array) !n = Vec.onBackend prevBA prevAddr array
where
!start = Vec.offset array
reoffset (StepBack a ofs) = StepBack a (ofs `offsetSub` start)
prevBA ba@(BLK.Block _) = reoffset (UTF8.prev ba (start + n))
prevAddr _ ptr@(Ptr _) = pureST $ reoffset (UTF8.prev ptr (start + n))
nextAscii :: String -> Offset8 -> StepASCII
nextAscii (String ba) n = StepASCII w
where
!w = Vec.unsafeIndex ba n
expectAscii :: String -> Offset8 -> Word8 -> Bool
expectAscii (String ba) n v = Vec.unsafeIndex ba n == v
{-# INLINE expectAscii #-}
write :: PrimMonad prim => MutableString (PrimState prim) -> Offset8 -> Char -> prim Offset8
write (MutableString marray) ofs c =
MVec.onMutableBackend (\mba@(BLK.MutableBlock _) -> UTF8.writeUTF8 mba (start + ofs) c)
(\fptr -> withFinalPtr fptr $ \ptr@(Ptr _) -> UTF8.writeUTF8 ptr (start + ofs) c)
marray
where start = MVec.mutableOffset marray
new :: PrimMonad prim
=> CountOf Word8
-> prim (MutableString (PrimState prim))
new n = MutableString `fmap` MVec.new n
newNative :: PrimMonad prim
=> CountOf Word8
-> (MutableBlock Word8 (PrimState prim) -> prim a)
-> prim (a, MutableString (PrimState prim))
newNative n f = second MutableString `fmap` MVec.newNative n f
newNative_ :: PrimMonad prim
=> CountOf Word8
-> (MutableBlock Word8 (PrimState prim) -> prim ())
-> prim (MutableString (PrimState prim))
newNative_ n f = MutableString `fmap` MVec.newNative_ n f
freeze :: PrimMonad prim => MutableString (PrimState prim) -> prim String
freeze (MutableString mba) = String `fmap` C.unsafeFreeze mba
{-# INLINE freeze #-}
freezeShrink :: PrimMonad prim
=> CountOf Word8
-> MutableString (PrimState prim)
-> prim String
freezeShrink n (MutableString mba) = String `fmap` C.unsafeFreezeShrink mba n