{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE CPP #-}
module Basement.String
( String(..)
, MutableString(..)
, create
, replicate
, length
, Encoding(..)
, fromBytes
, fromChunkBytes
, fromBytesUnsafe
, fromBytesLenient
, toBytes
, mutableValidate
, copy
, ValidationFailure(..)
, index
, null
, drop
, take
, splitAt
, revDrop
, revTake
, revSplitAt
, splitOn
, sub
, elem
, indices
, intersperse
, span
, spanEnd
, break
, breakEnd
, breakElem
, breakLine
, dropWhile
, singleton
, charMap
, snoc
, cons
, unsnoc
, uncons
, find
, findIndex
, sortBy
, filter
, reverse
, replace
, builderAppend
, builderBuild
, builderBuild_
, readInteger
, readIntegral
, readNatural
, readDouble
, readRational
, readFloatingExact
, upper
, lower
, caseFold
, isPrefixOf
, isSuffixOf
, isInfixOf
, stripPrefix
, stripSuffix
, all
, any
, lines
, words
, toBase64
, toBase64URL
, toBase64OpenBSD
) where
import Basement.UArray (UArray)
import qualified Basement.UArray as Vec
import qualified Basement.UArray as C
import qualified Basement.UArray.Mutable as MVec
import Basement.Block.Mutable (Block(..), MutableBlock(..))
import qualified Basement.Block.Mutable as MBLK
import Basement.Compat.Bifunctor
import Basement.Compat.Base
import Basement.Compat.Natural
import Basement.Compat.MonadTrans
import Basement.Compat.Primitive
import Basement.Types.OffsetSize
import Basement.Numerical.Additive
import Basement.Numerical.Subtractive
import Basement.Numerical.Multiplicative
import Basement.Numerical.Number
import Basement.Cast
import Basement.Monad
import Basement.PrimType
import Basement.FinalPtr
import Basement.IntegralConv
import Basement.Floating
import Basement.MutableBuilder
import Basement.String.CaseMapping (upperMapping, lowerMapping, foldMapping)
import Basement.UTF8.Table
import Basement.UTF8.Helper
import Basement.UTF8.Base
import Basement.UTF8.Types
import Basement.UArray.Base as C (onBackendPrim, onBackend, onBackendPure, offset, ValidRange(..), offsetsValidRange, MUArray(..), MUArrayBackend(..))
import Basement.Alg.Class (Indexable)
import qualified Basement.Alg.UTF8 as UTF8
import qualified Basement.Alg.String as Alg
import Basement.Types.Char7 (Char7(..), c7Upper, c7Lower)
import qualified Basement.Types.Char7 as Char7
import GHC.Prim
import GHC.ST
import GHC.Types
import GHC.Word
#if MIN_VERSION_base(4,9,0)
import GHC.Char
#endif
import qualified Data.List
import Data.Ratio
import Data.Char (toUpper, toLower)
import qualified Prelude
import qualified Basement.String.Encoding.Encoding as Encoder
import qualified Basement.String.Encoding.ASCII7 as Encoder
import qualified Basement.String.Encoding.UTF16 as Encoder
import qualified Basement.String.Encoding.UTF32 as Encoder
import qualified Basement.String.Encoding.ISO_8859_1 as Encoder
data EncoderUTF8 = EncoderUTF8
instance Encoder.Encoding EncoderUTF8 where
type Unit EncoderUTF8 = Word8
type Error EncoderUTF8 = ValidationFailure
encodingNext _ = \ofs -> Right . nextWithIndexer ofs
encodingWrite _ = writeWithBuilder
validate :: UArray Word8
-> Offset8
-> CountOf Word8
-> (Offset8, Maybe ValidationFailure)
validate array ofsStart sz = C.unsafeDewrap goBa goAddr array
where
unTranslateOffset start = first (\e -> e `offsetSub` start)
goBa ba start =
unTranslateOffset start $ Alg.validate (start+end) ba (start + ofsStart)
goAddr ptr@(Ptr !_) start =
pure $ unTranslateOffset start $ Alg.validate (start+end) ptr (ofsStart + start)
end = ofsStart `offsetPlusE` sz
mutableValidate :: PrimMonad prim
=> MVec.MUArray Word8 (PrimState prim)
-> Offset Word8
-> CountOf Word8
-> prim (Offset Word8, Maybe ValidationFailure)
mutableValidate mba ofsStart sz = do
loop ofsStart
where
end = ofsStart `offsetPlusE` sz
loop ofs
| ofs > end = error "mutableValidate: internal error: went pass offset"
| ofs == end = return (end, Nothing)
| otherwise = do
r <- one ofs
case r of
(nextOfs, Nothing) -> loop nextOfs
(pos, Just failure) -> return (pos, Just failure)
one pos = do
h <- StepASCII <$> Vec.unsafeRead mba pos
let nbConts = getNbBytes h
if nbConts == 0xff
then return (pos, Just InvalidHeader)
else if pos + 1 + Offset nbConts > end
then return (pos, Just MissingByte)
else do
case nbConts of
0 -> return (pos + 1, Nothing)
1 -> do
c1 <- Vec.unsafeRead mba (pos + 1)
if isContinuation c1
then return (pos + 2, Nothing)
else return (pos, Just InvalidContinuation)
2 -> do
c1 <- Vec.unsafeRead mba (pos + 1)
c2 <- Vec.unsafeRead mba (pos + 2)
if isContinuation c1 && isContinuation c2
then return (pos + 3, Nothing)
else return (pos, Just InvalidContinuation)
3 -> do
c1 <- Vec.unsafeRead mba (pos + 1)
c2 <- Vec.unsafeRead mba (pos + 2)
c3 <- Vec.unsafeRead mba (pos + 3)
if isContinuation c1 && isContinuation c2 && isContinuation c3
then return (pos + 4, Nothing)
else return (pos, Just InvalidContinuation)
_ -> error "internal error"
nextWithIndexer :: (Offset Word8 -> Word8)
-> Offset Word8
-> (Char, Offset Word8)
nextWithIndexer getter off =
case getNbBytes# h of
0# -> (toChar h, off + 1)
1# -> (toChar (decode2 (getter $ off + 1)), off + 2)
2# -> (toChar (decode3 (getter $ off + 1) (getter $ off + 2)), off + 3)
3# -> (toChar (decode4 (getter $ off + 1) (getter $ off + 2) (getter $ off + 3))
, off + 4)
r -> error ("next: internal error: invalid input: " <> show (I# r) <> " " <> show (W# h))
where
!(W8# h) = getter off
toChar :: Word# -> Char
toChar w = C# (chr# (word2Int# w))
decode2 :: Word8 -> Word#
decode2 (W8# c1) =
or# (uncheckedShiftL# (and# h 0x1f##) 6#)
(and# c1 0x3f##)
decode3 :: Word8 -> Word8 -> Word#
decode3 (W8# c1) (W8# c2) =
or# (uncheckedShiftL# (and# h 0xf##) 12#)
(or# (uncheckedShiftL# (and# c1 0x3f##) 6#)
(and# c2 0x3f##))
decode4 :: Word8 -> Word8 -> Word8 -> Word#
decode4 (W8# c1) (W8# c2) (W8# c3) =
or# (uncheckedShiftL# (and# h 0x7##) 18#)
(or# (uncheckedShiftL# (and# c1 0x3f##) 12#)
(or# (uncheckedShiftL# (and# c2 0x3f##) 6#)
(and# c3 0x3f##))
)
writeWithBuilder :: (PrimMonad st, Monad st)
=> Char
-> Builder (UArray Word8) (MVec.MUArray Word8) Word8 st err ()
writeWithBuilder c
| bool# (ltWord# x 0x80## ) = encode1
| bool# (ltWord# x 0x800## ) = encode2
| bool# (ltWord# x 0x10000##) = encode3
| otherwise = encode4
where
!(I# xi) = fromEnum c
!x = int2Word# xi
encode1 = Vec.builderAppend (W8# x)
encode2 = do
let x1 = or# (uncheckedShiftRL# x 6#) 0xc0##
x2 = toContinuation x
Vec.builderAppend (W8# x1) >> Vec.builderAppend (W8# x2)
encode3 = do
let x1 = or# (uncheckedShiftRL# x 12#) 0xe0##
x2 = toContinuation (uncheckedShiftRL# x 6#)
x3 = toContinuation x
Vec.builderAppend (W8# x1) >> Vec.builderAppend (W8# x2) >> Vec.builderAppend (W8# x3)
encode4 = do
let x1 = or# (uncheckedShiftRL# x 18#) 0xf0##
x2 = toContinuation (uncheckedShiftRL# x 12#)
x3 = toContinuation (uncheckedShiftRL# x 6#)
x4 = toContinuation x
Vec.builderAppend (W8# x1) >> Vec.builderAppend (W8# x2) >> Vec.builderAppend (W8# x3) >> Vec.builderAppend (W8# x4)
toContinuation :: Word# -> Word#
toContinuation w = or# (and# w 0x3f##) 0x80##
writeUTF8Char :: PrimMonad prim => MutableString (PrimState prim) -> Offset8 -> UTF8Char -> prim ()
writeUTF8Char (MutableString mba) i (UTF8_1 x1) =
Vec.unsafeWrite mba i x1
writeUTF8Char (MutableString mba) i (UTF8_2 x1 x2) = do
Vec.unsafeWrite mba i x1
Vec.unsafeWrite mba (i+1) x2
writeUTF8Char (MutableString mba) i (UTF8_3 x1 x2 x3) = do
Vec.unsafeWrite mba i x1
Vec.unsafeWrite mba (i+1) x2
Vec.unsafeWrite mba (i+2) x3
writeUTF8Char (MutableString mba) i (UTF8_4 x1 x2 x3 x4) = do
Vec.unsafeWrite mba i x1
Vec.unsafeWrite mba (i+1) x2
Vec.unsafeWrite mba (i+2) x3
Vec.unsafeWrite mba (i+3) x4
{-# INLINE writeUTF8Char #-}
unsafeFreezeShrink :: PrimMonad prim => MutableString (PrimState prim) -> CountOf Word8 -> prim String
unsafeFreezeShrink (MutableString mba) s = String <$> Vec.unsafeFreezeShrink mba s
{-# INLINE unsafeFreezeShrink #-}
null :: String -> Bool
null (String ba) = C.length ba == 0
countCharMoreThanBytes :: CountOf Char -> UArray Word8 -> Bool
countCharMoreThanBytes (CountOf chars) ba = chars >= bytes
where (CountOf bytes) = C.length ba
take :: CountOf Char -> String -> String
take n s@(String ba)
| n <= 0 = mempty
| countCharMoreThanBytes n ba = s
| otherwise = String $ Vec.unsafeTake (offsetAsSize $ indexN n s) ba
drop :: CountOf Char -> String -> String
drop n s@(String ba)
| n <= 0 = s
| countCharMoreThanBytes n ba = mempty
| otherwise = String $ Vec.drop (offsetAsSize $ indexN n s) ba
splitAt :: CountOf Char -> String -> (String, String)
splitAt n s@(String ba)
| n <= 0 = (mempty, s)
| countCharMoreThanBytes n ba = (s, mempty)
| otherwise =
let (v1,v2) = C.splitAt (offsetAsSize $ indexN n s) ba
in (String v1, String v2)
indexN :: CountOf Char -> String -> Offset Word8
indexN !n (String ba) = Vec.unsafeDewrap goVec goAddr ba
where
goVec :: Block Word8 -> Offset Word8 -> Offset Word8
goVec (Block !ma) !start = loop start 0
where
!len = start `offsetPlusE` Vec.length ba
loop :: Offset Word8 -> Offset Char -> Offset Word8
loop !idx !i
| idx >= len || i .==# n = sizeAsOffset (idx - start)
| otherwise = loop (idx `offsetPlusE` d) (i + Offset 1)
where d = skipNextHeaderValue (primBaIndex ma idx)
{-# INLINE goVec #-}
goAddr :: Ptr Word8 -> Offset Word8 -> ST s (Offset Word8)
goAddr (Ptr ptr) !start = return $ loop start (Offset 0)
where
!len = start `offsetPlusE` Vec.length ba
loop :: Offset Word8 -> Offset Char -> Offset Word8
loop !idx !i
| idx >= len || i .==# n = sizeAsOffset (idx - start)
| otherwise = loop (idx `offsetPlusE` d) (i + Offset 1)
where d = skipNextHeaderValue (primAddrIndex ptr idx)
{-# INLINE goAddr #-}
{-# INLINE indexN #-}
countFromStart :: String -> CountOf Char -> CountOf Char
countFromStart s sz@(CountOf sz')
| sz >= len = CountOf 0
| otherwise = CountOf (len' - sz')
where len@(CountOf len') = length s
revTake :: CountOf Char -> String -> String
revTake n v = drop (countFromStart v n) v
revDrop :: CountOf Char -> String -> String
revDrop n v = take (countFromStart v n) v
revSplitAt :: CountOf Char -> String -> (String, String)
revSplitAt n v = (drop idx v, take idx v) where idx = countFromStart v n
splitOn :: (Char -> Bool) -> String -> [String]
splitOn predicate s
| sz == CountOf 0 = [mempty]
| otherwise = loop azero azero
where
!sz = size s
end = azero `offsetPlusE` sz
loop prevIdx idx
| idx == end = [sub s prevIdx idx]
| otherwise =
let !(Step c idx') = next s idx
in if predicate c
then sub s prevIdx idx : loop idx' idx'
else loop prevIdx idx'
sub :: String -> Offset8 -> Offset8 -> String
sub (String ba) start end = String $ Vec.sub ba start end
splitIndex :: Offset8 -> String -> (String, String)
splitIndex idx (String ba) = (String v1, String v2)
where (v1,v2) = C.splitAt (offsetAsSize idx) ba
break :: (Char -> Bool) -> String -> (String, String)
break predicate s@(String ba) = runST $ Vec.unsafeIndexer ba go
where
!sz = size s
end = azero `offsetPlusE` sz
go :: (Offset Word8 -> Word8) -> ST st (String, String)
go getIdx = loop (Offset 0)
where
!nextI = nextWithIndexer getIdx
loop idx
| idx == end = return (s, mempty)
| otherwise = do
let (c, idx') = nextI idx
case predicate c of
True -> return $ splitIndex idx s
False -> loop idx'
{-# INLINE loop #-}
{-# INLINE [2] break #-}
breakEnd :: (Char -> Bool) -> String -> (String, String)
breakEnd predicate s@(String arr)
| k == end = (s, mempty)
| otherwise = splitIndex (k `offsetSub` start) s
where
k = C.onBackend goVec (\_ -> pure . goAddr) arr
(C.ValidRange !start !end) = offsetsValidRange arr
goVec ba@(Block !_) = let k = Alg.revFindIndexPredicate predicate ba start end
in if k == end then end else UTF8.nextSkip ba k
goAddr ptr@(Ptr !_) =
let k = Alg.revFindIndexPredicate predicate ptr start end
in if k == end then end else UTF8.nextSkip ptr k
{-# INLINE [2] breakEnd #-}
#if MIN_VERSION_base(4,9,0)
{-# RULES "break (== 'c')" [3] forall c . break (eqChar c) = breakElem c #-}
#else
{-# RULES "break (== 'c')" [3] forall c . break (== c) = breakElem c #-}
#endif
breakElem :: Char -> String -> (String, String)
breakElem !el s@(String ba)
| sz == 0 = (mempty, mempty)
| otherwise =
case asUTF8Char el of
UTF8_1 w -> let !(v1,v2) = Vec.breakElem w ba in (String v1, String v2)
_ -> runST $ Vec.unsafeIndexer ba go
where
sz = size s
end = azero `offsetPlusE` sz
go :: (Offset Word8 -> Word8) -> ST st (String, String)
go getIdx = loop (Offset 0)
where
!nextI = nextWithIndexer getIdx
loop idx
| idx == end = return (s, mempty)
| otherwise = do
let (c, idx') = nextI idx
case el == c of
True -> return $ splitIndex idx s
False -> loop idx'
breakLine :: String -> Either Bool (String, String)
breakLine (String arr) = bimap String String <$> Vec.breakLine arr
span :: (Char -> Bool) -> String -> (String, String)
span predicate s = break (not . predicate) s
spanEnd :: (Char -> Bool) -> String -> (String, String)
spanEnd predicate s = breakEnd (not . predicate) s
dropWhile :: (Char -> Bool) -> String -> String
dropWhile predicate = snd . break (not . predicate)
elem :: Char -> String -> Bool
elem !el s@(String ba) =
case asUTF8Char el of
UTF8_1 w -> Vec.elem w ba
_ -> runST $ Vec.unsafeIndexer ba go
where
sz = size s
end = azero `offsetPlusE` sz
go :: (Offset Word8 -> Word8) -> ST st Bool
go getIdx = loop (Offset 0)
where
!nextI = nextWithIndexer getIdx
loop !idx
| idx == end = return False
| otherwise = do
let (c, idx') = nextI idx
case el == c of
True -> return True
False -> loop idx'
intersperse :: Char -> String -> String
intersperse sep src = case length src - 1 of
Nothing -> src
Just 0 -> src
Just gaps -> runST $ unsafeCopyFrom src dstBytes go
where
lastSrcI :: Offset Char
lastSrcI = 0 `offsetPlusE` gaps
dstBytes = (size src :: CountOf Word8) + (gaps `scale` charToBytes (fromEnum sep))
go :: String -> Offset Char -> Offset8 -> MutableString s -> Offset8 -> ST s (Offset8, Offset8)
go src' srcI srcIdx dst dstIdx
| srcI == lastSrcI = do
nextDstIdx <- write dst dstIdx c
return (nextSrcIdx, nextDstIdx)
| otherwise = do
nextDstIdx <- write dst dstIdx c
nextDstIdx' <- write dst nextDstIdx sep
return (nextSrcIdx, nextDstIdx')
where
!(Step c nextSrcIdx) = next src' srcIdx
unsafeCopyFrom :: String
-> CountOf Word8
-> (String -> Offset Char -> Offset8 -> MutableString s -> Offset8 -> ST s (Offset8, Offset8))
-> ST s String
unsafeCopyFrom src dstBytes f = new dstBytes >>= fill (Offset 0) (Offset 0) (Offset 0) f >>= freeze
where
srcLen = length src
end = Offset 0 `offsetPlusE` srcLen
fill srcI srcIdx dstIdx f' dst'
| srcI == end = return dst'
| otherwise = do (nextSrcIdx, nextDstIdx) <- f' src srcI srcIdx dst' dstIdx
fill (srcI + Offset 1) nextSrcIdx nextDstIdx f' dst'
length :: String -> CountOf Char
length (String arr)
| start == end = 0
| otherwise = C.onBackend goVec (\_ -> pure . goAddr) arr
where
(C.ValidRange !start !end) = offsetsValidRange arr
goVec ma = UTF8.length ma start end
goAddr ptr = UTF8.length ptr start end
replicate :: CountOf Char -> Char -> String
replicate (CountOf n) c = runST (new nbBytes >>= fill)
where
nbBytes = scale (cast n :: Word) sz
sz = charToBytes (fromEnum c)
fill :: PrimMonad prim => MutableString (PrimState prim) -> prim String
fill ms = loop (Offset 0)
where
loop idx
| idx .==# nbBytes = freeze ms
| otherwise = write ms idx c >>= loop
copy :: String -> String
copy (String s) = String (Vec.copy s)
singleton :: Char -> String
singleton c = runST $ do
ms <- new nbBytes
_ <- write ms (Offset 0) c
freeze ms
where
!nbBytes = charToBytes (fromEnum c)
create :: PrimMonad prim => CountOf Word8 -> (MutableString (PrimState prim) -> prim (Offset Word8)) -> prim String
create sz f = do
ms <- new sz
filled <- f ms
if filled .==# sz
then freeze ms
else do
s <- freeze ms
let (String ba) = s
pure $ String $ C.take (offsetAsSize filled) ba
charMap :: (Char -> Char) -> String -> String
charMap f src
| srcSz == 0 = mempty
| otherwise =
let !(elems, nbBytes) = allocateAndFill [] (Offset 0) (CountOf 0)
in runST $ do
dest <- new nbBytes
copyLoop dest elems (Offset 0 `offsetPlusE` nbBytes)
freeze dest
where
!srcSz = size src
srcEnd = azero `offsetPlusE` srcSz
allocateAndFill :: [(String, CountOf Word8)]
-> Offset8
-> CountOf Word8
-> ([(String,CountOf Word8)], CountOf Word8)
allocateAndFill acc idx bytesWritten
| idx == srcEnd = (acc, bytesWritten)
| otherwise =
let (el@(_,addBytes), idx') = runST $ do
let !diffBytes = srcEnd - idx
!allocatedBytes = if diffBytes <= CountOf 4 then CountOf 4 else diffBytes
ms <- new allocatedBytes
(dstIdx, srcIdx) <- fill ms allocatedBytes idx
s <- freeze ms
return ((s, dstIdx), srcIdx)
in allocateAndFill (el : acc) idx' (bytesWritten + addBytes)
fill :: PrimMonad prim
=> MutableString (PrimState prim)
-> CountOf Word8
-> Offset8
-> prim (CountOf Word8, Offset8)
fill mba dsz srcIdxOrig =
loop (Offset 0) srcIdxOrig
where
endDst = (Offset 0) `offsetPlusE` dsz
loop dstIdx srcIdx
| srcIdx == srcEnd = return (offsetAsSize dstIdx, srcIdx)
| dstIdx == endDst = return (offsetAsSize dstIdx, srcIdx)
| otherwise =
let !(Step c srcIdx') = next src srcIdx
c' = f c
!nbBytes = charToBytes (fromEnum c')
in
if dstIdx `offsetPlusE` nbBytes <= sizeAsOffset dsz
then do dstIdx' <- write mba dstIdx c'
loop dstIdx' srcIdx'
else return (offsetAsSize dstIdx, srcIdx)
copyLoop _ [] (Offset 0) = return ()
copyLoop _ [] n = error ("charMap invalid: " <> show n)
copyLoop ms@(MutableString mba) ((String ba, sz):xs) end = do
let start = end `offsetMinusE` sz
Vec.unsafeCopyAtRO mba start ba (Offset 0) sz
copyLoop ms xs start
snoc :: String -> Char -> String
snoc s@(String ba) c
| len == CountOf 0 = singleton c
| otherwise = runST $ do
ms <- new (len + nbBytes)
let (MutableString mba) = ms
Vec.unsafeCopyAtRO mba (Offset 0) ba (Offset 0) len
_ <- write ms (azero `offsetPlusE` len) c
freeze ms
where
!len = size s
!nbBytes = charToBytes (fromEnum c)
cons :: Char -> String -> String
cons c s@(String ba)
| len == CountOf 0 = singleton c
| otherwise = runST $ do
ms <- new (len + nbBytes)
let (MutableString mba) = ms
idx <- write ms (Offset 0) c
Vec.unsafeCopyAtRO mba idx ba (Offset 0) len
freeze ms
where
!len = size s
!nbBytes = charToBytes (fromEnum c)
unsnoc :: String -> Maybe (String, Char)
unsnoc s@(String arr)
| sz == 0 = Nothing
| otherwise =
let !(StepBack c idx) = prev s (sizeAsOffset sz)
in Just (String $ Vec.take (offsetAsSize idx) arr, c)
where
sz = size s
uncons :: String -> Maybe (Char, String)
uncons s@(String ba)
| null s = Nothing
| otherwise =
let !(Step c idx) = next s azero
in Just (c, String $ Vec.drop (offsetAsSize idx) ba)
find :: (Char -> Bool) -> String -> Maybe Char
find predicate s = loop (Offset 0)
where
!sz = size s
end = Offset 0 `offsetPlusE` sz
loop idx
| idx == end = Nothing
| otherwise =
let !(Step c idx') = next s idx
in case predicate c of
True -> Just c
False -> loop idx'
sortBy :: (Char -> Char -> Ordering) -> String -> String
sortBy sortF s = fromList $ Data.List.sortBy sortF $ toList s
filter :: (Char -> Bool) -> String -> String
filter predicate (String arr) = runST $ do
(finalSize, dst) <- newNative sz $ \(MutableBlock mba) ->
C.onBackendPrim (\ba@(Block !_) -> Alg.copyFilter predicate sz mba ba start)
(\fptr -> withFinalPtr fptr $ \ptr@(Ptr !_) -> Alg.copyFilter predicate sz mba ptr start)
arr
freezeShrink finalSize dst
where
!sz = C.length arr
!start = C.offset arr
reverse :: String -> String
reverse (String arr) = runST $ do
s <- newNative_ (C.length arr) $ \(MutableBlock mba) ->
C.onBackendPrim
(\ba@(Block !_) -> UTF8.reverse mba 0 ba start end)
(\fptr -> withFinalPtr fptr $ \ptr@(Ptr !_) -> UTF8.reverse mba 0 ptr start end)
arr
freeze s
where
!(C.ValidRange start end) = C.offsetsValidRange arr
indices :: String -> String -> [Offset8]
indices (String ned) (String hy) = Vec.indices ned hy
replace :: String -> String -> String -> String
replace (String needle) (String replacement) (String haystack) =
String $ Vec.replace needle replacement haystack
index :: String -> Offset Char -> Maybe Char
index s n
| ofs >= end = Nothing
| otherwise =
let (Step !c _) = next s ofs
in Just c
where
!nbBytes = size s
end = 0 `offsetPlusE` nbBytes
ofs = indexN (offsetAsSize n) s
findIndex :: (Char -> Bool) -> String -> Maybe (Offset Char)
findIndex predicate s = loop 0 0
where
!sz = size s
loop ofs idx
| idx .==# sz = Nothing
| otherwise =
let !(Step c idx') = next s idx
in case predicate c of
True -> Just ofs
False -> loop (ofs+1) idx'
data Encoding
= ASCII7
| UTF8
| UTF16
| UTF32
| ISO_8859_1
deriving (Typeable, Data, Eq, Ord, Show, Enum, Bounded)
fromEncoderBytes :: ( Encoder.Encoding encoding
, PrimType (Encoder.Unit encoding)
)
=> encoding
-> UArray Word8
-> (String, Maybe ValidationFailure, UArray Word8)
fromEncoderBytes enc bytes =
case runST $ Encoder.convertFromTo enc EncoderUTF8 (Vec.recast bytes) of
Left (off, _) ->
let (b1, b2) = Vec.splitAt (offsetAsSize off) (Vec.recast bytes)
in (String $ Vec.recast b1, Just BuildingFailure, Vec.recast b2)
Right converted -> (String converted, Nothing, mempty)
fromBytes :: Encoding -> UArray Word8 -> (String, Maybe ValidationFailure, UArray Word8)
fromBytes ASCII7 bytes = fromEncoderBytes Encoder.ASCII7 bytes
fromBytes ISO_8859_1 bytes = fromEncoderBytes Encoder.ISO_8859_1 bytes
fromBytes UTF16 bytes = fromEncoderBytes Encoder.UTF16 bytes
fromBytes UTF32 bytes = fromEncoderBytes Encoder.UTF32 bytes
fromBytes UTF8 bytes
| C.null bytes = (mempty, Nothing, mempty)
| otherwise =
case validate bytes (Offset 0) (C.length bytes) of
(_, Nothing) -> (fromBytesUnsafe bytes, Nothing, mempty)
(pos, Just vf) ->
let (b1, b2) = C.splitAt (offsetAsSize pos) bytes
in (fromBytesUnsafe b1, toErr vf, b2)
where
toErr MissingByte = Nothing
toErr InvalidHeader = Just InvalidHeader
toErr InvalidContinuation = Just InvalidContinuation
toErr BuildingFailure = Just BuildingFailure
fromBytesLenient :: UArray Word8 -> (String, UArray Word8)
fromBytesLenient bytes
| C.null bytes = (mempty, mempty)
| otherwise =
case validate bytes (Offset 0) (C.length bytes) of
(_, Nothing) -> (fromBytesUnsafe bytes, mempty)
(_, Just BuildingFailure) -> error "fromBytesLenient: FIXME!"
(pos, Just MissingByte) ->
let (b1,b2) = C.splitAt (offsetAsSize pos) bytes
in (fromBytesUnsafe b1, b2)
(pos, Just InvalidHeader) ->
let (b1,b2) = C.splitAt (offsetAsSize pos) bytes
(_,b3) = C.splitAt 1 b2
(s3, r) = fromBytesLenient b3
in (mconcat [fromBytesUnsafe b1,replacement, s3], r)
(pos, Just InvalidContinuation) ->
let (b1,b2) = C.splitAt (offsetAsSize pos) bytes
(_,b3) = C.splitAt 1 b2
(s3, r) = fromBytesLenient b3
in (mconcat [fromBytesUnsafe b1,replacement, s3], r)
where
replacement :: String
!replacement = fromBytesUnsafe $ fromList [0xef,0xbf,0xbd]
fromChunkBytes :: [UArray Word8] -> [String]
fromChunkBytes l = loop l
where
loop [] = []
loop [bytes] =
case validate bytes (Offset 0) (C.length bytes) of
(_, Nothing) -> [fromBytesUnsafe bytes]
(_, Just err) -> doErr err
loop (bytes:cs@(c1:c2)) =
case validate bytes (Offset 0) (C.length bytes) of
(_, Nothing) -> fromBytesUnsafe bytes : loop cs
(pos, Just MissingByte) ->
let (b1,b2) = C.splitAt (offsetAsSize pos) bytes
in fromBytesUnsafe b1 : loop ((b2 `mappend` c1) : c2)
(_, Just err) -> doErr err
doErr err = error ("fromChunkBytes: " <> show err)
fromBytesUnsafe :: UArray Word8 -> String
fromBytesUnsafe = String
toEncoderBytes :: ( Encoder.Encoding encoding
, PrimType (Encoder.Unit encoding)
, Exception (Encoder.Error encoding)
)
=> encoding
-> UArray Word8
-> UArray Word8
toEncoderBytes enc bytes = Vec.recast $
case runST $ Encoder.convertFromTo EncoderUTF8 enc bytes of
Left _ -> error "toEncoderBytes: FIXME!"
Right converted -> converted
toBytes :: Encoding -> String -> UArray Word8
toBytes UTF8 (String bytes) = bytes
toBytes ASCII7 (String bytes) = toEncoderBytes Encoder.ASCII7 bytes
toBytes ISO_8859_1 (String bytes) = toEncoderBytes Encoder.ISO_8859_1 bytes
toBytes UTF16 (String bytes) = toEncoderBytes Encoder.UTF16 bytes
toBytes UTF32 (String bytes) = toEncoderBytes Encoder.UTF32 bytes
lines :: String -> [String]
lines s =
case breakLine s of
Left _ -> [s]
Right (line,r) -> line : lines r
words :: String -> [String]
words = fmap fromList . Prelude.words . toList
builderAppend :: PrimMonad state => Char -> Builder String MutableString Word8 state err ()
builderAppend c = Builder $ State $ \(i, st, e) ->
if offsetAsSize i + nbBytes >= chunkSize st
then do
cur <- unsafeFreezeShrink (curChunk st) (offsetAsSize i)
newChunk <- new (chunkSize st)
writeUTF8Char newChunk (Offset 0) utf8Char
return ((), (sizeAsOffset nbBytes, st { prevChunks = cur : prevChunks st
, prevChunksSize = offsetAsSize i + prevChunksSize st
, curChunk = newChunk
}, e))
else do
writeUTF8Char (curChunk st) i utf8Char
return ((), (i + sizeAsOffset nbBytes, st, e))
where
utf8Char = asUTF8Char c
nbBytes = numBytes utf8Char
builderBuild :: PrimMonad m => Int -> Builder String MutableString Word8 m err () -> m (Either err String)
builderBuild sizeChunksI sb
| sizeChunksI <= 3 = builderBuild 64 sb
| otherwise = do
firstChunk <- new sizeChunks
(i, st, e) <- snd <$> runState (runBuilder sb) (Offset 0, BuildingState [] (CountOf 0) firstChunk sizeChunks, Nothing)
case e of
Just err -> return (Left err)
Nothing -> do
cur <- unsafeFreezeShrink (curChunk st) (offsetAsSize i)
let totalSize = prevChunksSize st + offsetAsSize i
final <- Vec.new totalSize >>= fillFromEnd totalSize (cur : prevChunks st) >>= Vec.unsafeFreeze
return . Right . String $ final
where
sizeChunks = CountOf sizeChunksI
fillFromEnd _ [] mba = return mba
fillFromEnd !end (String x:xs) mba = do
let sz = Vec.length x
let start = end `sizeSub` sz
Vec.unsafeCopyAtRO mba (sizeAsOffset start) x (Offset 0) sz
fillFromEnd start xs mba
builderBuild_ :: PrimMonad m => Int -> Builder String MutableString Word8 m () () -> m String
builderBuild_ sizeChunksI sb = either (\() -> internalError "impossible output") id <$> builderBuild sizeChunksI sb
stringDewrap :: (Block Word8 -> Offset Word8 -> a)
-> (Ptr Word8 -> Offset Word8 -> ST s a)
-> String
-> a
stringDewrap withBa withPtr (String ba) = C.unsafeDewrap withBa withPtr ba
{-# INLINE stringDewrap #-}
readIntegral :: (HasNegation i, IntegralUpsize Word8 i, Additive i, Multiplicative i, IsIntegral i) => String -> Maybe i
readIntegral str
| sz == 0 = Nothing
| otherwise = stringDewrap withBa (\ptr@(Ptr !_) -> pure . withPtr ptr) str
where
!sz = size str
withBa ba ofs =
let negativeSign = UTF8.expectAscii ba ofs 0x2d
startOfs = if negativeSign then succ ofs else ofs
in case decimalDigitsBA 0 ba endOfs startOfs of
(# acc, True, endOfs' #) | endOfs' > startOfs -> Just $! if negativeSign then negate acc else acc
_ -> Nothing
where !endOfs = ofs `offsetPlusE` sz
withPtr addr ofs =
let negativeSign = UTF8.expectAscii addr ofs 0x2d
startOfs = if negativeSign then succ ofs else ofs
in case decimalDigitsPtr 0 addr endOfs startOfs of
(# acc, True, endOfs' #) | endOfs' > startOfs -> Just $! if negativeSign then negate acc else acc
_ -> Nothing
where !endOfs = ofs `offsetPlusE` sz
{-# SPECIALISE readIntegral :: String -> Maybe Integer #-}
{-# SPECIALISE readIntegral :: String -> Maybe Int #-}
readInteger :: String -> Maybe Integer
readInteger = readIntegral
readNatural :: String -> Maybe Natural
readNatural str
| sz == 0 = Nothing
| otherwise = stringDewrap withBa (\ptr@(Ptr !_) -> pure . withPtr ptr) str
where
!sz = size str
withBa ba stringStart =
case decimalDigitsBA 0 ba eofs stringStart of
(# acc, True, endOfs #) | endOfs > stringStart -> Just acc
_ -> Nothing
where eofs = stringStart `offsetPlusE` sz
withPtr addr stringStart =
case decimalDigitsPtr 0 addr eofs stringStart of
(# acc, True, endOfs #) | endOfs > stringStart -> Just acc
_ -> Nothing
where eofs = stringStart `offsetPlusE` sz
readDouble :: String -> Maybe Double
readDouble s =
readFloatingExact s $ \isNegative integral floatingDigits mExponant ->
Just $ applySign isNegative $ case (floatingDigits, mExponant) of
(0, Nothing) -> naturalToDouble integral
(0, Just exponent) -> withExponant exponent $ naturalToDouble integral
(floating, Nothing) -> applyFloating floating $ naturalToDouble integral
(floating, Just exponent) -> withExponant exponent $ applyFloating floating $ naturalToDouble integral
where
applySign True = negate
applySign False = id
withExponant e v = v * doubleExponant 10 e
applyFloating digits n = n / (10 Prelude.^ digits)
readRational :: String -> Maybe Prelude.Rational
readRational s =
readFloatingExact s $ \isNegative integral floatingDigits mExponant ->
case mExponant of
Just exponent
| exponent < -10000 || exponent > 10000 -> Nothing
| otherwise -> Just $ modF isNegative integral % (10 Prelude.^ (cast floatingDigits - exponent))
Nothing -> Just $ modF isNegative integral % (10 Prelude.^ floatingDigits)
where
modF True = negate . integralUpsize
modF False = integralUpsize
type ReadFloatingCallback a = Bool
-> Natural
-> Word
-> Maybe Int
-> Maybe a
readFloatingExact :: String -> ReadFloatingCallback a -> Maybe a
readFloatingExact str f
| sz == 0 = Nothing
| otherwise = stringDewrap withBa withPtr str
where
!sz = size str
withBa ba stringStart =
let !isNegative = UTF8.expectAscii ba stringStart 0x2d
in consumeIntegral isNegative (if isNegative then stringStart+1 else stringStart)
where
eofs = stringStart `offsetPlusE` sz
consumeIntegral !isNegative startOfs =
case decimalDigitsBA 0 ba eofs startOfs of
(# acc, True , endOfs #) | endOfs > startOfs -> f isNegative acc 0 Nothing
(# acc, False, endOfs #) | endOfs > startOfs ->
if UTF8.expectAscii ba endOfs 0x2e
then consumeFloat isNegative acc (endOfs + 1)
else consumeExponant isNegative acc 0 endOfs
_ -> Nothing
consumeFloat isNegative integral startOfs =
case decimalDigitsBA integral ba eofs startOfs of
(# acc, True, endOfs #) | endOfs > startOfs -> let (CountOf !diff) = endOfs - startOfs
in f isNegative acc (cast diff) Nothing
(# acc, False, endOfs #) | endOfs > startOfs -> let (CountOf !diff) = endOfs - startOfs
in consumeExponant isNegative acc (cast diff) endOfs
_ -> Nothing
consumeExponant !isNegative !integral !floatingDigits !startOfs
| startOfs == eofs = f isNegative integral floatingDigits Nothing
| otherwise =
case UTF8.nextAscii ba startOfs of
StepASCII 0x45 -> consumeExponantSign (startOfs+1)
StepASCII 0x65 -> consumeExponantSign (startOfs+1)
_ -> Nothing
where
consumeExponantSign ofs
| ofs == eofs = Nothing
| otherwise = let exponentNegative = UTF8.expectAscii ba ofs 0x2d
in consumeExponantNumber exponentNegative (if exponentNegative then ofs + 1 else ofs)
consumeExponantNumber exponentNegative ofs =
case decimalDigitsBA 0 ba eofs ofs of
(# acc, True, endOfs #) | endOfs > ofs -> f isNegative integral floatingDigits (Just $! if exponentNegative then negate acc else acc)
_ -> Nothing
withPtr ptr@(Ptr !_) stringStart = pure $
let !isNegative = UTF8.expectAscii ptr stringStart 0x2d
in consumeIntegral isNegative (if isNegative then stringStart+1 else stringStart)
where
eofs = stringStart `offsetPlusE` sz
consumeIntegral !isNegative startOfs =
case decimalDigitsPtr 0 ptr eofs startOfs of
(# acc, True , endOfs #) | endOfs > startOfs -> f isNegative acc 0 Nothing
(# acc, False, endOfs #) | endOfs > startOfs ->
if UTF8.expectAscii ptr endOfs 0x2e
then consumeFloat isNegative acc (endOfs + 1)
else consumeExponant isNegative acc 0 endOfs
_ -> Nothing
consumeFloat isNegative integral startOfs =
case decimalDigitsPtr integral ptr eofs startOfs of
(# acc, True, endOfs #) | endOfs > startOfs -> let (CountOf !diff) = endOfs - startOfs
in f isNegative acc (cast diff) Nothing
(# acc, False, endOfs #) | endOfs > startOfs -> let (CountOf !diff) = endOfs - startOfs
in consumeExponant isNegative acc (cast diff) endOfs
_ -> Nothing
consumeExponant !isNegative !integral !floatingDigits !startOfs
| startOfs == eofs = f isNegative integral floatingDigits Nothing
| otherwise =
case UTF8.nextAscii ptr startOfs of
StepASCII 0x45 -> consumeExponantSign (startOfs+1)
StepASCII 0x65 -> consumeExponantSign (startOfs+1)
_ -> Nothing
where
consumeExponantSign ofs
| ofs == eofs = Nothing
| otherwise = let exponentNegative = UTF8.expectAscii ptr ofs 0x2d
in consumeExponantNumber exponentNegative (if exponentNegative then ofs + 1 else ofs)
consumeExponantNumber exponentNegative ofs =
case decimalDigitsPtr 0 ptr eofs ofs of
(# acc, True, endOfs #) | endOfs > ofs -> f isNegative integral floatingDigits (Just $! if exponentNegative then negate acc else acc)
_ -> Nothing
decimalDigitsBA :: (IntegralUpsize Word8 acc, Additive acc, Multiplicative acc, Integral acc)
=> acc
-> Block Word8
-> Offset Word8
-> Offset Word8
-> (# acc, Bool, Offset Word8 #)
decimalDigitsBA startAcc ba !endOfs !startOfs = loop startAcc startOfs
where
loop !acc !ofs
| ofs == endOfs = (# acc, True, ofs #)
| otherwise =
case UTF8.nextAsciiDigit ba ofs of
sg@(StepDigit d) | isValidStepDigit sg -> loop (10 * acc + integralUpsize d) (succ ofs)
| otherwise -> (# acc, False, ofs #)
{-# SPECIALIZE decimalDigitsBA :: Integer -> Block Word8 -> Offset Word8 -> Offset Word8 -> (# Integer, Bool, Offset Word8 #) #-}
{-# SPECIALIZE decimalDigitsBA :: Natural -> Block Word8 -> Offset Word8 -> Offset Word8 -> (# Natural, Bool, Offset Word8 #) #-}
{-# SPECIALIZE decimalDigitsBA :: Int -> Block Word8 -> Offset Word8 -> Offset Word8 -> (# Int, Bool, Offset Word8 #) #-}
{-# SPECIALIZE decimalDigitsBA :: Word -> Block Word8 -> Offset Word8 -> Offset Word8 -> (# Word, Bool, Offset Word8 #) #-}
decimalDigitsPtr :: (IntegralUpsize Word8 acc, Additive acc, Multiplicative acc, Integral acc)
=> acc
-> Ptr Word8
-> Offset Word8
-> Offset Word8
-> (# acc, Bool, Offset Word8 #)
decimalDigitsPtr startAcc ptr !endOfs !startOfs = loop startAcc startOfs
where
loop !acc !ofs
| ofs == endOfs = (# acc, True, ofs #)
| otherwise =
case UTF8.nextAsciiDigit ptr ofs of
sg@(StepDigit d) | isValidStepDigit sg -> loop (10 * acc + integralUpsize d) (succ ofs)
| otherwise -> (# acc, False, ofs #)
{-# SPECIALIZE decimalDigitsPtr :: Integer -> Ptr Word8 -> Offset Word8 -> Offset Word8 -> (# Integer, Bool, Offset Word8 #) #-}
{-# SPECIALIZE decimalDigitsPtr :: Natural -> Ptr Word8 -> Offset Word8 -> Offset Word8 -> (# Natural, Bool, Offset Word8 #) #-}
{-# SPECIALIZE decimalDigitsPtr :: Int -> Ptr Word8 -> Offset Word8 -> Offset Word8 -> (# Int, Bool, Offset Word8 #) #-}
{-# SPECIALIZE decimalDigitsPtr :: Word -> Ptr Word8 -> Offset Word8 -> Offset Word8 -> (# Word, Bool, Offset Word8 #) #-}
caseConvert :: (Char7 -> Char7) -> (Char -> CM) -> String -> String
caseConvert opASCII op s@(String arr) = runST $ do
mba <- MBLK.new iLen
nL <- C.onBackendPrim
(\blk -> go mba blk (Offset 0) start)
(\fptr -> withFinalPtr fptr $ \ptr -> go mba ptr (Offset 0) start)
arr
freeze . MutableString $ MVec.MUArray 0 nL (C.MUArrayMBA mba)
where
!(C.ValidRange start end) = C.offsetsValidRange arr
!iLen = 1 + C.length arr
go :: (Indexable container Word8, PrimMonad prim)
=> MutableBlock Word8 (PrimState prim)
-> container
-> Offset Word8
-> Offset Word8
-> prim (CountOf Word8)
go !dst !src = loop dst iLen 0
where
eSize !e = if e == '\0' then 0 else charToBytes (fromEnum e)
loop !dst !allocLen !nLen !dstIdx !srcIdx
| srcIdx == end = return nLen
| nLen == allocLen = realloc
| headerIsAscii h = do
UTF8.writeASCII dst dstIdx (opASCII $ Char7 $ stepAsciiRawValue h)
loop dst allocLen (nLen + 1) (dstIdx+Offset 1) (srcIdx+Offset 1)
| otherwise = do
let !(CM c1 c2 c3) = op c
!(Step c nextSrcIdx) = UTF8.nextWith h src (srcIdx+Offset 1)
nextDstIdx <- UTF8.writeUTF8 dst dstIdx c1
if c2 == '\0'
then loop dst allocLen (nLen + charToBytes (fromEnum c1)) nextDstIdx nextSrcIdx
else do
let !cSize = eSize c1 + eSize c2 + eSize c3
nextDstIdx <- UTF8.writeUTF8 dst nextDstIdx c2
nextDstIdx <- if c3 == '\0' then return nextDstIdx else UTF8.writeUTF8 dst nextDstIdx c3
loop dst allocLen (nLen + cSize) nextDstIdx nextSrcIdx
where
{-# NOINLINE realloc #-}
realloc = do
let nAll = allocLen + allocLen + 1
nDst <- MBLK.new nAll
MBLK.unsafeCopyElements nDst 0 dst 0 nLen
loop nDst nAll nLen dstIdx srcIdx
h = UTF8.nextAscii src srcIdx
upper :: String -> String
upper = caseConvert c7Upper upperMapping
lower :: String -> String
lower = caseConvert c7Lower lowerMapping
caseFold :: String -> String
caseFold = caseConvert c7Upper foldMapping
isPrefixOf :: String -> String -> Bool
isPrefixOf (String needle) (String haystack) = C.isPrefixOf needle haystack
isSuffixOf :: String -> String -> Bool
isSuffixOf (String needle) (String haystack)
| needleLen > hayLen = False
| otherwise = needle == C.revTake needleLen haystack
where
needleLen = C.length needle
hayLen = C.length haystack
isInfixOf :: String -> String -> Bool
isInfixOf (String needle) (String haystack)
= loop (hayLen - needleLen) haystack
where
needleLen = C.length needle
hayLen = C.length haystack
loop Nothing _ = False
loop (Just cnt) haystack' = needle == C.take needleLen haystack' || loop (cnt-1) (C.drop 1 haystack')
stripPrefix :: String -> String -> Maybe String
stripPrefix (String suffix) (String arr)
| C.isPrefixOf suffix arr = Just $ String $ C.drop (C.length suffix) arr
| otherwise = Nothing
stripSuffix :: String -> String -> Maybe String
stripSuffix (String prefix) (String arr)
| C.isSuffixOf prefix arr = Just $ String $ C.revDrop (C.length prefix) arr
| otherwise = Nothing
all :: (Char -> Bool) -> String -> Bool
all predicate (String arr) = C.onBackend goBA (\_ -> pure . goAddr) arr
where
!(C.ValidRange start end) = C.offsetsValidRange arr
goBA ba = UTF8.all predicate ba start end
goAddr addr = UTF8.all predicate addr start end
any :: (Char -> Bool) -> String -> Bool
any predicate (String arr) = C.onBackend goBA (\_ -> pure . goAddr) arr
where
!(C.ValidRange start end) = C.offsetsValidRange arr
goBA ba = UTF8.any predicate ba start end
goAddr addr = UTF8.any predicate addr start end
toBase64 :: String -> String
toBase64 (String src) = fromBytesUnsafe . Vec.toBase64Internal set src $ True
where
!set = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"#
toBase64URL :: Bool -> String -> String
toBase64URL padded (String src) = fromBytesUnsafe . Vec.toBase64Internal set src $ padded
where
!set = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"#
toBase64OpenBSD :: String -> String
toBase64OpenBSD (String src) = fromBytesUnsafe . Vec.toBase64Internal set src $ False
where
!set = "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"#