{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}

#if defined(PURE_HASKELL)
{-# LANGUAGE BangPatterns #-}
#else
{-# LANGUAGE UnliftedFFITypes #-}
#endif

{-# OPTIONS_HADDOCK not-home #-}

-- | Implements 'measure_off', using efficient C routines by default.
module Data.Text.Internal.Measure
  ( measure_off
  )
where

import GHC.Exts

#if defined(PURE_HASKELL)
import GHC.Word
import Data.Text.Internal.Encoding.Utf8 (utf8LengthByLeader)
#endif

import Foreign.C.Types (CSize(..))
import System.Posix.Types (CSsize(..))

#if defined(PURE_HASKELL)

-- | Pure-Haskell variant of 'measure_off', called as
-- @measure_off ba off len cnt@.
--
-- Starting at byte offset @off@ of @ba@ and looking at no more than @len@
-- bytes, walk over up to @cnt@ code points:
--
-- * if @cnt@ code points are found, return the number of bytes they
--   occupy (a non-negative result);
--
-- * otherwise return the negated number of code points found before the
--   @len@ bytes ran out.
--
-- A result of @0@ is therefore returned both when @cnt@ is @0@ and when
-- @len@ is @0@.
--
-- Only the first byte of each code point is inspected (through
-- 'utf8LengthByLeader'); nothing here validates the buffer, so it
-- presumably must already hold valid UTF-8, as the C-backed variant
-- requires.
measure_off :: ByteArray# -> CSize -> CSize -> CSize -> CSsize
measure_off ba off len cnt = go 0 0
  where
    -- cc: code points consumed so far; i: bytes consumed so far,
    -- counted relative to off.
    go !cc !i
      -- return the number of bytes for the first cnt codepoints,
      | cc == cnt = fromIntegral i
      -- return negated number of codepoints if there are fewer than cnt
      | i >= len = negate (fromIntegral cc)
      | otherwise =
          let !(I# o) = fromIntegral (off + i)
              -- leading byte of the next code point
              !b = indexWord8Array# ba o
          in go (cc + 1) (i + fromIntegral (utf8LengthByLeader (W8# b)))

#else

-- | The input buffer (arr :: ByteArray#, off :: CSize, len :: CSize)
-- must specify a valid UTF-8 sequence; this condition is not checked.
foreign import ccall unsafe "_hs_text_measure_off" measure_off
  :: ByteArray# -> CSize -> CSize -> CSize -> CSsize

#endif