\begin{code}
module Data.Rope(
Rope,
empty,
singleton,
pack,
unpack,
fromByteString,
toByteString,
cons,
snoc,
append,
head,
uncons,
last,
tail,
init,
null,
length,
map,
reverse,
intercalate,
insert,
foldl,
foldl',
foldr,
take,
drop,
splitAt#,
splitAt,
index,
elemIndex,
elemIndices,
readFile,
hGet,
hPut,
hPutStrLn,
hPutStr,
putStrLn,
putStr,
) where
import Data.Rope.Internals
import Foreign.Storable
import Foreign.ForeignPtr
import qualified Foreign.Concurrent as FC
import Foreign.Marshal.Alloc
import Foreign.Marshal.Array
import Foreign.Marshal.Utils
import Foreign.Ptr
import Foreign.C.Types
import qualified Data.ByteString.Internal as B
import Data.String
import Data.Word
#if __GLASGOW_HASKELL__ >=608
import GHC.IO (IO(IO),unsafePerformIO,unsafeInterleaveIO)
import GHC.Base (realWorld#)
import GHC.IO.Handle.Types
import GHC.IO.Handle.Internals
import GHC.IO.Device as IODevice
#else
import System.IO.Unsafe
#endif
import Data.Maybe
import GHC.Conc
import System.Posix (openFd,closeFd,defaultFileFlags,getFdStatus,getFileStatus,
fileSize,OpenMode(..),
OpenFileFlags(trunc))
import System.IO hiding (hPutStrLn,hPutStr,putStrLn,putStr,readFile)
import qualified Prelude(length,map,splitAt)
import Prelude hiding (length,null,head,tail,last,init,map,reverse,
foldl,foldr,
take,drop,splitAt,
putStrLn,putStr,readFile)
import Data.Bits hiding(rotateL,rotateR)
\end{code}
Attention, on ne garantit pas necessairement que les deux sous-arbres sont non-vides.
Il faudrait regler ce probleme dans balance.
i0 est le premier indice ou on a le droit d'ecrire.
File sert a ne pas recrire tout un fichier. On l'enleve des qu'on modifie
la @Rope@, par contre. Il faut recrire la fin du fichier quand meme, dans ce cas.
\begin{code}
data Rope=
Concat { sizeC:: !Int,
length_:: !Int,
l::Rope,
r::Rope }
| String { contents:: !(ForeignPtr Word8),
i0:: !(ForeignPtr Int),
offset:: !Int,
length_:: !Int }
| File { handle::Handle,
position:: !Int,
length_:: !Int,
rope::Rope }
deriving (Show)
instance IsString Rope where
fromString s=pack $ Prelude.map (fromIntegral.fromEnum) s
leafSize::Int
leafSize=0x10000
\end{code}
Interface basique. Rien \`a dire.
Pour cons et snoc, il ne servirait a rien de rajouter un bout "non utilise"
au debut de la chaine : vu qu'on ecrit toutes les chaines depuis l'offset 0,
si on prend une sous-chaine, c'est que le debut est deja utilise dans une autre
sous-chaine.
\begin{code}
cons::Word8->Rope->Rope
cons w x=append (singleton w) x
snoc::Rope->Word8->Rope
snoc x w=append x (singleton w)
head::Rope->Word8
head (String{contents,offset,length_})
|length_>0 = inlinePerformIO $ withForeignPtr contents $ \c->peekByteOff c offset
|otherwise = error "head"
head (Concat{l})=head l
head (File{rope})=head rope
uncons::Rope->Maybe (Word8, Rope)
uncons (s@String{contents,offset,length_})
| length_<=0 = Nothing
| otherwise=
let u=inlinePerformIO $ withForeignPtr contents $ \c->peekByteOff c offset in
Just (u,s { contents,offset=offset+1,length_=length_1 })
uncons (c@Concat{l,r,length_})=
case uncons l of
Just (x,y)->Just (x,balance $ c { l=y, length_=length_1 })
Nothing->uncons r
uncons (File{rope})=uncons rope
last::Rope->Word8
last (String{contents,offset,length_})
| length_<=0 = error "last"
| otherwise = inlinePerformIO $ withForeignPtr contents $ \c->peekByteOff c $ offset+length_1
last (Concat{l,r})
|size r<=0 = last l
|otherwise = last r
last File{rope}=last rope
tail::Rope->Rope
tail (s@String{offset,length_})
| length_<=0 = error "tail"
| otherwise = s { offset=offset+1,length_=length_1 }
tail (c@Concat{l,r})
| length l<=0 = tail r
| otherwise = balance $ c { l=tail l }
tail File{rope}=tail rope
init::Rope->Rope
init (s@String{length_})
| length_<=0 = error "init"
| otherwise = s { length_=length_1 }
init (s@Concat{r})=balance $ s { r=init r }
init File{rope}=init rope
null::Rope->Bool
null x=length x>0
length::Rope->Int
length (String{length_})=length_
length (Concat{length_})=length_
length (File{rope})=length rope
size::Rope->Int
size (Concat {sizeC})=sizeC
size (File {rope})=size rope
size _=1
\end{code}
Transformations
\begin{code}
map::(Word8->Word8)->Rope->Rope
map f (String{contents,offset,length_})=
unsafePerformIO $ withForeignPtr contents $ \c->do
contents<-mallocForeignPtrBytes leafSize
withForeignPtr contents $ \p->copyMap p (c`plusPtr`offset) length_
i0<-mallocForeignPtr
withForeignPtr i0 $ \i->poke i length_
return $ String { contents,offset=0,length_,i0 }
where
copyMap _ _ 0=return ()
copyMap p p' len=do
peek p' >>= ((poke p).f)
copyMap (p`plusPtr`1) (p'`plusPtr`1) $ len1
map f (Concat{l,r})=append (map f l) (map f r)
map f File{rope}=map f rope
reverse::Rope->Rope
reverse (String{contents,offset,length_})=
unsafePerformIO $ withForeignPtr contents $ \c->do
contents<-mallocForeignPtrBytes leafSize
withForeignPtr contents $ \p->
copyRev (p`plusPtr`(length_1)) (c`plusPtr`offset) length_
i0<-mallocForeignPtr
withForeignPtr i0 $ \i->poke i length_
return $ String { contents,offset=0,length_,i0 }
where
copyRev::Ptr Word8->Ptr Word8->Int->IO()
copyRev _ _ 0=return ()
copyRev p p' len=do
peek p' >>= poke p
copyRev (p`plusPtr`(1)) (p'`plusPtr`1) $ len1
reverse (Concat{l,r})=append (reverse r) (reverse l)
reverse File{rope}=reverse rope
intercalate::Rope->[Rope]->Rope
intercalate rope list=
intercalate_ list empty
where
intercalate_ [] x=x
intercalate_ [h] x=append x h
intercalate_ (h:s) x=intercalate_ s (append (append h rope) x)
\end{code}
Il n'est pas clair qu'on ne puisse pas faire beaucoup mieux que ce pack.
Par exemple, vu qu'on calcule la taille de la liste, on pourrait sortir un arbre
directement \'equilibr\'e. Vu qu'on ne fait jamais pack avec des 'ByteString's
(sauf ghc quand il compile des IsString), \c ca ne vaut sans doute pas le co\^ut.
Par contre, append est utilise presque partout. Il est crucial que son implementation
soit extremement efficace.
\begin{code}
empty::Rope
empty=unsafePerformIO $ do
contents<-mallocForeignPtrArray leafSize
i0<-mallocForeignPtr
withForeignPtr i0 $ \i->poke i (1)
return $ String { contents,i0,offset=0,length_=0 }
singleton::Word8->Rope
singleton c=unsafePerformIO $ do
contents<-mallocForeignPtrArray leafSize
withForeignPtr contents $ \con->poke con c
i0<-mallocForeignPtr
withForeignPtr i0 $ \i->poke i 1
return $ String { contents,i0,offset=0,length_=1 }
pack::[Word8]->Rope
pack s=
if Prelude.length s<=leafSize then unsafePerformIO $ do
contents<-mallocForeignPtrArray leafSize
i0<-mallocForeignPtr
withForeignPtr i0 $ \i->poke i $ Prelude.length s
withForeignPtr contents $ \c->fillPtr c s
return $ String { contents, i0, offset=0, length_=Prelude.length s }
else
let (a,b)=Prelude.splitAt leafSize s in
append (pack a) (pack b)
where
fillPtr::Storable a=>Ptr a->[a]->IO ()
fillPtr _ []=return ()
fillPtr ptr (h:t)=do
poke ptr h
fillPtr (plusPtr ptr 1) t
unpack::Rope->[Word8]
unpack (Concat{l,r})=unpack l++unpack r
unpack (String{contents,offset,length_})=
unsafePerformIO $
mapM (\i->withForeignPtr contents $
\c->peek (plusPtr c i)::IO Word8) [offset..(offset+length_1)]
unpack File{rope}=unpack rope
append::Rope->Rope->Rope
append File{rope=a} File{rope=b}=append a b
append File{rope} a=append rope a
append a File{rope}=append a rope
append sa@(String {}) sb@(String{})
|length sa + offset sa + length sb <= leafSize = unsafePerformIO $ do
i0_<-withForeignPtr (i0 sa) $ peek
let l=length sa
if i0_<offset sa+l then do
withForeignPtr (i0 sa) $ \i->poke i $ offset sa+l+length sb
withForeignPtr (contents sa) $
\ca->withForeignPtr (contents sb) $
\cb->copyArray (ca`plusPtr`(l+offset sa)::Ptr Word8)
(cb`plusPtr`(offset sb)) $ length sb
return $ sa { length_=length sa+length sb }
else
return $ Concat { sizeC=3,
length_=length sa+length sb,
l=sa,
r=sb }
|otherwise=
Concat { sizeC=3,
length_=length sa+length sb,
l=sa,
r=sb }
append x@String{} y@Concat{sizeC,l}=balance $ y { sizeC=1+sizeC,
length_=length x+length y,
l=append x l }
append x y = balance $ Concat { sizeC=size x+size y,
length_=length x+length y,
l=x,
r=y }
insert::Rope->Int->Rope->Rope
insert a 0 b=append a b
insert a i (b@String{})=
balance $ Concat { sizeC=size a+size b,
length_=length a+length b,
l=String { contents=contents b,
i0=i0 b,
offset=offset b,
length_=min i $ length b },
r=append a $ String { contents=contents b,
i0=i0 b,
offset=i+offset b,
length_=max 0 $ (length b)i }
}
insert a i (b@Concat{})
|i>=(length $ r b) = balance $ b { r=insert a (i(length $ r b)) $ r b }
|otherwise = balance $ b { l=insert a i $ l b}
insert a i File{rope}=insert a i rope
\end{code}
R\'e\'equilibrage des arbres.
@delta@ et @ratio@ sont des param\`etres exp\'erimentaux. Peut-\^etre faut-il
profiler un peu.
\begin{code}
delta,ratio::Int
delta=5
ratio=2
balance,rotateL,rotateR,singleL,singleR,doubleL,doubleR::Rope->Rope
balance x=case x of
String{}->x
Concat{l,r}
| length l==0 -> r
| length r==0 -> l
| size r>=delta*size l -> rotateL x
| size l>=delta*size r -> rotateR x
| otherwise->x
_->x
rotateL x=case x of
String{}->x
Concat{r}->case r of
String{}->x
Concat{l=rl,r=rr}
|size rl<ratio*size rr -> singleL x
|otherwise -> doubleL x
_->x
rotateR x=case x of
String{}->x
Concat{l}->case l of
String{}->x
Concat{l=ll,r=lr}
|size lr<ratio*size ll -> singleR x
|otherwise -> doubleR x
_->x
singleL (a@String{})=a
singleL (b@Concat{})=
case r b of
d@Concat{}->
let b'=append (l b) (l d)
in
Concat { length_=length b,
sizeC=size b,
r=r d,
l=b' }
_->b
singleR (a@String{})=a
singleR (d@Concat{})=
case l d of
b@Concat{}->
let d'=append (r b) (r d)
in
Concat { length_=length d,
sizeC=size d,
r=d',
l=l b }
_->d
doubleL (a@Concat{})=singleL $ a { l=singleR $ l a }
doubleL a=a
doubleR (a@Concat{})=singleR $ a { r=singleL $ r a }
doubleR a=a
\end{code}
Unsafe Stuff
\begin{code}
foreign import ccall unsafe "string.h memchr" memchr::Ptr Word8->Word8->CSize->IO (Ptr Word8)
inlinePerformIO::IO a->a
#ifdef __GLASGOW_HASKELL__
inlinePerformIO (IO m)=case m realWorld# of (# _, r #)->r
#else
inlinePerformIO=unsafePerformIO
#endif
\end{code}
Indexing Ropes
\begin{code}
index::Rope->Int->Char
index c i
|i>=length c = error $ "index trop grand : "++(show i)++", longueur = "++(show $ length c)
|otherwise=index_ c i
where
index_ (String{contents,offset}) i_=
inlinePerformIO $ withForeignPtr contents $
\con->do
x<-peek $ plusPtr con (offset+i_) :: IO Word8
return $ toEnum $ fromIntegral x
index_ (Concat{r,l}) i_
|i_>=length l = index_ r (i_length l)
|otherwise = index_ l i_
index_ (File{handle,position,length_}) i_=
unsafePerformIO $ alloca $ \c->do
hSeek handle AbsoluteSeek $ fromIntegral $ position+i_
hGetBuf handle c 1
peek c
elemIndex::Word8->Rope->Maybe Int
elemIndex w rope=
elemIndex_ 0 rope
where
elemIndex_ i (String{contents,offset,length_})=
inlinePerformIO $ withForeignPtr contents $ \con->do
ptr<-memchr (con`plusPtr`offset) w (fromIntegral length_)
return $ if ptr==nullPtr then Nothing else Just $ i+offset+(ptr`minusPtr`con)
elemIndex_ i (Concat{l,r})=
let sl=elemIndex_ i l
sr=elemIndex_ (i+length l) r
in
(sl`par`sr)`seq`(if isNothing sl then sr else sl)
elemIndex_ i (File{handle,position,length_})=
unsafePerformIO $ allocaBytes length_ $ \c->do
hSeek handle AbsoluteSeek $ fromIntegral position
l<-hGetBuf handle c length_
loop c 0 l
loop ptr i l
| i>=l = return Nothing
| otherwise = do
x<-peek ptr
if x==w then return $ Just i else loop (ptr`plusPtr`1) (i+1) l
elemIndices::Word8->Rope->[Int]
elemIndices w rope=
elemIndices_ 0 rope []
where
loop::Ptr Word8->CInt->CSize->[Int]->IO [Int]
loop _ _ 0 l=return l
loop ptr i len l=do
ptr'<-memchr ptr w len
if ptr'==nullPtr then return [] else do
let off=fromIntegral $ ptr'`minusPtr`ptr
x<-loop (ptr'`plusPtr`1) (i+off+1) (lenfromIntegral off1) l
return $ (fromIntegral $ i+off):x
elemIndices_ i (String{contents,offset,length_}) list=
inlinePerformIO $ withForeignPtr contents $ \con->
loop (con`plusPtr`offset) i (fromIntegral length_) list
elemIndices_ i (Concat{l,r}) list=
let sl=elemIndices_ i l sr
sr=elemIndices_ (i+(fromIntegral $ length l)) r list
in
sl
elemIndex_ i (File{handle,position,length_}) list=
unsafePerformIO $ allocaBytes length_ $ \c->do
hSeek handle AbsoluteSeek $ fromIntegral position
l<-hGetBuf handle c length_
loop c i (fromIntegral l) list
\end{code}
Reducing
\begin{code}
foldl::(a->Word8->a)->a->Rope->a
foldl f a (String {contents,offset,length_})=
unsafePerformIO $ withForeignPtr contents $
\c->foldlBuf f a (c`plusPtr`offset) length_
foldl f a (Concat{l,r})=foldl f (foldl f a l) r
foldl f a (File{handle,position,length_})=
unsafePerformIO $ allocaArray length_ $ \arr->do
hSeek handle AbsoluteSeek $ fromIntegral position
l<-hGetBuf handle arr length_
foldlBuf f a arr l
foldlBuf::(a->Word8->a)->a->Ptr Word8->Int->IO a
foldlBuf _ a _ 0=return a
foldlBuf f a p l=do
c<-peek p
foldlBuf f (f a c) (p`plusPtr`1) (l1)
foldl'::(a->Word8->a)->a->Rope->a
foldl' f a (String {contents,offset,length_})=
unsafePerformIO $ withForeignPtr contents $
\c->foldlBuf' f a (c`plusPtr`offset) length_
foldl' f a (Concat{l,r})=
let b=foldl' f a l in b`seq`(foldl' f b r)
foldl' f a (File{handle,position,length_})=
unsafePerformIO $ allocaArray length_ $ \arr->do
hSeek handle AbsoluteSeek $ fromIntegral position
l<-hGetBuf handle arr length_
foldlBuf' f a arr l
foldlBuf'::(a->Word8->a)->a->Ptr Word8->Int->IO a
foldlBuf' _ a _ 0=return a
foldlBuf' f a p l=do
c<-peek p
let b=f a c
b`seq`foldlBuf' f b (p`plusPtr`1) (l1)
foldr::(Word8->a->a)->a->Rope->a
foldr f a (String{contents,offset,length_})=
unsafePerformIO $ withForeignPtr contents $
\p->foldrBuf f a (p`plusPtr`offset) length_
foldr f a (Concat{l,r})=foldr f (foldr f a r) l
foldr f a (File{handle,position,length_})=
unsafePerformIO $ allocaArray length_ $ \arr->do
hSeek handle AbsoluteSeek $ fromIntegral position
l<-hGetBuf handle arr length_
foldrBuf f a arr l
foldrBuf::(Word8->a->a)->a->Ptr Word8->Int->IO a
foldrBuf _ a _ 0=return a
foldrBuf f a p l=do
c<-peek $ p`plusPtr`(l1)
foldrBuf f (f c a) p (l1)
\end{code}
Breaking Ropes
\begin{code}
take::Int->Rope->Rope
take i (s@String{offset,length_})=s { length_=min length_ i }
take i (c@Concat{l,r})
| i==length l = l
| i<length l = take i l
| otherwise = c { r=take (ilength l) r }
drop::Int->Rope->Rope
drop i (s@String{offset,length_})=s { offset=offset+(min i length_) }
drop i (c@Concat{l,r})
| i==length l = r
| i<length l = c { l=drop i l }
| otherwise = drop (ilength l) r
splitAt# ::Int->Rope->(# Rope,Rope #)
splitAt# i (s@String{contents,offset,length_})
| i>=length_ = (# s,empty #)
| otherwise = (# s { length_=i }, s { offset=offset+i,length_=length_i} #)
splitAt# i (c@Concat{length_=l0,l,r})
| i>=l0 = (# c,empty #)
| i>=length_ l =
let (# u,v #)=splitAt# (ilength_ l) r in
(# c { r=u }, v #)
| otherwise =
let (# u,v #)=splitAt# i l in
(# u, c { l=v } #)
splitAt::Int->Rope->(Rope,Rope)
splitAt i r=
let (# u,v #)=splitAt# i r in
(u,v)
\end{code}
Input and Output
\begin{code}
hPut::Handle->Rope->IO()
hPut h (String{contents,offset,length_})=
withForeignPtr contents $ \c->hPutBuf h (c`plusPtr`offset) length_
hPut h (Concat{l,r})=do
hPut h l
hPut h r
hPut h (File{handle,position,length_})=do
hSeek handle AbsoluteSeek $ fromIntegral position
allocaBytes length_ $ \c->do
l<-hGetBuf handle c length_
hPutBuf h c l
hPutStr::Handle->Rope->IO()
hPutStr=hPut
hPutStrLn::Handle->Rope->IO()
hPutStrLn handle r=do
hPut handle r
alloca $ \c->do
poke (c::Ptr Word8) 0x0a
hPutBuf handle c 1
putStr::Rope->IO()
putStr=hPutStr stdout
putStrLn::Rope->IO()
putStrLn=hPutStrLn stdout
fromPtr::ForeignPtr Word8->Int->Int->IO Rope
fromPtr fp offset s
| s<=leafSize = withForeignPtr fp $ \p->do
contents<-mallocForeignPtrBytes leafSize
withForeignPtr contents $ \c->copyArray c (p`plusPtr`offset) s
i0<-mallocForeignPtr
withForeignPtr i0 $ \i->poke i s
return $ String { contents,i0,offset=0,length_=s }
| otherwise = withForeignPtr fp $ \p->do
let sl=s`shiftR`1
sr=sl+s.&.1
sz=ceiling $ (fromIntegral s)/(fromIntegral leafSize)
l<-unsafeInterleaveIO $ fromPtr fp offset sl
r<-unsafeInterleaveIO $ fromPtr fp (offset+sl) sr
return $ Concat { l,r,length_=s, sizeC=2*sz1 }
fromByteString::B.ByteString->Rope
fromByteString b=
let (fp,off,len)=B.toForeignPtr b in
unsafePerformIO $ fromPtr fp off len
toByteString::Rope->B.ByteString
toByteString (String{contents,offset,length_})=
B.fromForeignPtr contents offset length_
toByteString (c@Concat{length_=len})=
B.unsafeCreate len $ \ptr->fill ptr c
where
fill p (Concat{l,r})=do
fill p l
fill (p`plusPtr`(length_ l)) r
fill p (String{contents,offset,length_})=
withForeignPtr contents $ \c->copyBytes p (c`plusPtr`offset) length_
readFile::FilePath->IO Rope
readFile file=do
stat<-getFileStatus file
let s=fromIntegral $! fileSize stat
s`seq`return ()
fd<-openFd file ReadOnly Nothing $ defaultFileFlags
p<-c_mmap nullPtr s
c_PROT_READ
c_MAP_SHARED
(fromIntegral fd) 0
fp<-FC.newForeignPtr p $ do
c_munmap p s
closeFd fd
fromPtr fp 0 (fromIntegral s)
hGet::Handle->Int->IO Rope
hGet handle length_=do
position<-hTell handle
buildRope handle position length_
where
realSize=leafSize*(ceiling $ (fromIntegral length_)/(fromIntegral leafSize))
buildRope::Handle->Integer->Int->IO Rope
buildRope handle position len
| len<=leafSize = do
contents<-mallocForeignPtrBytes leafSize
length_<-withForeignPtr contents $ \c->do
hSeek handle AbsoluteSeek position
hGetBuf handle c len
i0<-mallocForeignPtr
withForeignPtr i0 $ \i->poke i len
return $ String {contents,length_,offset=0,i0 }
| len>leafSize =
let a=len`shiftR`1
b=a+(len.&.1)
in do
aa<-buildRope handle position a
bb<-buildRope handle (position+fromIntegral a) b
return $ Concat { length_=len,l=aa,r=bb,sizeC=size aa+size bb }
\end{code}