{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE MultiWayIf    #-}
{-# LANGUAGE ViewPatterns  #-}
{-# LANGUAGE Strict        #-}


-- |
-- Module      :  Data.BWT
-- Copyright   :  (c) Matthew Mosior 2022
-- License     :  BSD-style
-- Maintainer  :  mattm.github@gmail.com
-- Portability :  portable
--
-- = Burrows-Wheeler Transform (BWT)
-- 
-- The two functions that most users will utilize are 'toBWT' and 'fromBWT'.
-- There are auxilary function(s) inside of @"Data.BWT.Internal"@.
--
-- The helper functions for ByteString, 'bytestringToBWT', 'bytestringFromWord8BWT' , 'bytestringFromByteStringBWT' and Text, 'textToBWT' and 'textFromBWT' should help for common use cases.
--
-- @"Data.BWT.Internal"@ also has the function 'createBWTMatrix', which can be useful as well, although not used by either 'toBWT' or 'fromBWT'.


module Data.BWT where

import Data.BWT.Internal

import Control.Monad()
import Control.Monad.ST as CMST
import Control.Monad.State.Strict()
import Data.ByteString as BS (ByteString,concat,pack,unpack)
import Data.Foldable as DFold (toList)
import Data.STRef()
import Data.Text (Text)
import Data.Text.Encoding as DTE (decodeUtf8,encodeUtf8)
import Data.Vector as DVB (empty,iterateN,length,zip)
import Data.Vector.Unboxed as DVU (Unbox,fromList)
import Data.Word (Word8)
import GHC.Generics (Generic)


{-toBWT Function(s)-}

-- | Takes a String and returns the Burrows-Wheeler Transform (BWT).
-- Implemented via a 'SuffixArray'.
toBWT :: (Unbox a,Ord a)
      => [a]
      -> BWT a
toBWT :: forall a. (Unbox a, Ord a) => [a] -> BWT a
toBWT [] = forall a. Vector (Maybe a) -> BWT a
BWT forall a. Vector a
DVB.empty
toBWT [a]
xs = do
  let saxs :: SuffixArray a
saxs = forall a. (Unbox a, Ord a) => Vector a -> SuffixArray a
createSuffixArray Vector a
xss
  forall a. Unbox a => SuffixArray a -> Vector a -> BWT a
saToBWT SuffixArray a
saxs
          Vector a
xss
    where
      xss :: Vector a
xss = forall a. Unbox a => [a] -> Vector a
DVU.fromList [a]
xs

-- | Helper function for converting a 'ByteString'
-- to a 'BWT' 'Word8'.
bytestringToBWT :: ByteString ->
                   BWT Word8
bytestringToBWT :: ByteString -> BWT Word8
bytestringToBWT = forall a. (Unbox a, Ord a) => [a] -> BWT a
toBWT forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [Word8]
BS.unpack

-- | A newtype to ensure you only uncompress a BWT created
-- from textToBWT, since [Word8] -> Text is partial.
newtype TextBWT = TextBWT (BWT Word8)
  deriving (TextBWT -> TextBWT -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TextBWT -> TextBWT -> Bool
$c/= :: TextBWT -> TextBWT -> Bool
== :: TextBWT -> TextBWT -> Bool
$c== :: TextBWT -> TextBWT -> Bool
Eq,Eq TextBWT
TextBWT -> TextBWT -> Bool
TextBWT -> TextBWT -> Ordering
TextBWT -> TextBWT -> TextBWT
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: TextBWT -> TextBWT -> TextBWT
$cmin :: TextBWT -> TextBWT -> TextBWT
max :: TextBWT -> TextBWT -> TextBWT
$cmax :: TextBWT -> TextBWT -> TextBWT
>= :: TextBWT -> TextBWT -> Bool
$c>= :: TextBWT -> TextBWT -> Bool
> :: TextBWT -> TextBWT -> Bool
$c> :: TextBWT -> TextBWT -> Bool
<= :: TextBWT -> TextBWT -> Bool
$c<= :: TextBWT -> TextBWT -> Bool
< :: TextBWT -> TextBWT -> Bool
$c< :: TextBWT -> TextBWT -> Bool
compare :: TextBWT -> TextBWT -> Ordering
$ccompare :: TextBWT -> TextBWT -> Ordering
Ord,Int -> TextBWT -> ShowS
[TextBWT] -> ShowS
TextBWT -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TextBWT] -> ShowS
$cshowList :: [TextBWT] -> ShowS
show :: TextBWT -> String
$cshow :: TextBWT -> String
showsPrec :: Int -> TextBWT -> ShowS
$cshowsPrec :: Int -> TextBWT -> ShowS
Show,ReadPrec [TextBWT]
ReadPrec TextBWT
Int -> ReadS TextBWT
ReadS [TextBWT]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [TextBWT]
$creadListPrec :: ReadPrec [TextBWT]
readPrec :: ReadPrec TextBWT
$creadPrec :: ReadPrec TextBWT
readList :: ReadS [TextBWT]
$creadList :: ReadS [TextBWT]
readsPrec :: Int -> ReadS TextBWT
$creadsPrec :: Int -> ReadS TextBWT
Read,forall x. Rep TextBWT x -> TextBWT
forall x. TextBWT -> Rep TextBWT x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep TextBWT x -> TextBWT
$cfrom :: forall x. TextBWT -> Rep TextBWT x
Generic)

-- | Helper function for converting 'Text'
-- to a 'TextBWT'.
textToBWT :: Text ->
             TextBWT
textToBWT :: Text -> TextBWT
textToBWT = BWT Word8 -> TextBWT
TextBWT forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> BWT Word8
bytestringToBWT forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
DTE.encodeUtf8

{-------------------}


{-fromBWT function(s)-}

-- | Takes a BWT data type (please see @"Data.BWT.Internal"@) and inverts it back to the original string.
-- 
-- This function utilizes the state monad (strict) in order
-- to implement the [Magic](https://www.youtube.com/watch?v=QwSsppKrCj4) Inverse BWT algorithm by backtracking
-- indices starting with the (__Nothing__,_) entry.
fromBWT :: Ord a
        => BWT a
        -> [a]
fromBWT :: forall a. Ord a => BWT a -> [a]
fromBWT BWT a
bwt = do
  let originall :: BWTVec a
originall = forall a. (forall s. ST s a) -> a
CMST.runST forall a b. (a -> b) -> a -> b
$ forall a s. Vector (Maybe a, Int) -> ST s (BWTVec a)
magicInverseBWT Vector (Maybe a, Int)
magicsz
  forall (t :: * -> *) a. Foldable t => t a -> [a]
DFold.toList BWTVec a
originall
    where
      magicsz :: Vector (Maybe a, Int)
magicsz = forall a. Ord a => Vector (a, Int) -> Vector (a, Int)
sortVecBWT Vector (Maybe a, Int)
zipped
      zipped :: Vector (Maybe a, Int)
zipped  = forall a b. Vector a -> Vector b -> Vector (a, b)
DVB.zip Vector (Maybe a)
bwtt
                        (forall a. Int -> (a -> a) -> a -> Vector a
DVB.iterateN (forall a. Vector a -> Int
DVB.length Vector (Maybe a)
bwtt) (forall a. Num a => a -> a -> a
+Int
1) Int
0)
      bwtt :: Vector (Maybe a)
bwtt    = (\(BWT Vector (Maybe a)
t) -> Vector (Maybe a)
t) BWT a
bwt

-- | Helper function for converting a 'BWT' of 'Word8's
-- to a 'ByteString'.
bytestringFromWord8BWT :: BWT Word8
                       -> ByteString
bytestringFromWord8BWT :: BWT Word8 -> ByteString
bytestringFromWord8BWT = [Word8] -> ByteString
BS.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Ord a => BWT a -> [a]
fromBWT

-- | Helper function for converting a 'BWT' 'ByteString's
-- to a 'ByteString'.
bytestringFromByteStringBWT :: BWT ByteString
                            -> ByteString
bytestringFromByteStringBWT :: BWT ByteString -> ByteString
bytestringFromByteStringBWT = [ByteString] -> ByteString
BS.concat forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Ord a => BWT a -> [a]
fromBWT

-- | Helper function for converting 'TextBWT'
-- to a 'Text'
textFromBWT :: TextBWT -> Text
textFromBWT :: TextBWT -> Text
textFromBWT (TextBWT BWT Word8
x) = ByteString -> Text
DTE.decodeUtf8 forall a b. (a -> b) -> a -> b
$
                          BWT Word8 -> ByteString
bytestringFromWord8BWT BWT Word8
x

{---------------------}