{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE MultiWayIf    #-}
{-# LANGUAGE ViewPatterns  #-}
{-# LANGUAGE Strict        #-}


-- |
-- Module      :  Data.BWT
-- Copyright   :  (c) Matthew Mosior 2022
-- License     :  BSD-style
-- Maintainer  :  mattm.github@gmail.com
-- Portability :  portable
--
-- = Burrows-Wheeler Transform (BWT)
-- 
-- The two functions that most users will utilize are 'toBWT' and 'fromBWT'.
-- There are auxilary function(s) inside of @"Data.BWT.Internal"@.
--
-- The helper functions for ByteString, 'bytestringToBWT', 'bytestringFromWord8BWT' , 'bytestringFromByteStringBWT' and Text, 'textToBWT' and 'textFromBWT' should help for common use cases.
--
-- @"Data.BWT.Internal"@ also has the function 'createBWTMatrix', which can be useful as well, although not used by either 'toBWT' or 'fromBWT'.


module Data.BWT where

import Data.BWT.Internal

import Control.Monad()
import Control.Monad.ST as CMST
import Control.Monad.State.Strict()
import Data.ByteString as BS (ByteString,concat,pack,unpack)
import Data.Foldable as DFold (toList)
import Data.Sequence as DS
import Data.STRef()
import Data.Text (Text)
import Data.Text.Encoding as DTE (decodeUtf8,encodeUtf8)
import Data.Word (Word8)
import GHC.Generics(Generic)


{-toBWT Function(s)-}

-- | Takes a String and returns the Burrows-Wheeler Transform (BWT).
-- Implemented via a 'SuffixArray'.
toBWT :: Ord a =>
         [a]   ->
         BWT a
toBWT :: forall a. Ord a => [a] -> BWT a
toBWT [] = forall a. Seq a
DS.Empty
toBWT [a]
xs = do
  let saxs :: SuffixArray a
saxs = forall a. Ord a => Seq a -> SuffixArray a
createSuffixArray Seq a
xss
  forall a. SuffixArray a -> Seq a -> BWT a
saToBWT SuffixArray a
saxs
          Seq a
xss
    where
      xss :: Seq a
xss = forall a. [a] -> Seq a
DS.fromList [a]
xs

-- | Helper function for converting a 'ByteString'
-- to a 'BWT' 'Word8'.
bytestringToBWT :: ByteString ->
                   BWT Word8
bytestringToBWT :: ByteString -> BWT Word8
bytestringToBWT = forall a. Ord a => [a] -> BWT a
toBWT forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [Word8]
BS.unpack

-- | A newtype to ensure you only uncompress a BWT created
-- from textToBWT, since [Word8] -> Text is partial.
newtype TextBWT = TextBWT (BWT Word8)
  deriving (TextBWT -> TextBWT -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TextBWT -> TextBWT -> Bool
$c/= :: TextBWT -> TextBWT -> Bool
== :: TextBWT -> TextBWT -> Bool
$c== :: TextBWT -> TextBWT -> Bool
Eq,Eq TextBWT
TextBWT -> TextBWT -> Bool
TextBWT -> TextBWT -> Ordering
TextBWT -> TextBWT -> TextBWT
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: TextBWT -> TextBWT -> TextBWT
$cmin :: TextBWT -> TextBWT -> TextBWT
max :: TextBWT -> TextBWT -> TextBWT
$cmax :: TextBWT -> TextBWT -> TextBWT
>= :: TextBWT -> TextBWT -> Bool
$c>= :: TextBWT -> TextBWT -> Bool
> :: TextBWT -> TextBWT -> Bool
$c> :: TextBWT -> TextBWT -> Bool
<= :: TextBWT -> TextBWT -> Bool
$c<= :: TextBWT -> TextBWT -> Bool
< :: TextBWT -> TextBWT -> Bool
$c< :: TextBWT -> TextBWT -> Bool
compare :: TextBWT -> TextBWT -> Ordering
$ccompare :: TextBWT -> TextBWT -> Ordering
Ord,Int -> TextBWT -> ShowS
[TextBWT] -> ShowS
TextBWT -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TextBWT] -> ShowS
$cshowList :: [TextBWT] -> ShowS
show :: TextBWT -> String
$cshow :: TextBWT -> String
showsPrec :: Int -> TextBWT -> ShowS
$cshowsPrec :: Int -> TextBWT -> ShowS
Show,ReadPrec [TextBWT]
ReadPrec TextBWT
Int -> ReadS TextBWT
ReadS [TextBWT]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [TextBWT]
$creadListPrec :: ReadPrec [TextBWT]
readPrec :: ReadPrec TextBWT
$creadPrec :: ReadPrec TextBWT
readList :: ReadS [TextBWT]
$creadList :: ReadS [TextBWT]
readsPrec :: Int -> ReadS TextBWT
$creadsPrec :: Int -> ReadS TextBWT
Read,forall x. Rep TextBWT x -> TextBWT
forall x. TextBWT -> Rep TextBWT x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep TextBWT x -> TextBWT
$cfrom :: forall x. TextBWT -> Rep TextBWT x
Generic)

-- | Helper function for converting 'Text'
-- to a 'TextBWT'.
textToBWT :: Text ->
             TextBWT
textToBWT :: Text -> TextBWT
textToBWT = BWT Word8 -> TextBWT
TextBWT forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> BWT Word8
bytestringToBWT forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
DTE.encodeUtf8

{-------------------}


{-fromBWT function(s)-}

-- | Takes a BWT data type (please see @"Data.BWT.Internal"@) and inverts it back to the original string.
-- 
-- This function utilizes the state monad (strict) in order
-- to implement the [Magic](https://www.youtube.com/watch?v=QwSsppKrCj4) Inverse BWT algorithm by backtracking
-- indices starting with the (__Nothing__,_) entry.
fromBWT :: Ord a =>
           BWT a ->
           [a]
fromBWT :: forall a. Ord a => BWT a -> [a]
fromBWT BWT a
bwt = do
  let originall :: BWTSeq a
originall = forall a. (forall s. ST s a) -> a
CMST.runST forall a b. (a -> b) -> a -> b
$ forall a s. Seq (Maybe a, Int) -> ST s (BWTSeq a)
magicInverseBWT Seq (Maybe a, Int)
magicsz
  forall (t :: * -> *) a. Foldable t => t a -> [a]
DFold.toList BWTSeq a
originall
    where
      magicsz :: Seq (Maybe a, Int)
magicsz = forall a. (a -> a -> Ordering) -> Seq a -> Seq a
DS.sortBy (\(Maybe a
a,Int
b) (Maybe a
c,Int
d) -> forall a1 a2. (Ord a1, Ord a2) => (a1, a2) -> (a1, a2) -> Ordering
sortTB (Maybe a
a,Int
b) (Maybe a
c,Int
d))
                Seq (Maybe a, Int)
zipped
      zipped :: Seq (Maybe a, Int)
zipped  = forall a b. Seq a -> Seq b -> Seq (a, b)
DS.zip BWT a
bwt
                       (forall a. Int -> (a -> a) -> a -> Seq a
DS.iterateN (forall a. Seq a -> Int
DS.length BWT a
bwt) (forall a. Num a => a -> a -> a
+Int
1) Int
0)

-- | Helper function for converting a 'BWT' of 'Word8's
-- to a 'ByteString'.
bytestringFromWord8BWT :: BWT Word8
                       -> ByteString
bytestringFromWord8BWT :: BWT Word8 -> ByteString
bytestringFromWord8BWT = [Word8] -> ByteString
BS.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Ord a => BWT a -> [a]
fromBWT

-- | Helper function for converting a 'BWT' 'ByteString's
-- to a 'ByteString'.
bytestringFromByteStringBWT :: BWT ByteString
                            -> ByteString
bytestringFromByteStringBWT :: BWT ByteString -> ByteString
bytestringFromByteStringBWT = [ByteString] -> ByteString
BS.concat forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Ord a => BWT a -> [a]
fromBWT

-- | Helper function for converting 'TextBWT'
-- to a 'Text'
textFromBWT :: TextBWT -> Text
textFromBWT :: TextBWT -> Text
textFromBWT (TextBWT BWT Word8
x) = ByteString -> Text
DTE.decodeUtf8 forall a b. (a -> b) -> a -> b
$
                          BWT Word8 -> ByteString
bytestringFromWord8BWT BWT Word8
x

{---------------------}