------------------------------------------------------------------------
-- |
-- Module           : What4.Utils.Word16String
-- Description      : Utility definitions for wide (2-byte) strings
-- Copyright        : (c) Galois, Inc 2019-2020
-- License          : BSD3
-- Maintainer       : Rob Dockins <rdockins@galois.com>
-- Stability        : provisional
------------------------------------------------------------------------

module What4.Utils.Word16String
( Word16String
, fromLEByteString
, toLEByteString
, empty
, singleton
, null
, index
, drop
, take
, append
, length
, foldl'
, findSubstring
, isInfixOf
, isPrefixOf
, isSuffixOf
) where

import           Prelude hiding (null,length, drop, take)
import qualified Prelude

import           Data.Bits
import           Data.Char
import           Data.Hashable
import qualified Data.List as List
import           Data.Maybe (isJust)
import           Data.Word
import           Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import           Numeric

-- | A string of Word16 values, encoded as a bytestring
--   in little endian (LE) order.
--
--   We maintain the invariant that Word16Strings
--   are represented by an even number of bytes.
newtype Word16String = Word16String ByteString


instance Semigroup Word16String where
  <> :: Word16String -> Word16String -> Word16String
(<>) = Word16String -> Word16String -> Word16String
append

instance Monoid Word16String where
  mempty :: Word16String
mempty = Word16String
empty
  mappend :: Word16String -> Word16String -> Word16String
mappend = Word16String -> Word16String -> Word16String
append

instance Eq Word16String where
  (Word16String ByteString
xs) == :: Word16String -> Word16String -> Bool
== (Word16String ByteString
ys) = ByteString
xs ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
ys

instance Ord Word16String where
  compare :: Word16String -> Word16String -> Ordering
compare (Word16String ByteString
xs) (Word16String ByteString
ys) = ByteString -> ByteString -> Ordering
forall a. Ord a => a -> a -> Ordering
compare ByteString
xs ByteString
ys

instance Show Word16String where
 showsPrec :: Int -> Word16String -> ShowS
showsPrec Int
_ = Word16String -> ShowS
showsWord16String

instance Hashable Word16String where
 hashWithSalt :: Int -> Word16String -> Int
hashWithSalt Int
s (Word16String ByteString
xs) = Int -> ByteString -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
s ByteString
xs

showsWord16String :: Word16String -> ShowS
showsWord16String :: Word16String -> ShowS
showsWord16String (Word16String ByteString
xs0) String
tl = Char
'"' Char -> ShowS
forall a. a -> [a] -> [a]
: [Word8] -> String
forall a. Integral a => [a] -> String
go (ByteString -> [Word8]
BS.unpack ByteString
xs0)
 where
 go :: [a] -> String
go [] = Char
'"' Char -> ShowS
forall a. a -> [a] -> [a]
: String
tl
 go (a
_:[]) = ShowS
forall a. HasCallStack => String -> a
error String
"showsWord16String: representation has odd number of bytes!"
 go (a
lo:a
hi:[a]
xs)
    | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'"'    = String
"\\\"" String -> ShowS
forall a. [a] -> [a] -> [a]
++ [a] -> String
go [a]
xs
    | Char -> Bool
isPrint Char
c   = Char
c Char -> ShowS
forall a. a -> [a] -> [a]
: [a] -> String
go [a]
xs
    | Bool
otherwise   = String
"\\u" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
zs String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
esc String -> ShowS
forall a. [a] -> [a] -> [a]
++ [a] -> String
go [a]
xs

  where
  esc :: String
esc = Word16 -> ShowS
forall a. (Integral a, Show a) => a -> ShowS
showHex Word16
x []
  zs :: String
zs  = Int -> ShowS
forall a. Int -> [a] -> [a]
Prelude.take (Int
4 Int -> Int -> Int
forall a. Num a => a -> a -> a
- String -> Int
forall (t :: Type -> Type) a. Foldable t => t a -> Int
Prelude.length String
esc) (Char -> String
forall a. a -> [a]
repeat Char
'0')

  x :: Word16
  x :: Word16
x = a -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
lo Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. (a -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
hi Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`shiftL` Int
8)

  c :: Char
  c :: Char
c = Int -> Char
forall a. Enum a => Int -> a
toEnum (Word16 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word16
x)


-- | Generate a @Word16String@ from a bytestring
--   where the 16bit words are encoded as two bytes
--   in little-endian order.
--
--   PRECONDITION: the input bytestring must 
--   have a length which is a multiple of 2.
fromLEByteString :: ByteString -> Word16String
fromLEByteString :: ByteString -> Word16String
fromLEByteString ByteString
xs
  | ByteString -> Int
BS.length ByteString
xs Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
2 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = ByteString -> Word16String
Word16String ByteString
xs
  | Bool
otherwise = String -> Word16String
forall a. HasCallStack => String -> a
error String
"fromLEByteString: bytestring must have even length"

-- | Return the underlying little endian bytestring.
toLEByteString :: Word16String -> ByteString
toLEByteString :: Word16String -> ByteString
toLEByteString (Word16String ByteString
xs) = ByteString
xs

-- | Return the empty string
empty :: Word16String
empty :: Word16String
empty = ByteString -> Word16String
Word16String ByteString
BS.empty

-- | Compute the string containing just the given character
singleton :: Word16 -> Word16String
singleton :: Word16 -> Word16String
singleton Word16
c = ByteString -> Word16String
Word16String ([Word8] -> ByteString
BS.pack [ Word8
lo , Word8
hi ])
 where
 lo, hi :: Word8
 lo :: Word8
lo = Word16 -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word16
c Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.&. Word16
0xFF)
 hi :: Word8
hi = Word16 -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word16
c Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`shiftR` Int
8)

-- | Test if the given string is empty
null :: Word16String -> Bool
null :: Word16String -> Bool
null (Word16String ByteString
xs) = ByteString -> Bool
BS.null ByteString
xs

-- | Retrive the @n@th character of the string.
--   Out of bounds accesses will cause an error.
index :: Word16String -> Int -> Word16
index :: Word16String -> Int -> Word16
index (Word16String ByteString
xs) Int
i = (Word16
hi Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`shiftL` Int
8) Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. Word16
lo
 where
 lo, hi :: Word16
 hi :: Word16
hi = Word8 -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word8
BS.index ByteString
xs (Int
2Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1))
 lo :: Word16
lo = Word8 -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word8
BS.index ByteString
xs (Int
2Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
i))

drop :: Int -> Word16String -> Word16String
drop :: Int -> Word16String -> Word16String
drop Int
k (Word16String ByteString
xs) = ByteString -> Word16String
Word16String (Int -> ByteString -> ByteString
BS.drop (Int
2Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
k) ByteString
xs)

take :: Int -> Word16String -> Word16String
take :: Int -> Word16String -> Word16String
take Int
k (Word16String ByteString
xs) = ByteString -> Word16String
Word16String (Int -> ByteString -> ByteString
BS.take (Int
2Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
k) ByteString
xs)

append :: Word16String -> Word16String -> Word16String
append :: Word16String -> Word16String -> Word16String
append (Word16String ByteString
xs) (Word16String ByteString
ys) =
  ByteString -> Word16String
Word16String (ByteString -> ByteString -> ByteString
BS.append ByteString
xs ByteString
ys)

length :: Word16String -> Int
length :: Word16String -> Int
length (Word16String ByteString
xs) = ByteString -> Int
BS.length ByteString
xs Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
1

foldl' :: (a -> Word16 -> a) -> a -> Word16String -> a
foldl' :: (a -> Word16 -> a) -> a -> Word16String -> a
foldl' a -> Word16 -> a
f a
z Word16String
xs =
  (a -> Int -> a) -> a -> [Int] -> a
forall (t :: Type -> Type) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
List.foldl' (\a
x Int
i -> a -> Word16 -> a
f a
x (Word16String -> Int -> Word16
index Word16String
xs Int
i)) a
z [ Int
0 .. (Word16String -> Int
length Word16String
xs Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) ]

-- | Find the first index (if it exists) where the first
--   string appears as a substring in the second
findSubstring :: Word16String -> Word16String -> Maybe Int
findSubstring :: Word16String -> Word16String -> Maybe Int
findSubstring (Word16String ByteString
xs) Word16String
_ | ByteString -> Bool
BS.null ByteString
xs = Int -> Maybe Int
forall a. a -> Maybe a
Just Int
0
findSubstring (Word16String ByteString
xs) (Word16String ByteString
ys) = Int -> Maybe Int
go Int
0
  where
  brk :: ByteString -> (ByteString, ByteString)
brk = ByteString -> ByteString -> (ByteString, ByteString)
BS.breakSubstring ByteString
xs

  -- search for the first aligned (even) index where the pattern string occurs
  -- invariant: k is even
  go :: Int -> Maybe Int
go Int
k
    | ByteString -> Bool
BS.null ByteString
b = Maybe Int
forall a. Maybe a
Nothing
    | Int -> Bool
forall a. Integral a => a -> Bool
even (ByteString -> Int
BS.length ByteString
a) = Int -> Maybe Int
forall a. a -> Maybe a
Just ((Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
+ ByteString -> Int
BS.length ByteString
a) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
1)
    | Bool
otherwise = Int -> Maybe Int
go (Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
+ ByteString -> Int
BS.length ByteString
a Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
   where
   (ByteString
a,ByteString
b) = ByteString -> (ByteString, ByteString)
brk (Int -> ByteString -> ByteString
BS.drop Int
k ByteString
ys)

-- | Returns true if the first string appears somewhere
--   in the second string.
isInfixOf :: Word16String -> Word16String -> Bool
isInfixOf :: Word16String -> Word16String -> Bool
isInfixOf Word16String
xs Word16String
ys = Maybe Int -> Bool
forall a. Maybe a -> Bool
isJust (Maybe Int -> Bool) -> Maybe Int -> Bool
forall a b. (a -> b) -> a -> b
$ Word16String -> Word16String -> Maybe Int
findSubstring Word16String
xs Word16String
ys

isPrefixOf :: Word16String -> Word16String -> Bool
isPrefixOf :: Word16String -> Word16String -> Bool
isPrefixOf (Word16String ByteString
xs) (Word16String ByteString
ys) = ByteString -> ByteString -> Bool
BS.isPrefixOf ByteString
xs ByteString
ys

isSuffixOf :: Word16String -> Word16String -> Bool
isSuffixOf :: Word16String -> Word16String -> Bool
isSuffixOf (Word16String ByteString
xs) (Word16String ByteString
ys) = ByteString -> ByteString -> Bool
BS.isSuffixOf ByteString
xs ByteString
ys