------------------------------------------------------------------------
-- |
-- Module           : What4.Utils.StringLiteral
-- Description      : Utility definitions for strings
-- Copyright        : (c) Galois, Inc 2019-2020
-- License          : BSD3
-- Maintainer       : Rob Dockins <rdockins@galois.com>
-- Stability        : provisional
------------------------------------------------------------------------

{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE GADTs #-}

module What4.Utils.StringLiteral
( StringLiteral(..)
, stringLiteralInfo
, fromUnicodeLit
, fromChar8Lit
, fromChar16Lit
, stringLitEmpty
, stringLitLength
, stringLitNull
, stringLitBounds
, stringLitContains
, stringLitIsPrefixOf
, stringLitIsSuffixOf
, stringLitSubstring
, stringLitIndexOf
) where


import           Data.Kind
import           Data.Parameterized.Classes
import qualified Data.ByteString as BS
import           Data.String
import qualified Data.Text as T

import           What4.BaseTypes
import qualified What4.Utils.Word16String as WS


------------------------------------------------------------------------
-- String literals

data StringLiteral (si::StringInfo) :: Type where
  UnicodeLiteral :: !T.Text -> StringLiteral Unicode
  Char8Literal   :: !BS.ByteString -> StringLiteral Char8
  Char16Literal  :: !WS.Word16String -> StringLiteral Char16

stringLiteralInfo :: StringLiteral si -> StringInfoRepr si
stringLiteralInfo :: StringLiteral si -> StringInfoRepr si
stringLiteralInfo UnicodeLiteral{} = StringInfoRepr si
StringInfoRepr Unicode
UnicodeRepr
stringLiteralInfo Char16Literal{}  = StringInfoRepr si
StringInfoRepr Char16
Char16Repr
stringLiteralInfo Char8Literal{}   = StringInfoRepr si
StringInfoRepr Char8
Char8Repr

fromUnicodeLit :: StringLiteral Unicode -> T.Text
fromUnicodeLit :: StringLiteral Unicode -> Text
fromUnicodeLit (UnicodeLiteral Text
x) = Text
x

fromChar8Lit :: StringLiteral Char8 -> BS.ByteString
fromChar8Lit :: StringLiteral Char8 -> ByteString
fromChar8Lit (Char8Literal ByteString
x) = ByteString
x

fromChar16Lit :: StringLiteral Char16 -> WS.Word16String
fromChar16Lit :: StringLiteral Char16 -> Word16String
fromChar16Lit (Char16Literal Word16String
x) = Word16String
x

instance TestEquality StringLiteral where
  testEquality :: StringLiteral a -> StringLiteral b -> Maybe (a :~: b)
testEquality (UnicodeLiteral Text
x) (UnicodeLiteral Text
y) =
    if Text
x Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
y then (a :~: a) -> Maybe (a :~: a)
forall a. a -> Maybe a
Just a :~: a
forall k (a :: k). a :~: a
Refl else Maybe (a :~: b)
forall a. Maybe a
Nothing
  testEquality (Char16Literal Word16String
x) (Char16Literal Word16String
y) =
    if Word16String
x Word16String -> Word16String -> Bool
forall a. Eq a => a -> a -> Bool
== Word16String
y then (a :~: a) -> Maybe (a :~: a)
forall a. a -> Maybe a
Just a :~: a
forall k (a :: k). a :~: a
Refl else Maybe (a :~: b)
forall a. Maybe a
Nothing
  testEquality (Char8Literal ByteString
x) (Char8Literal ByteString
y) =
    if ByteString
x ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
y then (a :~: a) -> Maybe (a :~: a)
forall a. a -> Maybe a
Just a :~: a
forall k (a :: k). a :~: a
Refl else Maybe (a :~: b)
forall a. Maybe a
Nothing

  testEquality StringLiteral a
_ StringLiteral b
_ = Maybe (a :~: b)
forall a. Maybe a
Nothing

instance Eq (StringLiteral si) where
  StringLiteral si
x == :: StringLiteral si -> StringLiteral si -> Bool
== StringLiteral si
y = Maybe (si :~: si) -> Bool
forall a. Maybe a -> Bool
isJust (StringLiteral si -> StringLiteral si -> Maybe (si :~: si)
forall k (f :: k -> Type) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality StringLiteral si
x StringLiteral si
y)

instance OrdF StringLiteral where
  compareF :: StringLiteral x -> StringLiteral y -> OrderingF x y
compareF (UnicodeLiteral Text
x) (UnicodeLiteral Text
y) =
    case Text -> Text -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Text
x Text
y of
      Ordering
LT -> OrderingF x y
forall k (x :: k) (y :: k). OrderingF x y
LTF
      Ordering
EQ -> OrderingF x y
forall k (x :: k). OrderingF x x
EQF
      Ordering
GT -> OrderingF x y
forall k (x :: k) (y :: k). OrderingF x y
GTF
  compareF UnicodeLiteral{} StringLiteral y
_ = OrderingF x y
forall k (x :: k) (y :: k). OrderingF x y
LTF
  compareF StringLiteral x
_ UnicodeLiteral{} = OrderingF x y
forall k (x :: k) (y :: k). OrderingF x y
GTF

  compareF (Char16Literal Word16String
x) (Char16Literal Word16String
y) =
    case Word16String -> Word16String -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Word16String
x Word16String
y of
      Ordering
LT -> OrderingF x y
forall k (x :: k) (y :: k). OrderingF x y
LTF
      Ordering
EQ -> OrderingF x y
forall k (x :: k). OrderingF x x
EQF
      Ordering
GT -> OrderingF x y
forall k (x :: k) (y :: k). OrderingF x y
GTF
  compareF Char16Literal{} StringLiteral y
_ = OrderingF x y
forall k (x :: k) (y :: k). OrderingF x y
LTF
  compareF StringLiteral x
_ Char16Literal{} = OrderingF x y
forall k (x :: k) (y :: k). OrderingF x y
GTF

  compareF (Char8Literal ByteString
x) (Char8Literal ByteString
y) =
    case ByteString -> ByteString -> Ordering
forall a. Ord a => a -> a -> Ordering
compare ByteString
x ByteString
y of
      Ordering
LT -> OrderingF x y
forall k (x :: k) (y :: k). OrderingF x y
LTF
      Ordering
EQ -> OrderingF x y
forall k (x :: k). OrderingF x x
EQF
      Ordering
GT -> OrderingF x y
forall k (x :: k) (y :: k). OrderingF x y
GTF

instance Ord (StringLiteral si) where
  compare :: StringLiteral si -> StringLiteral si -> Ordering
compare StringLiteral si
x StringLiteral si
y = OrderingF si si -> Ordering
forall k (x :: k) (y :: k). OrderingF x y -> Ordering
toOrdering (StringLiteral si -> StringLiteral si -> OrderingF si si
forall k (ktp :: k -> Type) (x :: k) (y :: k).
OrdF ktp =>
ktp x -> ktp y -> OrderingF x y
compareF StringLiteral si
x StringLiteral si
y)

instance ShowF StringLiteral where
  showF :: StringLiteral tp -> String
showF (UnicodeLiteral Text
x) = Text -> String
forall a. Show a => a -> String
show Text
x
  showF (Char16Literal Word16String
x) = Word16String -> String
forall a. Show a => a -> String
show Word16String
x
  showF (Char8Literal ByteString
x) = ByteString -> String
forall a. Show a => a -> String
show ByteString
x

instance Show (StringLiteral si) where
  show :: StringLiteral si -> String
show = StringLiteral si -> String
forall k (f :: k -> Type) (tp :: k). ShowF f => f tp -> String
showF


instance HashableF StringLiteral where
  hashWithSaltF :: Int -> StringLiteral tp -> Int
hashWithSaltF Int
s (UnicodeLiteral Text
x) = Int -> Text -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt (Int -> Int -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
s (Int
1::Int)) Text
x
  hashWithSaltF Int
s (Char16Literal Word16String
x)  = Int -> Word16String -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt (Int -> Int -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
s (Int
2::Int)) Word16String
x
  hashWithSaltF Int
s (Char8Literal ByteString
x)   = Int -> ByteString -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt (Int -> Int -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
s (Int
3::Int)) ByteString
x

instance Hashable (StringLiteral si) where
  hashWithSalt :: Int -> StringLiteral si -> Int
hashWithSalt = Int -> StringLiteral si -> Int
forall k (f :: k -> Type) (tp :: k).
HashableF f =>
Int -> f tp -> Int
hashWithSaltF

stringLitLength :: StringLiteral si -> Integer
stringLitLength :: StringLiteral si -> Integer
stringLitLength (UnicodeLiteral Text
x) = Int -> Integer
forall a. Integral a => a -> Integer
toInteger (Text -> Int
T.length Text
x)
stringLitLength (Char16Literal Word16String
x)  = Int -> Integer
forall a. Integral a => a -> Integer
toInteger (Word16String -> Int
WS.length Word16String
x)
stringLitLength (Char8Literal ByteString
x)   = Int -> Integer
forall a. Integral a => a -> Integer
toInteger (ByteString -> Int
BS.length ByteString
x)

stringLitEmpty :: StringInfoRepr si -> StringLiteral si
stringLitEmpty :: StringInfoRepr si -> StringLiteral si
stringLitEmpty StringInfoRepr si
UnicodeRepr = Text -> StringLiteral Unicode
UnicodeLiteral Text
forall a. Monoid a => a
mempty
stringLitEmpty StringInfoRepr si
Char16Repr  = Word16String -> StringLiteral Char16
Char16Literal Word16String
forall a. Monoid a => a
mempty
stringLitEmpty StringInfoRepr si
Char8Repr   = ByteString -> StringLiteral Char8
Char8Literal ByteString
forall a. Monoid a => a
mempty

stringLitNull :: StringLiteral si -> Bool
stringLitNull :: StringLiteral si -> Bool
stringLitNull (UnicodeLiteral Text
x) = Text -> Bool
T.null Text
x
stringLitNull (Char16Literal Word16String
x)  = Word16String -> Bool
WS.null Word16String
x
stringLitNull (Char8Literal ByteString
x)   = ByteString -> Bool
BS.null ByteString
x

stringLitContains :: StringLiteral si -> StringLiteral si -> Bool
stringLitContains :: StringLiteral si -> StringLiteral si -> Bool
stringLitContains (UnicodeLiteral Text
x) (UnicodeLiteral Text
y) = Text -> Text -> Bool
T.isInfixOf Text
y Text
x
stringLitContains (Char16Literal Word16String
x) (Char16Literal Word16String
y) = Word16String -> Word16String -> Bool
WS.isInfixOf Word16String
y Word16String
x
stringLitContains (Char8Literal ByteString
x) (Char8Literal ByteString
y) = ByteString -> ByteString -> Bool
BS.isInfixOf ByteString
y ByteString
x

stringLitIsPrefixOf :: StringLiteral si -> StringLiteral si -> Bool
stringLitIsPrefixOf :: StringLiteral si -> StringLiteral si -> Bool
stringLitIsPrefixOf (UnicodeLiteral Text
x) (UnicodeLiteral Text
y) = Text -> Text -> Bool
T.isPrefixOf Text
x Text
y
stringLitIsPrefixOf (Char16Literal Word16String
x) (Char16Literal Word16String
y) = Word16String -> Word16String -> Bool
WS.isPrefixOf Word16String
x Word16String
y
stringLitIsPrefixOf (Char8Literal ByteString
x) (Char8Literal ByteString
y) = ByteString -> ByteString -> Bool
BS.isPrefixOf ByteString
x ByteString
y

stringLitIsSuffixOf :: StringLiteral si -> StringLiteral si -> Bool
stringLitIsSuffixOf :: StringLiteral si -> StringLiteral si -> Bool
stringLitIsSuffixOf (UnicodeLiteral Text
x) (UnicodeLiteral Text
y) = Text -> Text -> Bool
T.isSuffixOf Text
x Text
y
stringLitIsSuffixOf (Char16Literal Word16String
x) (Char16Literal Word16String
y) = Word16String -> Word16String -> Bool
WS.isSuffixOf Word16String
x Word16String
y
stringLitIsSuffixOf (Char8Literal ByteString
x) (Char8Literal ByteString
y) = ByteString -> ByteString -> Bool
BS.isSuffixOf ByteString
x ByteString
y

stringLitSubstring :: StringLiteral si -> Integer -> Integer -> StringLiteral si
stringLitSubstring :: StringLiteral si -> Integer -> Integer -> StringLiteral si
stringLitSubstring (UnicodeLiteral Text
x) Integer
len Integer
off =
  Text -> StringLiteral Unicode
UnicodeLiteral (Text -> StringLiteral Unicode) -> Text -> StringLiteral Unicode
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
T.take (Integer -> Int
forall a. Num a => Integer -> a
fromInteger Integer
len)  (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
T.drop (Integer -> Int
forall a. Num a => Integer -> a
fromInteger Integer
off) Text
x
stringLitSubstring (Char16Literal Word16String
x) Integer
len Integer
off =
  Word16String -> StringLiteral Char16
Char16Literal  (Word16String -> StringLiteral Char16)
-> Word16String -> StringLiteral Char16
forall a b. (a -> b) -> a -> b
$ Int -> Word16String -> Word16String
WS.take (Integer -> Int
forall a. Num a => Integer -> a
fromInteger Integer
len) (Word16String -> Word16String) -> Word16String -> Word16String
forall a b. (a -> b) -> a -> b
$ Int -> Word16String -> Word16String
WS.drop (Integer -> Int
forall a. Num a => Integer -> a
fromInteger Integer
off) Word16String
x
stringLitSubstring (Char8Literal ByteString
x) Integer
len Integer
off =
  ByteString -> StringLiteral Char8
Char8Literal   (ByteString -> StringLiteral Char8)
-> ByteString -> StringLiteral Char8
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
BS.take (Integer -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
len) (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
BS.drop (Integer -> Int
forall a. Num a => Integer -> a
fromInteger Integer
off) ByteString
x

stringLitIndexOf :: StringLiteral si -> StringLiteral si -> Integer -> Integer
stringLitIndexOf :: StringLiteral si -> StringLiteral si -> Integer -> Integer
stringLitIndexOf (UnicodeLiteral Text
x) (UnicodeLiteral Text
y) Integer
k
   | Text -> Bool
T.null Text
y = Integer
0
   | Text -> Bool
T.null Text
b = -Integer
1
   | Bool
otherwise = Int -> Integer
forall a. Integral a => a -> Integer
toInteger (Text -> Int
T.length Text
a) Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
+ Integer
k
  where (Text
a,Text
b) = Text -> Text -> (Text, Text)
T.breakOn Text
y (Int -> Text -> Text
T.drop (Integer -> Int
forall a. Num a => Integer -> a
fromInteger Integer
k) Text
x)

stringLitIndexOf (Char16Literal Word16String
x) (Char16Literal Word16String
y) Integer
k =
  case Word16String -> Word16String -> Maybe Int
WS.findSubstring Word16String
y (Int -> Word16String -> Word16String
WS.drop (Integer -> Int
forall a. Num a => Integer -> a
fromInteger Integer
k) Word16String
x) of
    Maybe Int
Nothing -> -Integer
1
    Just Int
n  -> Int -> Integer
forall a. Integral a => a -> Integer
toInteger Int
n Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
+ Integer
k

stringLitIndexOf (Char8Literal ByteString
x) (Char8Literal ByteString
y) Integer
k =
  case ByteString -> ByteString -> Maybe Int
bsFindSubstring ByteString
y (Int -> ByteString -> ByteString
BS.drop (Integer -> Int
forall a. Num a => Integer -> a
fromInteger Integer
k) ByteString
x) of
    Maybe Int
Nothing -> -Integer
1
    Just Int
n  -> Int -> Integer
forall a. Integral a => a -> Integer
toInteger Int
n Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
+ Integer
k

-- | Get the first index of a substring in another string,
--   or 'Nothing' if the string is not found.
--
--   Copy/pasted from the old `bytestring` implementation because it was
--   deprecated/removed for some reason.
bsFindSubstring :: BS.ByteString -- ^ String to search for.
              -> BS.ByteString -- ^ String to seach in.
              -> Maybe Int
bsFindSubstring :: ByteString -> ByteString -> Maybe Int
bsFindSubstring ByteString
pat ByteString
src
    | ByteString -> Bool
BS.null ByteString
pat Bool -> Bool -> Bool
&& ByteString -> Bool
BS.null ByteString
src = Int -> Maybe Int
forall a. a -> Maybe a
Just Int
0
    | ByteString -> Bool
BS.null ByteString
b = Maybe Int
forall a. Maybe a
Nothing
    | Bool
otherwise = Int -> Maybe Int
forall a. a -> Maybe a
Just (ByteString -> Int
BS.length ByteString
a)
  where (ByteString
a, ByteString
b) = ByteString -> ByteString -> (ByteString, ByteString)
BS.breakSubstring ByteString
pat ByteString
src

stringLitBounds :: StringLiteral si -> Maybe (Int, Int)
stringLitBounds :: StringLiteral si -> Maybe (Int, Int)
stringLitBounds StringLiteral si
si =
  case StringLiteral si
si of
    UnicodeLiteral Text
t -> (Maybe (Int, Int) -> Char -> Maybe (Int, Int))
-> Maybe (Int, Int) -> Text -> Maybe (Int, Int)
forall a. (a -> Char -> a) -> a -> Text -> a
T.foldl' Maybe (Int, Int) -> Char -> Maybe (Int, Int)
forall a. Enum a => Maybe (Int, Int) -> a -> Maybe (Int, Int)
f Maybe (Int, Int)
forall a. Maybe a
Nothing Text
t
    Char16Literal Word16String
ws -> (Maybe (Int, Int) -> Word16 -> Maybe (Int, Int))
-> Maybe (Int, Int) -> Word16String -> Maybe (Int, Int)
forall a. (a -> Word16 -> a) -> a -> Word16String -> a
WS.foldl' Maybe (Int, Int) -> Word16 -> Maybe (Int, Int)
forall a. Enum a => Maybe (Int, Int) -> a -> Maybe (Int, Int)
f Maybe (Int, Int)
forall a. Maybe a
Nothing Word16String
ws
    Char8Literal ByteString
bs  -> (Maybe (Int, Int) -> Word8 -> Maybe (Int, Int))
-> Maybe (Int, Int) -> ByteString -> Maybe (Int, Int)
forall a. (a -> Word8 -> a) -> a -> ByteString -> a
BS.foldl' Maybe (Int, Int) -> Word8 -> Maybe (Int, Int)
forall a. Enum a => Maybe (Int, Int) -> a -> Maybe (Int, Int)
f Maybe (Int, Int)
forall a. Maybe a
Nothing ByteString
bs

 where
 f :: Enum a =>  Maybe (Int,Int) -> a -> Maybe (Int, Int)
 f :: Maybe (Int, Int) -> a -> Maybe (Int, Int)
f Maybe (Int, Int)
Nothing a
c = (Int, Int) -> Maybe (Int, Int)
forall a. a -> Maybe a
Just (a -> Int
forall a. Enum a => a -> Int
fromEnum a
c, a -> Int
forall a. Enum a => a -> Int
fromEnum a
c)
 f (Just (Int
lo, Int
hi)) a
c = Int
lo' Int -> Maybe (Int, Int) -> Maybe (Int, Int)
`seq` Int
hi' Int -> Maybe (Int, Int) -> Maybe (Int, Int)
`seq` (Int, Int) -> Maybe (Int, Int)
forall a. a -> Maybe a
Just (Int
lo',Int
hi')
    where
    lo' :: Int
lo' = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
lo (a -> Int
forall a. Enum a => a -> Int
fromEnum a
c)
    hi' :: Int
hi' = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
hi (a -> Int
forall a. Enum a => a -> Int
fromEnum a
c)


instance Semigroup (StringLiteral si) where
  UnicodeLiteral Text
x <> :: StringLiteral si -> StringLiteral si -> StringLiteral si
<> UnicodeLiteral Text
y = Text -> StringLiteral Unicode
UnicodeLiteral (Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
y)
  Char16Literal Word16String
x  <> Char16Literal Word16String
y  = Word16String -> StringLiteral Char16
Char16Literal (Word16String
x Word16String -> Word16String -> Word16String
forall a. Semigroup a => a -> a -> a
<> Word16String
y)
  Char8Literal ByteString
x   <> Char8Literal ByteString
y   = ByteString -> StringLiteral Char8
Char8Literal (ByteString
x ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
y)

instance IsString (StringLiteral Unicode) where
  fromString :: String -> StringLiteral Unicode
fromString = Text -> StringLiteral Unicode
UnicodeLiteral (Text -> StringLiteral Unicode)
-> (String -> Text) -> String -> StringLiteral Unicode
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack