-- SPDX-FileCopyrightText: 2020 Tocqueville Group
--
-- SPDX-License-Identifier: LicenseRef-MIT-TQ

-- | Static analysis of Michelson code.

module Michelson.Analyzer
  ( AnalyzerRes (..)
  , analyze
  ) where

import Data.Default (def)
import qualified Data.HashMap.Strict as HM
import qualified Data.List.NonEmpty as NE
import Fmt (Buildable(..), Builder, blockMapF', hexF, nameF, (+|))

import Michelson.FailPattern
import Michelson.Text (MText)
import Michelson.Typed

data AnalyzerRes = AnalyzerRes
  { AnalyzerRes -> HashMap MText Word
arConstStrings :: HashMap MText Word
  -- ^ All string constants and number of their occurrences.
  , AnalyzerRes -> HashMap ByteString Word
arConstBytes :: HashMap ByteString Word
  -- ^ All bytes constants and number of their occurrences.
  , AnalyzerRes -> HashMap MText Word
arErrorTags :: HashMap MText Word
  -- ^ Which strings are used as error tags and how many times.
  -- There is no notion of "error tag" in Michelson, so we use a heuristic
  -- to find out whether a string is an error tag. Specifically, we consider
  -- three patterns:
  -- 1. A constant string is pushed and then there is `FAILWITH` immediately.
  -- 2. A constant string is pushed, followed by `PAIR` instruction and then
  -- `FAILWITH`.
  -- 3. A constant pair is pushed where the first item is a string and then
  -- there is `FAILWITH.
  } deriving stock (Int -> AnalyzerRes -> ShowS
[AnalyzerRes] -> ShowS
AnalyzerRes -> String
(Int -> AnalyzerRes -> ShowS)
-> (AnalyzerRes -> String)
-> ([AnalyzerRes] -> ShowS)
-> Show AnalyzerRes
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [AnalyzerRes] -> ShowS
$cshowList :: [AnalyzerRes] -> ShowS
show :: AnalyzerRes -> String
$cshow :: AnalyzerRes -> String
showsPrec :: Int -> AnalyzerRes -> ShowS
$cshowsPrec :: Int -> AnalyzerRes -> ShowS
Show, AnalyzerRes -> AnalyzerRes -> Bool
(AnalyzerRes -> AnalyzerRes -> Bool)
-> (AnalyzerRes -> AnalyzerRes -> Bool) -> Eq AnalyzerRes
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: AnalyzerRes -> AnalyzerRes -> Bool
$c/= :: AnalyzerRes -> AnalyzerRes -> Bool
== :: AnalyzerRes -> AnalyzerRes -> Bool
$c== :: AnalyzerRes -> AnalyzerRes -> Bool
Eq)

instance Buildable AnalyzerRes where
  build :: AnalyzerRes -> Builder
build (AnalyzerRes
    (HashMap MText Word
-> [(Key (HashMap MText Word), Val (HashMap MText Word))]
forall t. ToPairs t => t -> [(Key t, Val t)]
toPairs -> [(Key (HashMap MText Word), Val (HashMap MText Word))]
constStrings)
    (HashMap ByteString Word
-> [(Key (HashMap ByteString Word), Val (HashMap ByteString Word))]
forall t. ToPairs t => t -> [(Key t, Val t)]
toPairs -> [(Key (HashMap ByteString Word), Val (HashMap ByteString Word))]
constBytes)
    (HashMap MText Word
-> [(Key (HashMap MText Word), Val (HashMap MText Word))]
forall t. ToPairs t => t -> [(Key t, Val t)]
toPairs -> [(Key (HashMap MText Word), Val (HashMap MText Word))]
errorTags)
    ) =
    Builder -> Builder -> Builder
nameF "String constants"
      ([(MText, Word)] -> Builder
buildStrings ([(MText, Word)] -> Builder) -> [(MText, Word)] -> Builder
forall a b. (a -> b) -> a -> b
$ [(MText, Word)] -> [(MText, Word)]
forall k. [(k, Word)] -> [(k, Word)]
sortByCount [(Key (HashMap MText Word), Val (HashMap MText Word))]
[(MText, Word)]
constStrings)
    Builder -> Builder -> Builder
forall b. FromBuilder b => Builder -> Builder -> b
+|
    Builder -> Builder -> Builder
nameF "Bytes constants"
      ([(ByteString, Word)] -> Builder
buildBytes ([(ByteString, Word)] -> Builder)
-> [(ByteString, Word)] -> Builder
forall a b. (a -> b) -> a -> b
$ [(ByteString, Word)] -> [(ByteString, Word)]
forall k. [(k, Word)] -> [(k, Word)]
sortByCount [(ByteString, Word)]
[(Key (HashMap ByteString Word), Val (HashMap ByteString Word))]
constBytes)
    Builder -> Builder -> Builder
forall b. FromBuilder b => Builder -> Builder -> b
+|
    Builder -> Builder -> Builder
nameF "Error tags"
      ([(MText, Word)] -> Builder
buildStrings ([(MText, Word)] -> Builder) -> [(MText, Word)] -> Builder
forall a b. (a -> b) -> a -> b
$ [(MText, Word)] -> [(MText, Word)]
forall k. [(k, Word)] -> [(k, Word)]
sortByCount [(Key (HashMap MText Word), Val (HashMap MText Word))]
[(MText, Word)]
errorTags)
    Builder -> Builder -> Builder
forall b. FromBuilder b => Builder -> Builder -> b
+|
    Builder
-> [(MText, Word)] -> ([(MText, Word)] -> Builder) -> Builder
forall x.
Container x =>
Builder -> [(x, Word)] -> ([(x, Word)] -> Builder) -> Builder
longest "strings" [(Key (HashMap MText Word), Val (HashMap MText Word))]
[(MText, Word)]
constStrings [(MText, Word)] -> Builder
buildStrings
    Builder -> Builder -> Builder
forall b. FromBuilder b => Builder -> Builder -> b
+|
    Builder
-> [(ByteString, Word)]
-> ([(ByteString, Word)] -> Builder)
-> Builder
forall x.
Container x =>
Builder -> [(x, Word)] -> ([(x, Word)] -> Builder) -> Builder
longest "bytes" [(ByteString, Word)]
[(Key (HashMap ByteString Word), Val (HashMap ByteString Word))]
constBytes [(ByteString, Word)] -> Builder
buildBytes
    where
      inQuotes :: (t -> a) -> t -> a
inQuotes toBuilder :: t -> a
toBuilder x :: t
x = "\"" a -> a -> a
forall a. Semigroup a => a -> a -> a
<> t -> a
toBuilder t
x a -> a -> a
forall a. Semigroup a => a -> a -> a
<> "\""

      buildStrings :: [(MText, Word)] -> Builder
buildStrings = (MText -> Builder)
-> (Word -> Builder) -> [(MText, Word)] -> Builder
forall t k v.
(IsList t, Item t ~ (k, v)) =>
(k -> Builder) -> (v -> Builder) -> t -> Builder
blockMapF' ((MText -> Builder) -> MText -> Builder
forall a t. (Semigroup a, IsString a) => (t -> a) -> t -> a
inQuotes MText -> Builder
forall p. Buildable p => p -> Builder
build) Word -> Builder
forall p. Buildable p => p -> Builder
build
      buildBytes :: [(ByteString, Word)] -> Builder
buildBytes = (ByteString -> Builder)
-> (Word -> Builder) -> [(ByteString, Word)] -> Builder
forall t k v.
(IsList t, Item t ~ (k, v)) =>
(k -> Builder) -> (v -> Builder) -> t -> Builder
blockMapF' (Builder -> Builder -> Builder
forall a. Monoid a => a -> a -> a
mappend "0x" (Builder -> Builder)
-> (ByteString -> Builder) -> ByteString -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Builder
forall a. FormatAsHex a => a -> Builder
hexF) Word -> Builder
forall p. Buildable p => p -> Builder
build

      sortByCount :: [(k, Word)] -> [(k, Word)]
      sortByCount :: [(k, Word)] -> [(k, Word)]
sortByCount = ((k, Word) -> Word) -> [(k, Word)] -> [(k, Word)]
forall b a. Ord b => (a -> b) -> [a] -> [a]
sortWith (k, Word) -> Word
forall a b. (a, b) -> b
snd

      sortByLength :: Container k => [(k, Word)] -> [(k, Word)]
      sortByLength :: [(k, Word)] -> [(k, Word)]
sortByLength = ((k, Word) -> Down Int) -> [(k, Word)] -> [(k, Word)]
forall b a. Ord b => (a -> b) -> [a] -> [a]
sortWith (Int -> Down Int
forall a. a -> Down a
Down (Int -> Down Int) -> ((k, Word) -> Int) -> (k, Word) -> Down Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. k -> Int
forall t. Container t => t -> Int
length (k -> Int) -> ((k, Word) -> k) -> (k, Word) -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (k, Word) -> k
forall a b. (a, b) -> a
fst)

      longest ::
        Container x =>
        Builder -> [(x, Word)] -> ([(x, Word)] -> Builder) -> Builder
      longest :: Builder -> [(x, Word)] -> ([(x, Word)] -> Builder) -> Builder
longest name :: Builder
name items :: [(x, Word)]
items builder :: [(x, Word)] -> Builder
builder
        | [(x, Word)] -> Int
forall t. Container t => t -> Int
length [(x, Word)]
items Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 6 =
          Builder -> Builder -> Builder
nameF ("Longest " Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
name) (Builder -> Builder) -> Builder -> Builder
forall a b. (a -> b) -> a -> b
$ [(x, Word)] -> Builder
builder ([(x, Word)] -> Builder) -> [(x, Word)] -> Builder
forall a b. (a -> b) -> a -> b
$ Int -> [(x, Word)] -> [(x, Word)]
forall a. Int -> [a] -> [a]
take 4 ([(x, Word)] -> [(x, Word)]) -> [(x, Word)] -> [(x, Word)]
forall a b. (a -> b) -> a -> b
$ [(x, Word)] -> [(x, Word)]
forall k. Container k => [(k, Word)] -> [(k, Word)]
sortByLength [(x, Word)]
items
        | Bool
otherwise = Builder
forall a. Monoid a => a
mempty

instance Semigroup AnalyzerRes where
  ar1 :: AnalyzerRes
ar1 <> :: AnalyzerRes -> AnalyzerRes -> AnalyzerRes
<> ar2 :: AnalyzerRes
ar2 = $WAnalyzerRes :: HashMap MText Word
-> HashMap ByteString Word -> HashMap MText Word -> AnalyzerRes
AnalyzerRes
    { arConstStrings :: HashMap MText Word
arConstStrings = AnalyzerRes -> HashMap MText Word
arConstStrings AnalyzerRes
ar1 HashMap MText Word -> HashMap MText Word -> HashMap MText Word
forall k v.
(Eq k, Hashable k, Num v) =>
HashMap k v -> HashMap k v -> HashMap k v
<+> AnalyzerRes -> HashMap MText Word
arConstStrings AnalyzerRes
ar2
    , arConstBytes :: HashMap ByteString Word
arConstBytes = AnalyzerRes -> HashMap ByteString Word
arConstBytes AnalyzerRes
ar1 HashMap ByteString Word
-> HashMap ByteString Word -> HashMap ByteString Word
forall k v.
(Eq k, Hashable k, Num v) =>
HashMap k v -> HashMap k v -> HashMap k v
<+> AnalyzerRes -> HashMap ByteString Word
arConstBytes AnalyzerRes
ar2
    , arErrorTags :: HashMap MText Word
arErrorTags = AnalyzerRes -> HashMap MText Word
arErrorTags AnalyzerRes
ar1 HashMap MText Word -> HashMap MText Word -> HashMap MText Word
forall k v.
(Eq k, Hashable k, Num v) =>
HashMap k v -> HashMap k v -> HashMap k v
<+> AnalyzerRes -> HashMap MText Word
arErrorTags AnalyzerRes
ar2
    }
    where
      m1 :: HashMap k v
m1 <+> :: HashMap k v -> HashMap k v -> HashMap k v
<+> m2 :: HashMap k v
m2 = (v -> v -> v) -> HashMap k v -> HashMap k v -> HashMap k v
forall k v.
(Eq k, Hashable k) =>
(v -> v -> v) -> HashMap k v -> HashMap k v -> HashMap k v
HM.unionWith v -> v -> v
forall a. Num a => a -> a -> a
(+) HashMap k v
m1 HashMap k v
m2

instance Monoid AnalyzerRes where
  mempty :: AnalyzerRes
mempty = $WAnalyzerRes :: HashMap MText Word
-> HashMap ByteString Word -> HashMap MText Word -> AnalyzerRes
AnalyzerRes
    { arConstStrings :: HashMap MText Word
arConstStrings = HashMap MText Word
forall a. Monoid a => a
mempty
    , arConstBytes :: HashMap ByteString Word
arConstBytes = HashMap ByteString Word
forall a. Monoid a => a
mempty
    , arErrorTags :: HashMap MText Word
arErrorTags = HashMap MText Word
forall a. Monoid a => a
mempty
    }

-- | Statically analyze an instruction. Typed representation is used
-- because it's easier to analyze. It means that we can't analyze
-- ill-typed contracts, but hopefully it's not a serious limitation.
analyze :: Instr inp out -> AnalyzerRes
analyze :: Instr inp out -> AnalyzerRes
analyze = DfsSettings AnalyzerRes
-> (forall (i :: [T]) (o :: [T]). Instr i o -> AnalyzerRes)
-> Instr inp out
-> AnalyzerRes
forall x (inp :: [T]) (out :: [T]).
Semigroup x =>
DfsSettings x
-> (forall (i :: [T]) (o :: [T]). Instr i o -> x)
-> Instr inp out
-> x
dfsFoldInstr DfsSettings AnalyzerRes
forall a. Default a => a
def{ dsGoToValues :: Bool
dsGoToValues = Bool
True } forall (i :: [T]) (o :: [T]). Instr i o -> AnalyzerRes
step (Instr inp out -> AnalyzerRes)
-> (Instr inp out -> Instr inp out) -> Instr inp out -> AnalyzerRes
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Instr inp out -> Instr inp out
forall (inp :: [T]) (out :: [T]). Instr inp out -> Instr inp out
linearizeLeftDeep
  where
    step :: forall i o. Instr i o -> AnalyzerRes
    step :: Instr i o -> AnalyzerRes
step i :: Instr i o
i = Instr i o -> AnalyzerRes
forall (i :: [T]) (o :: [T]). Instr i o -> AnalyzerRes
analyzeConstants Instr i o
i AnalyzerRes -> AnalyzerRes -> AnalyzerRes
forall a. Semigroup a => a -> a -> a
<> Instr i o -> AnalyzerRes
forall (i :: [T]) (o :: [T]). Instr i o -> AnalyzerRes
analyzeErrorTags Instr i o
i

    countItems :: (Ord i, Hashable i) => [i] -> HashMap i Word
    countItems :: [i] -> HashMap i Word
countItems = [(i, Word)] -> HashMap i Word
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HM.fromList ([(i, Word)] -> HashMap i Word)
-> ([i] -> [(i, Word)]) -> [i] -> HashMap i Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (NonEmpty i -> (i, Word)) -> [NonEmpty i] -> [(i, Word)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map NonEmpty i -> (i, Word)
forall b a. Num b => NonEmpty a -> (a, b)
f ([NonEmpty i] -> [(i, Word)])
-> ([i] -> [NonEmpty i]) -> [i] -> [(i, Word)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [i] -> [NonEmpty i]
forall (f :: * -> *) a. (Foldable f, Eq a) => f a -> [NonEmpty a]
NE.group ([i] -> [NonEmpty i]) -> ([i] -> [i]) -> [i] -> [NonEmpty i]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [i] -> [i]
forall a. Ord a => [a] -> [a]
sort
      where
        f :: NonEmpty a -> (a, b)
f ne :: NonEmpty a
ne = (NonEmpty a -> a
forall a. NonEmpty a -> a
head NonEmpty a
ne, Int -> b
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> b) -> Int -> b
forall a b. (a -> b) -> a -> b
$ NonEmpty a -> Int
forall t. Container t => t -> Int
length NonEmpty a
ne)

    analyzeConstants :: forall i o. Instr i o -> AnalyzerRes
    analyzeConstants :: Instr i o -> AnalyzerRes
analyzeConstants = \case
      PUSH v :: Value' Instr t
v -> AnalyzerRes
forall a. Monoid a => a
mempty
        { arConstStrings :: HashMap MText Word
arConstStrings = [MText] -> HashMap MText Word
forall i. (Ord i, Hashable i) => [i] -> HashMap i Word
countItems ([MText] -> HashMap MText Word) -> [MText] -> HashMap MText Word
forall a b. (a -> b) -> a -> b
$ (forall (t' :: T). Value t' -> Maybe MText)
-> Value' Instr t -> [MText]
forall (t :: T) a.
(forall (t' :: T). Value t' -> Maybe a) -> Value t -> [a]
allAtomicValues forall (t' :: T). Value t' -> Maybe MText
isStringValue Value' Instr t
v
        , arConstBytes :: HashMap ByteString Word
arConstBytes = [ByteString] -> HashMap ByteString Word
forall i. (Ord i, Hashable i) => [i] -> HashMap i Word
countItems ([ByteString] -> HashMap ByteString Word)
-> [ByteString] -> HashMap ByteString Word
forall a b. (a -> b) -> a -> b
$ (forall (t' :: T). Value t' -> Maybe ByteString)
-> Value' Instr t -> [ByteString]
forall (t :: T) a.
(forall (t' :: T). Value t' -> Maybe a) -> Value t -> [a]
allAtomicValues forall (t' :: T). Value t' -> Maybe ByteString
isBytesValue Value' Instr t
v
        }
      _ -> AnalyzerRes
forall a. Monoid a => a
mempty

    analyzeErrorTags :: forall i o. Instr i o -> AnalyzerRes
    analyzeErrorTags :: Instr i o -> AnalyzerRes
analyzeErrorTags i :: Instr i o
i
      | Just tfw :: TypicalFailWith
tfw <- Instr i o -> Maybe TypicalFailWith
forall (inp :: [T]) (out :: [T]).
Instr inp out -> Maybe TypicalFailWith
isTypicalFailWith Instr i o
i =
          AnalyzerRes
forall a. Monoid a => a
mempty { arErrorTags :: HashMap MText Word
arErrorTags = OneItem (HashMap MText Word) -> HashMap MText Word
forall x. One x => OneItem x -> x
one (TypicalFailWith -> MText
typicalFailWithTag TypicalFailWith
tfw, 1) }
      | Bool
otherwise = AnalyzerRes
forall a. Monoid a => a
mempty