-- SPDX-FileCopyrightText: 2021 Oxhead Alpha
-- SPDX-License-Identifier: LicenseRef-MIT-OA

-- | Static analysis of Michelson code.

module Morley.Michelson.Analyzer
  ( AnalyzerRes (..)
  , analyze
  ) where

import Data.Default (def)
import Data.HashMap.Strict qualified as HM
import Data.List.NonEmpty qualified as NE
import Fmt (Buildable(..), Doc, blockMapF', hexF, nameF, (+|))

import Morley.Michelson.FailPattern
import Morley.Michelson.Text (MText)
import Morley.Michelson.Typed
import Morley.Util.Text (dquotes)

data AnalyzerRes = AnalyzerRes
  { AnalyzerRes -> HashMap MText Word
arConstStrings :: HashMap MText Word
  -- ^ All string constants and number of their occurrences.
  , AnalyzerRes -> HashMap ByteString Word
arConstBytes :: HashMap ByteString Word
  -- ^ All bytes constants and number of their occurrences.
  , AnalyzerRes -> HashMap MText Word
arErrorTags :: HashMap MText Word
  -- ^ Which strings are used as error tags and how many times.
  -- There is no notion of "error tag" in Michelson, so we use a heuristic
  -- to find out whether a string is an error tag. Specifically, we consider
  -- three patterns:
  -- 1. A constant string is pushed and then there is `FAILWITH` immediately.
  -- 2. A constant string is pushed, followed by `PAIR` instruction and then
  -- `FAILWITH`.
  -- 3. A constant pair is pushed where the first item is a string and then
  -- there is `FAILWITH.
  } deriving stock (Int -> AnalyzerRes -> ShowS
[AnalyzerRes] -> ShowS
AnalyzerRes -> String
(Int -> AnalyzerRes -> ShowS)
-> (AnalyzerRes -> String)
-> ([AnalyzerRes] -> ShowS)
-> Show AnalyzerRes
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> AnalyzerRes -> ShowS
showsPrec :: Int -> AnalyzerRes -> ShowS
$cshow :: AnalyzerRes -> String
show :: AnalyzerRes -> String
$cshowList :: [AnalyzerRes] -> ShowS
showList :: [AnalyzerRes] -> ShowS
Show, AnalyzerRes -> AnalyzerRes -> Bool
(AnalyzerRes -> AnalyzerRes -> Bool)
-> (AnalyzerRes -> AnalyzerRes -> Bool) -> Eq AnalyzerRes
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: AnalyzerRes -> AnalyzerRes -> Bool
== :: AnalyzerRes -> AnalyzerRes -> Bool
$c/= :: AnalyzerRes -> AnalyzerRes -> Bool
/= :: AnalyzerRes -> AnalyzerRes -> Bool
Eq)

instance Buildable AnalyzerRes where
  build :: AnalyzerRes -> Doc
build (AnalyzerRes
    (HashMap MText Word
-> [(Key (HashMap MText Word), Val (HashMap MText Word))]
forall t. ToPairs t => t -> [(Key t, Val t)]
toPairs -> [(Key (HashMap MText Word), Val (HashMap MText Word))]
constStrings)
    (HashMap ByteString Word
-> [(Key (HashMap ByteString Word), Val (HashMap ByteString Word))]
forall t. ToPairs t => t -> [(Key t, Val t)]
toPairs -> [(Key (HashMap ByteString Word), Val (HashMap ByteString Word))]
constBytes)
    (HashMap MText Word
-> [(Key (HashMap MText Word), Val (HashMap MText Word))]
forall t. ToPairs t => t -> [(Key t, Val t)]
toPairs -> [(Key (HashMap MText Word), Val (HashMap MText Word))]
errorTags)
    ) =
    Doc -> Doc -> Doc
forall a. Buildable a => Doc -> a -> Doc
nameF Doc
"String constants"
      ([(MText, Word)] -> Doc
buildStrings ([(MText, Word)] -> Doc) -> [(MText, Word)] -> Doc
forall a b. (a -> b) -> a -> b
$ [(MText, Word)] -> [(MText, Word)]
forall k. [(k, Word)] -> [(k, Word)]
sortByCount [(Key (HashMap MText Word), Val (HashMap MText Word))]
[(MText, Word)]
constStrings)
    Doc -> Doc -> Doc
forall b. FromDoc b => Doc -> Doc -> b
+|
    Doc -> Doc -> Doc
forall a. Buildable a => Doc -> a -> Doc
nameF Doc
"Bytes constants"
      ([(ByteString, Word)] -> Doc
buildBytes ([(ByteString, Word)] -> Doc) -> [(ByteString, Word)] -> Doc
forall a b. (a -> b) -> a -> b
$ [(ByteString, Word)] -> [(ByteString, Word)]
forall k. [(k, Word)] -> [(k, Word)]
sortByCount [(ByteString, Word)]
[(Key (HashMap ByteString Word), Val (HashMap ByteString Word))]
constBytes)
    Doc -> Doc -> Doc
forall b. FromDoc b => Doc -> Doc -> b
+|
    Doc -> Doc -> Doc
forall a. Buildable a => Doc -> a -> Doc
nameF Doc
"Error tags"
      ([(MText, Word)] -> Doc
buildStrings ([(MText, Word)] -> Doc) -> [(MText, Word)] -> Doc
forall a b. (a -> b) -> a -> b
$ [(MText, Word)] -> [(MText, Word)]
forall k. [(k, Word)] -> [(k, Word)]
sortByCount [(Key (HashMap MText Word), Val (HashMap MText Word))]
[(MText, Word)]
errorTags)
    Doc -> Doc -> Doc
forall b. FromDoc b => Doc -> Doc -> b
+|
    Doc -> [(MText, Word)] -> ([(MText, Word)] -> Doc) -> Doc
forall x.
Container x =>
Doc -> [(x, Word)] -> ([(x, Word)] -> Doc) -> Doc
longest Doc
"strings" [(Key (HashMap MText Word), Val (HashMap MText Word))]
[(MText, Word)]
constStrings [(MText, Word)] -> Doc
buildStrings
    Doc -> Doc -> Doc
forall b. FromDoc b => Doc -> Doc -> b
+|
    Doc -> [(ByteString, Word)] -> ([(ByteString, Word)] -> Doc) -> Doc
forall x.
Container x =>
Doc -> [(x, Word)] -> ([(x, Word)] -> Doc) -> Doc
longest Doc
"bytes" [(ByteString, Word)]
[(Key (HashMap ByteString Word), Val (HashMap ByteString Word))]
constBytes [(ByteString, Word)] -> Doc
buildBytes
    where
      buildStrings :: [(MText, Word)] -> Doc
buildStrings = (MText -> Doc) -> (Word -> Doc) -> [(MText, Word)] -> Doc
forall f k v.
(IsList f, Item f ~ (k, v)) =>
(k -> Doc) -> (v -> Doc) -> f -> Doc
blockMapF' (Doc -> Doc
forall a. (Semigroup a, IsString a) => a -> a
dquotes (Doc -> Doc) -> (MText -> Doc) -> MText -> Doc
forall b c a. (b -> c) -> (a -> b) -> a -> c
. MText -> Doc
forall a. Buildable a => a -> Doc
build) Word -> Doc
forall a. Buildable a => a -> Doc
build
      buildBytes :: [(ByteString, Word)] -> Doc
buildBytes = (ByteString -> Doc) -> (Word -> Doc) -> [(ByteString, Word)] -> Doc
forall f k v.
(IsList f, Item f ~ (k, v)) =>
(k -> Doc) -> (v -> Doc) -> f -> Doc
blockMapF' (Doc -> Doc -> Doc
forall a. Monoid a => a -> a -> a
mappend Doc
"0x" (Doc -> Doc) -> (ByteString -> Doc) -> ByteString -> Doc
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Doc
forall a. FormatAsHex a => a -> Doc
hexF) Word -> Doc
forall a. Buildable a => a -> Doc
build

      sortByCount :: [(k, Word)] -> [(k, Word)]
      sortByCount :: forall k. [(k, Word)] -> [(k, Word)]
sortByCount = ((k, Word) -> Word) -> [(k, Word)] -> [(k, Word)]
forall b a. Ord b => (a -> b) -> [a] -> [a]
sortWith (k, Word) -> Word
forall a b. (a, b) -> b
snd

      sortByLength :: Container k => [(k, Word)] -> [(k, Word)]
      sortByLength :: forall k. Container k => [(k, Word)] -> [(k, Word)]
sortByLength = ((k, Word) -> Down Int) -> [(k, Word)] -> [(k, Word)]
forall b a. Ord b => (a -> b) -> [a] -> [a]
sortWith (Int -> Down Int
forall a. a -> Down a
Down (Int -> Down Int) -> ((k, Word) -> Int) -> (k, Word) -> Down Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. k -> Int
forall i a.
(Integral i, Container a,
 DefaultToInt (IsIntSubType Length i) i) =>
a -> i
length (k -> Int) -> ((k, Word) -> k) -> (k, Word) -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (k, Word) -> k
forall a b. (a, b) -> a
fst)

      longest ::
        Container x =>
        Doc -> [(x, Word)] -> ([(x, Word)] -> Doc) -> Doc
      longest :: forall x.
Container x =>
Doc -> [(x, Word)] -> ([(x, Word)] -> Doc) -> Doc
longest Doc
name [(x, Word)]
items [(x, Word)] -> Doc
builder
        | [(x, Word)] -> Int
forall i a.
(Integral i, Container a,
 DefaultToInt (IsIntSubType Length i) i) =>
a -> i
length [(x, Word)]
items Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
6 =
          Doc -> Doc -> Doc
forall a. Buildable a => Doc -> a -> Doc
nameF (Doc
"Longest " Doc -> Doc -> Doc
forall a. Semigroup a => a -> a -> a
<> Doc
name) (Doc -> Doc) -> Doc -> Doc
forall a b. (a -> b) -> a -> b
$ [(x, Word)] -> Doc
builder ([(x, Word)] -> Doc) -> [(x, Word)] -> Doc
forall a b. (a -> b) -> a -> b
$ Int -> [(x, Word)] -> [(x, Word)]
forall a. Int -> [a] -> [a]
take Int
4 ([(x, Word)] -> [(x, Word)]) -> [(x, Word)] -> [(x, Word)]
forall a b. (a -> b) -> a -> b
$ [(x, Word)] -> [(x, Word)]
forall k. Container k => [(k, Word)] -> [(k, Word)]
sortByLength [(x, Word)]
items
        | Bool
otherwise = Doc
forall a. Monoid a => a
mempty

instance Semigroup AnalyzerRes where
  AnalyzerRes
ar1 <> :: AnalyzerRes -> AnalyzerRes -> AnalyzerRes
<> AnalyzerRes
ar2 = AnalyzerRes
    { arConstStrings :: HashMap MText Word
arConstStrings = AnalyzerRes -> HashMap MText Word
arConstStrings AnalyzerRes
ar1 HashMap MText Word -> HashMap MText Word -> HashMap MText Word
forall {k} {v}.
(Hashable k, Num v) =>
HashMap k v -> HashMap k v -> HashMap k v
<+> AnalyzerRes -> HashMap MText Word
arConstStrings AnalyzerRes
ar2
    , arConstBytes :: HashMap ByteString Word
arConstBytes = AnalyzerRes -> HashMap ByteString Word
arConstBytes AnalyzerRes
ar1 HashMap ByteString Word
-> HashMap ByteString Word -> HashMap ByteString Word
forall {k} {v}.
(Hashable k, Num v) =>
HashMap k v -> HashMap k v -> HashMap k v
<+> AnalyzerRes -> HashMap ByteString Word
arConstBytes AnalyzerRes
ar2
    , arErrorTags :: HashMap MText Word
arErrorTags = AnalyzerRes -> HashMap MText Word
arErrorTags AnalyzerRes
ar1 HashMap MText Word -> HashMap MText Word -> HashMap MText Word
forall {k} {v}.
(Hashable k, Num v) =>
HashMap k v -> HashMap k v -> HashMap k v
<+> AnalyzerRes -> HashMap MText Word
arErrorTags AnalyzerRes
ar2
    }
    where
      HashMap k v
m1 <+> :: HashMap k v -> HashMap k v -> HashMap k v
<+> HashMap k v
m2 = (v -> v -> v) -> HashMap k v -> HashMap k v -> HashMap k v
forall k v.
(Eq k, Hashable k) =>
(v -> v -> v) -> HashMap k v -> HashMap k v -> HashMap k v
HM.unionWith v -> v -> v
forall a. Num a => a -> a -> a
(+) HashMap k v
m1 HashMap k v
m2

instance Monoid AnalyzerRes where
  mempty :: AnalyzerRes
mempty = AnalyzerRes
    { arConstStrings :: HashMap MText Word
arConstStrings = HashMap MText Word
forall a. Monoid a => a
mempty
    , arConstBytes :: HashMap ByteString Word
arConstBytes = HashMap ByteString Word
forall a. Monoid a => a
mempty
    , arErrorTags :: HashMap MText Word
arErrorTags = HashMap MText Word
forall a. Monoid a => a
mempty
    }

-- | Statically analyze an instruction. Typed representation is used
-- because it's easier to analyze. It means that we can't analyze
-- ill-typed contracts, but hopefully it's not a serious limitation.
analyze :: Instr inp out -> AnalyzerRes
analyze :: forall (inp :: [T]) (out :: [T]). Instr inp out -> AnalyzerRes
analyze = DfsSettings (Writer AnalyzerRes)
-> (forall (inp :: [T]) (out :: [T]). Instr inp out -> AnalyzerRes)
-> Instr inp out
-> AnalyzerRes
forall x (inp :: [T]) (out :: [T]).
Monoid x =>
DfsSettings (Writer x)
-> (forall (i :: [T]) (o :: [T]). Instr i o -> x)
-> Instr inp out
-> x
dfsFoldInstr DfsSettings (Writer AnalyzerRes)
forall a. Default a => a
def{ dsGoToValues :: Bool
dsGoToValues = Bool
True } Instr i o -> AnalyzerRes
forall (inp :: [T]) (out :: [T]). Instr inp out -> AnalyzerRes
step (Instr inp out -> AnalyzerRes)
-> (Instr inp out -> Instr inp out) -> Instr inp out -> AnalyzerRes
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Instr inp out -> Instr inp out
forall (inp :: [T]) (out :: [T]). Instr inp out -> Instr inp out
linearizeLeftDeep
  where
    step :: forall i o. Instr i o -> AnalyzerRes
    step :: forall (inp :: [T]) (out :: [T]). Instr inp out -> AnalyzerRes
step Instr i o
i = Instr i o -> AnalyzerRes
forall (inp :: [T]) (out :: [T]). Instr inp out -> AnalyzerRes
analyzeConstants Instr i o
i AnalyzerRes -> AnalyzerRes -> AnalyzerRes
forall a. Semigroup a => a -> a -> a
<> Instr i o -> AnalyzerRes
forall (inp :: [T]) (out :: [T]). Instr inp out -> AnalyzerRes
analyzeErrorTags Instr i o
i

    countItems :: (Ord i, Hashable i) => [i] -> HashMap i Word
    countItems :: forall i. (Ord i, Hashable i) => [i] -> HashMap i Word
countItems = [(i, Word)] -> HashMap i Word
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HM.fromList ([(i, Word)] -> HashMap i Word)
-> ([i] -> [(i, Word)]) -> [i] -> HashMap i Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (NonEmpty i -> (i, Word)) -> [NonEmpty i] -> [(i, Word)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map NonEmpty i -> (i, Word)
forall {a}. NonEmpty a -> (a, Word)
f ([NonEmpty i] -> [(i, Word)])
-> ([i] -> [NonEmpty i]) -> [i] -> [(i, Word)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [i] -> [NonEmpty i]
forall (f :: * -> *) a. (Foldable f, Eq a) => f a -> [NonEmpty a]
NE.group ([i] -> [NonEmpty i]) -> ([i] -> [i]) -> [i] -> [NonEmpty i]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [i] -> [i]
forall a. Ord a => [a] -> [a]
sort
      where
        f :: NonEmpty a -> (a, Word)
f NonEmpty a
ne = (NonEmpty a -> a
forall a. NonEmpty a -> a
head NonEmpty a
ne, forall i a.
(Integral i, Container a,
 DefaultToInt (IsIntSubType Length i) i) =>
a -> i
length @Word NonEmpty a
ne)

    analyzeConstants :: forall i o. Instr i o -> AnalyzerRes
    analyzeConstants :: forall (inp :: [T]) (out :: [T]). Instr inp out -> AnalyzerRes
analyzeConstants = \case
      PUSH Value' Instr t
v -> AnalyzerRes
forall a. Monoid a => a
mempty
        { arConstStrings :: HashMap MText Word
arConstStrings = [MText] -> HashMap MText Word
forall i. (Ord i, Hashable i) => [i] -> HashMap i Word
countItems ([MText] -> HashMap MText Word) -> [MText] -> HashMap MText Word
forall a b. (a -> b) -> a -> b
$ (forall (t' :: T). Value t' -> Maybe MText)
-> Value' Instr t -> [MText]
forall (t :: T) a.
(forall (t' :: T). Value t' -> Maybe a) -> Value t -> [a]
allAtomicValues Value t' -> Maybe MText
forall (t' :: T). Value t' -> Maybe MText
isStringValue Value' Instr t
v
        , arConstBytes :: HashMap ByteString Word
arConstBytes = [ByteString] -> HashMap ByteString Word
forall i. (Ord i, Hashable i) => [i] -> HashMap i Word
countItems ([ByteString] -> HashMap ByteString Word)
-> [ByteString] -> HashMap ByteString Word
forall a b. (a -> b) -> a -> b
$ (forall (t' :: T). Value t' -> Maybe ByteString)
-> Value' Instr t -> [ByteString]
forall (t :: T) a.
(forall (t' :: T). Value t' -> Maybe a) -> Value t -> [a]
allAtomicValues Value t' -> Maybe ByteString
forall (t' :: T). Value t' -> Maybe ByteString
isBytesValue Value' Instr t
v
        }
      Instr i o
_ -> AnalyzerRes
forall a. Monoid a => a
mempty

    analyzeErrorTags :: forall i o. Instr i o -> AnalyzerRes
    analyzeErrorTags :: forall (inp :: [T]) (out :: [T]). Instr inp out -> AnalyzerRes
analyzeErrorTags Instr i o
i
      | Just TypicalFailWith
tfw <- Instr i o -> Maybe TypicalFailWith
forall (inp :: [T]) (out :: [T]).
Instr inp out -> Maybe TypicalFailWith
isTypicalFailWith Instr i o
i =
          AnalyzerRes
forall a. Monoid a => a
mempty { arErrorTags :: HashMap MText Word
arErrorTags = OneItem (HashMap MText Word) -> HashMap MText Word
forall x. One x => OneItem x -> x
one (TypicalFailWith -> MText
typicalFailWithTag TypicalFailWith
tfw, Word
1) }
      | Bool
otherwise = AnalyzerRes
forall a. Monoid a => a
mempty