-- SPDX-FileCopyrightText: 2021 Tocqueville Group
--
-- SPDX-License-Identifier: LicenseRef-MIT-TQ

{- |
String interpolation quasi-quoters. The most basic version is 'i', it returns anything that
has a 'Fmt.FromBuilder' instance: 'Text', 'ByteString', 'String', 'Builder'.

In some cases, it is more convenient to return a lazy @Text@ 'Builder', in that case 'ib', which
returns a non-polymorphic result, can be helpful, for instance, if you want to use one interpolated
string inside another one. Using 'Builder' is also more efficient in this case.

>>> let splice = [i|some text|]
>>> [i|larger text with #{splice}|] :: Text
...
... error:
... Ambiguous type variable ...
...

>>> let splice = [ib|some text|]
>>> [i|larger text with #{splice}|] :: Text
"larger text with some text"

There are also unindenting versions, i.e. those stripping the longest common indentation from each
line (note those do not consider indentation inside splices!), and trimming versions, i.e.
those that remove whitespace-only lines from beginning and end of the quote.

The mnemonic is @i@nterpolate @t@rimming @u@nindenting returning @b@uilder, i.e. 'itub' is
the trimming, unindenting version returning 'Builder'.

Versions not interpreting Haskell escape sequences additionally start with @l@, e.g. 'litu' is
@l@iteral @i@nterpolation @t@trimming @u@nindenting.

>>> [i|\955\x1F600\\|]
λ😀\

>>> [li|\955\x1F600\\|]
\955\x1F600\\

Splices are specified in the form @#{variableName}@. Note that expressions are not supported.
You can add @\@ before @#@, e.g. @\#{variableName}@, to interpret it as literal text. All the usual
Haskell string escapes also work, unless using "literal" versions. A warning will be issued if an
escape is not recognized, however the code will still compile, ignoring the backslash, e.g.
@\{@ will be treated as @{@.

All splices will be indented exactly to their column position in the original text, e.g.

>>> let splice = "multi\nline" :: Text
>>> [i|Some text #{splice}|]
Some text multi
          line

Multi-line splices will not automatically add any newlines after them, be mindful of that:

>>> let splice = "multi\nline" :: Text
>>> [i|Some text #{splice} trailing text|]
Some text multi
          line trailing text

If you wish to avoid that, either include the final newline in the splice explicitly (but be aware
that indentation of the trailing text won't be auto-adjusted, not even the leading spaces are
removed!), or include it in the quote:

>>> let splice = "multi\nline" :: Text
>>> :{
[itu|
  Some text #{splice}
  trailing text
  |]
:}
Some text multi
          line
trailing text

>>> let splice = "multi\nline\n" :: Text
>>> :{
[itu|
  Some text
    Some indented text #{splice} trailing text
  |]
:}
Some text
  Some indented text multi
                     line
 trailing text

Empty lines are never indented:

>>> let splice = "multi\n\nline" :: Text
>>> :{
print [itu|
  Some text #{splice}
  |]
:}
"Some text multi\n\n          line"

Unindenting versions will drop the first newline if the first line is empty, i.e.

>>> :{
print [iu|
There will be no leading newline here, but there will be a trailing one.
|]
:}
"There will be no leading newline here, but there will be a trailing one.\n"

but

>>> :{
print [iu|There will be a newline
here
|]
:}
"There will be a newline\nhere\n"

Unindent does not consider empty lines for finding common indentation, but it does consider
whitespace-only lines. As a result, one can control overall indentation by the indentation of the
last line:

>>> :{
let splice = "multi\nline" :: Text
in [itu|
    Some text #{splice}
    trailing text
  |]
:}
  Some text multi
            line
  trailing text
-}
module Morley.Util.Interpolate (
    i
  , ib
  , iu
  , iub
  , it
  , itb
  , itu
  , itub
  , li
  , lib
  , liu
  , liub
  , lit
  , litb
  , litu
  , litub
  ) where

import Prelude hiding (lift)

import Data.Char (isSpace)
import qualified Data.List as List
import qualified Data.Set as S
import Data.Text.Internal.Builder (Builder, fromLazyText, toLazyText)
import qualified Data.Text.Lazy as TL
import Fmt (build, fmt)
import Language.Haskell.TH (Exp, Q, lookupValueName, reportWarning, varE)
import Language.Haskell.TH.Quote (QuasiQuoter(..))
import Text.ParserCombinators.ReadP (readP_to_S)
import Text.Read.Lex (lexChar)

-- | Datatype encoding transformations we apply to a quoter input and output.
-- Note the order is important! We first unescape, then unindent, then trim.
data Transformation
  = Unescaping
  | Unindenting
  | Trimming
  | Polymorphic
  deriving stock (Eq, Ord, Enum, Bounded)

-- | A 'Set' of 'Transformation'
type Transformations = Set Transformation

-- | Actual quasi-quoters
i, ib, iu, iub, it, itb, itu, itub, li, lib, liu, liub, lit, litb, litu, litub :: QuasiQuoter

-- | 'Transformations' singletons
trimming, unindenting, unescaping, polymorphic :: Transformations
trimming = S.singleton Trimming
unindenting = S.singleton Unindenting
unescaping = S.singleton Unescaping
polymorphic = S.singleton Polymorphic

lib = mkQuoter mempty
liub = mkQuoter unindenting
litb = mkQuoter trimming
litub = mkQuoter $ trimming <> unindenting

ib = mkQuoter unescaping
iub = mkQuoter $ unindenting <> unescaping
itb = mkQuoter $ trimming <> unescaping
itub = mkQuoter $ trimming <> unindenting <> unescaping

li = mkQuoter polymorphic
liu = mkQuoter $ unindenting <> polymorphic
lit = mkQuoter $ trimming <> polymorphic
litu = mkQuoter $ trimming <> unindenting <> polymorphic

i = mkQuoter $ unescaping <> polymorphic
iu = mkQuoter $ unindenting <> unescaping <> polymorphic
it = mkQuoter $ trimming <> unescaping <> polymorphic
itu = mkQuoter $ trimming <> unindenting <> unescaping <> polymorphic

mkQuoter :: Transformations -> QuasiQuoter
mkQuoter ts = QuasiQuoter
  { quoteExp = foldr go makeBuilder $ toList ts
  , quotePat = \_ -> fail "Cannot be used at pattern position"
  , quoteType = \_ -> fail "Cannot be used at type position"
  , quoteDec = \_ -> fail "Cannot be used as declaration"
  }
  where
    go Unescaping  f = f <=< unescape
    go Unindenting f = f . unindent . dropLeadingNewline
    go Trimming    f = f . trim
    go Polymorphic f = \s -> [| fmt $(f s) |]

unescape :: String -> Q String
unescape ('\\':'#':xs) = ('\\':) . ('#':) <$> unescape xs
unescape xs@('\\':c:cs) = case readP_to_S lexChar xs of
  (ch, rest):_ -> (ch :) <$> unescape rest
  [] -> do
    reportWarning $ "Unrecognized escape sequence near '" <> takeWhile (not . isSpace) xs <> "'"
    (c :) <$> unescape cs
unescape (c:cs) = (c :) <$> unescape cs
unescape [] = pure []

makeBuilder :: String -> Q Exp
makeBuilder = go mempty
  where
    go tacc ('\\':'#':xs) = go (tacc <> "#") xs
    go tacc ('#':'{':xs)
      | (expr, '}':ys) <- List.span (/='}') xs
      = lookupValueName expr >>= \case
          Just valueName ->
            let txt = fmt tacc
                ls = splitTextLazy (=='\n') txt
                indent = length . last $ ls
            in [| build txt <> indentF'' indent (build $(varE valueName)) <> $(go mempty ys) |]
          Nothing -> fail $ "Variable '" ++ expr ++ "' is not in scope"
    go tacc (c:cs) = case List.span (`notElem` ['\\', '#']) cs of
      (str, rest) -> go (tacc <> build (c:str)) rest
    go tacc [] = let txt = fmt @Text tacc in [| build txt |]

-- | A version of @Fmt.indentF'@ that doesn't indent the first line, and keeps the final line break
-- intact.
indentF'' :: Int -> Builder -> Builder
indentF'' indent bld = case splitTextLazy (=='\n') $ toLazyText bld of
  (x :| xs) -> fromLazyText . TL.intercalate "\n" $ x : map addIndent xs
  where
    addIndent str | null str = str
                  | otherwise = spaces <> str
    spaces = TL.replicate (fromIntegral indent) " "

splitTextLazy :: (Char -> Bool) -> TL.Text -> NonEmpty TL.Text
splitTextLazy p t = case nonEmpty $ TL.split p t of
  Just ne -> ne
  Nothing -> error "Morley.Util.Interpolate.splitTextLazy: the impossible happened"

dropLeadingNewline :: String -> String
dropLeadingNewline ('\n':xs) = xs
dropLeadingNewline xs = xs

unindent :: String -> String
unindent s =
  case nonEmpty (filter (not . null) ls) of
    Just ne ->
      let minIndent = minimum $ indentOf <$> ne
      -- See Note [lines/unlines] below about the use of 'intercalate'
      in intercalate "\n" $ drop minIndent <$> ls
    Nothing -> s
  where
    ls = split (=='\n') s
    indentOf = length . takeWhile (== ' ')

-- See Note [lines/unlines] below about the use of this function
split :: (Char -> Bool) -> String -> [String]
split cond s = case break cond s of
  (pfx, _:suf) -> pfx : split cond suf
  (pfx, "") -> [pfx]

trim :: String -> String
trim = intercalate "\n" . List.dropWhileEnd (all (== ' '))
     -- See Note [lines/unlines] below about the use of 'intercalate'
     . dropWhile (all (== ' ')) . split (=='\n')

{- Note [lines/unlines]
   ~~~~~~~~~~~~~~~~~~~~

lines/unlines don't actually satisfy our requirements here due to
some edge case behaviour.

First of all, `lines` and `unlines` aren't strictly speaking inverse
of each other: `unlines` always adds a trailing newline. Consider:

>>> unlines . lines $ "hello\nworld"
"hello\nworld\n"

Furthermore, `lines` itself does "gobble" the trailing empty line, consider:

>>> lines $ "hello\nworld\n"
["hello","world"]

We actually need this to be `["hello","world",""]` for the unindenting
algorithm to work correctly (i.e. not unindent anything if the last indent is 0),
but that doesn't work with `lines` (also we either lose a trailing newline
or get an extra one with `unlines`). I could use `lines` with `trim`,
but for consistency chose not to.

Long story short, I use `split (=='\n')` and `Text.Lazy.split (=='\n')`
to get a list of lines, and `intercalate "\n"` to glue them back.
This has the behaviour we need.

- @lierdakil

-}