{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE Strict #-}
{-# LANGUAGE BinaryLiterals #-}
module Djot.Inlines
  ( parseInlines
  , parseTableCells
  )
where

import Data.Char (isAscii, isLetter, isAlphaNum, isSymbol, isPunctuation)
import Control.Monad (guard, when, mzero)
import Data.Sequence (Seq)
import qualified Data.Sequence as Seq
import Data.Set (Set)
import qualified Data.Set as Set
import Djot.Parse
import Djot.Options (ParseOptions(..), SourcePosOption(..))
import Djot.Attributes (pAttributes)
import Djot.AST
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import Data.ByteString (ByteString)
import Data.Foldable as F
import Control.Applicative
import Data.Maybe (fromMaybe)

-- import Debug.Trace

{-# INLINE isSpecial #-}
isSpecial :: Char -> Bool
isSpecial c = c == '[' || c == ']' || c == '<' || c == '>' ||
              c == '$' || c == '!' || c == '{' || c == '}' || c == ':' ||
              c == '=' || c == '+' || c == '-' || c == '^' || c == '~' ||
              c == '*' || c == '_' || c == '\''|| c == '"' || c == '.' ||
              c == '|' || c == '`' || c == '\\'|| c == '\n' || c == '\r'

parseInlines :: ParseOptions -> Seq Chunk -> Either String Inlines
parseInlines opts chunks = do
  case parse (pInlines <* eof) ParserState{ mode = NormalMode
                                          , activeDelims = mempty
                                          , options = opts }
       (toList (stripEndChunks chunks)) of
    Just ils -> Right ils
    Nothing -> Left $ "parseInlines failed on input: "
                     <> show (foldMap chunkBytes chunks)

parseTableCells :: ParseOptions -> Chunk -> Either String [Inlines]
parseTableCells opts chunk = do
  case parse (asciiChar '|'
                 *> some (removeFinalWs <$> pInlines <* asciiChar '|')
                 <* skipMany ws
                 <* eof)
        ParserState{ mode = TableCellMode
                   , activeDelims = mempty
                   , options = opts }
       [chunk] of
    Just cells -> Right cells
    Nothing -> Left $ "parseTableCells failed on input: " <> show chunk

removeFinalWs :: Inlines -> Inlines
removeFinalWs (Many ils) = Many $
  case Seq.viewr ils of
    rest Seq.:> Node pos attr (Str bs)
      | B8.takeEnd 1 bs == " "
        -> case B8.dropWhileEnd (== ' ') bs of
             "" -> rest
             bs' -> rest Seq.|> Node pos attr (Str bs')
    _ -> ils

data InlineParseMode =
  NormalMode | TableCellMode
  deriving (Show, Ord, Eq)

data ParserState =
  ParserState
  { mode :: InlineParseMode
  , activeDelims :: Set Delim
  , options :: ParseOptions }
  deriving (Show)

data Delim = Delim Bool Char
  deriving (Show, Ord, Eq)

type P = Parser ParserState

pInlines :: P Inlines
pInlines = skipMany ws *> (mconcat <$> many pInline)

pInline :: P Inlines
pInline = do
  sline <- sourceLine
  scol <- sourceColumn
  res <- pInline'
  opts <- options <$> getState
  (case sourcePositions opts of
     AllSourcePos -> do
       eline <- sourceLine
       ecol <- sourceColumn
       pure $ addPos (Pos sline scol eline (ecol - 1)) <$> res
     _ -> pure res) >>= pOptionalAttributes

pOptionalAttributes :: Inlines -> P Inlines
pOptionalAttributes (Many ils) = pAddAttributes (Many ils) <|> pure (Many ils)

pAddAttributes :: Inlines -> P Inlines
pAddAttributes (Many ils) = do
  attr <- mconcat <$> some pAttributes
  pure $
    case attr of
      Attr [] -> Many ils
      _ -> case Seq.viewr ils of
             Seq.EmptyR -> mempty
             ils' Seq.:> Node pos attr' (Str bs)
               | B8.any isWs bs ->
               -- attach attribute to last word
               let (front, lastword) = B8.breakEnd isWs bs
               in if B.null lastword
                     then Many ils  -- ignore attr after whitespace
                     else
                       let (pos1, pos2) =
                               case pos of
                                 NoPos -> (NoPos, NoPos)
                                 Pos sl sc el ec ->
                                   let frontlen = B8.length
                                        (B8.filter (\c -> c < '\128' || c >= '\192') front)
                                   in (Pos sl sc sl (sc + frontlen),
                                       Pos sl (sc + frontlen + 1) el ec)
                       in Many (ils' Seq.|>
                                     Node pos1 attr' (Str front) Seq.|>
                                     Node pos2 attr (Str lastword))
             ils' Seq.:> Node pos attr' il ->
               Many (ils' Seq.|> Node pos (attr' <> attr) il)

pInline' :: P Inlines
pInline' = do
  (do c <- lookahead (satisfyByte isSpecial)
      fails pCloser
      (case c of
          '\\' -> pEscaped
          '[' -> pFootnoteReference
             <|> pLinkOrSpan
          '<' -> pAutolink
          '!' -> pImage
          '_' -> pEmph
          '*' -> pStrong
          '^' -> pSuperscript
          '~' -> pSubscript
          '{' -> pEmph
             <|> pStrong
             <|> pHighlight
             <|> pInsert
             <|> pDelete
             <|> pSuperscript
             <|> pSubscript
             <|> pDoubleQuote
             <|> pSingleQuote
             <|> (mempty <$ pAttributes)
          '`' -> pVerbatim
          ':' -> pSymbol
          '$' -> pMath
          '"' -> pDoubleQuote
          '\'' -> pSingleQuote
          '-' -> pHyphens
          '.' -> pEllipses
          '\n' -> pSoftBreak
          _ -> mzero)
        <|> pSpecial
       ) <|> pWords

pSpecial :: P Inlines
pSpecial = do
  st <- getState
  c <- satisfyByte (case mode st of
                       TableCellMode -> \d -> isSpecial d && d /= '|'
                       _ -> isSpecial)
  if c == '\r'
     then pure mempty
     else pure $ str $ B8.singleton c

pWords :: P Inlines
pWords = str <$> byteStringOf (skipSome (skipSatisfyByte (not . isSpecial)))

pEscaped :: P Inlines
pEscaped = do
  asciiChar '\\'
  c <- satisfyByte (\d ->
          isAscii d &&
            (isSymbol d || isPunctuation d || d == ' ' || d == '\t'))
         <|> ('\n' <$ endline)
         <|> pure '\\'
  case c of
    '\n' -> hardBreak <$ skipMany spaceOrTab
    _ | c == ' ' || c == '\t' -> pHardBreak
                             <|> if c == ' '
                                    then pure nonBreakingSpace
                                    else pure $ str "\\\t"
    _ -> pure $ str $ B8.singleton c

pHardBreak :: P Inlines
pHardBreak = do -- assumes we've parsed \ already
  skipMany spaceOrTab
  endline
  skipMany spaceOrTab
  pure hardBreak

pSoftBreak :: P Inlines
pSoftBreak = do
  endline
  skipMany spaceOrTab
  (mempty <$ eof) <|> pure softBreak

pSymbol :: P Inlines
pSymbol = do
  asciiChar ':'
  bs <- byteStringOf $ skipSome (skipSatisfyByte
                                    (\c -> c == '+' || c == '-' ||
                                         (isAscii c && isAlphaNum c)))
  asciiChar ':'
  pure $ symbol bs

pMath :: P Inlines
pMath = do
  asciiChar '$'
  mathStyle <- (DisplayMath <$ asciiChar '$') <|> pure InlineMath
  verb <- pVerbatim
  case unMany verb of
         [Node pos attr (Verbatim bs)] ->
              pure $ Many $ Seq.singleton $ Node pos attr (Math mathStyle bs)
         _ -> pure $ (case mathStyle of
                        DisplayMath -> str "$$"
                        _ -> str "$") <> verb

{-# INLINE bracesRequired #-}
bracesRequired :: Char -> Bool
bracesRequired '=' = True
bracesRequired '+' = True
bracesRequired '-' = True
bracesRequired _ = False

pCloser :: P ()
pCloser = do
  delims <- activeDelims <$> getState
  if Set.null delims
     then mzero
     else do
       openerHadBrace <- asum $
         map (\(Delim hadBrace c) -> hadBrace <$ asciiChar c) (F.toList delims)
       mblastc <- peekBack
       let afterws = maybe True isWs mblastc
       when ( afterws || openerHadBrace ) $ asciiChar '}'

pEmph, pStrong, pSuperscript, pSubscript :: P Inlines
pEmph = pBetween '_' emph
pStrong = pBetween '*' strong
pSuperscript = pBetween '^' superscript
pSubscript = pBetween '~' subscript

pHighlight, pInsert, pDelete :: P Inlines
pHighlight = pBetween '=' highlight
pInsert = pBetween '+' insert
pDelete = pBetween '-' delete

pBetween :: Char -> (Inlines -> Inlines) -> P Inlines
pBetween c constructor = do
  let starter leftBrace = do
        case leftBrace of
          False
            | bracesRequired c -> mzero
            | otherwise -> asciiChar c `notFollowedBy`
                                (ws <|> asciiChar '}')
          True -> asciiChar c `notFollowedBy` asciiChar '}'
  let ender leftBrace = do
        mblastc <- peekBack
        let afterws = maybe True isWs mblastc
        asciiChar c
        if leftBrace
           then asciiChar '}'
           else guard (not afterws) `notFollowedBy` asciiChar '}'
  leftBrace <- (True <$ asciiChar '{') <|> pure False
  starterBs <- (if leftBrace then ("{" <>) else id) <$>
                 byteStringOf (starter leftBrace) `notFollowedBy` pAttributes
                 -- don't let *{.foo} start emphasis, for example
  oldActiveDelims <- activeDelims <$> getState
  updateState $ \st -> st{ activeDelims = Set.insert (Delim leftBrace c)
                                             (activeDelims st) }
  firstIl <- pInline <|> pBetween c constructor -- to allow stacked cases like '**hi**'
  restIls <- many pInline
  let ils = mconcat (firstIl:restIls)
  updateState $ \st -> st{ activeDelims = oldActiveDelims }
  (constructor ils <$ ender leftBrace) <|> pure (str starterBs <> ils)

pTicks :: P Int
pTicks = do
  sp <- getOffset
  skipSome (asciiChar '`')
  ep <- getOffset
  pure (ep - sp)

pVerbatim :: P Inlines
pVerbatim = do
  numticks <- pTicks
  let ender = pTicks >>= guard . (== numticks)
  let content = skipSome (skipSatisfyByte (\c -> c /= '`' && c /= '\\')) <|>
                 (asciiChar '\\' <* anyChar) <|>
                 (fails ender *> skipSome (asciiChar '`'))
  bs <- trimSpaces <$> byteStringOf (skipMany content) <* (ender <|> eof)
  (rawInline <$> pRawAttribute <*> pure bs) <|> pure (verbatim bs)

-- Trim a leading space if first non-space character is `,
-- and similarly for trailing space/last non-space.
trimSpaces :: ByteString -> ByteString
trimSpaces = trimSpaceFront . trimSpaceBack
 where
   trimSpaceFront bs =
        case B8.span (== ' ') bs of
          (a, b) | B8.take 1 b == "`"
                 , not (B8.null a)
            -> B8.drop 1 bs
          _ -> bs
   trimSpaceBack bs =
        case B8.spanEnd (== ' ') bs of
          (a, b) | B8.takeEnd 1 a == "`"
                 , not (B8.null b)
            -> B8.dropEnd 1 bs
          _ -> bs

pRawAttribute :: P Format
pRawAttribute = do
  byteString "{="
  fmt <- Format <$>
             byteStringOf (skipMany (skipSatisfyByte (\c -> c /= '}' &&
                                                 not (isWs c))))
  asciiChar '}'
  pure fmt

pFootnoteReference :: P Inlines
pFootnoteReference = do
  asciiChar '['
  asciiChar '^'
  label <- byteStringOf $ skipMany $
             skipSatisfyByte (\c -> c /= ']' && not (isWs c))
  asciiChar ']'
  pure $ footnoteReference label

-- returns Left with parsed content if no ] has been reached, otherwise Right
-- with inner contents.
pBracketed :: P (Either Inlines Inlines)
pBracketed = do
  let starter = asciiChar '['
  let ender = asciiChar ']'
  starterBs <- byteStringOf starter
  oldActiveDelims <- activeDelims <$> getState
  updateState $ \st -> st{ activeDelims = Set.insert (Delim False ']') (activeDelims st) }
  ils <- mconcat <$> many pInline
  updateState $ \st -> st{ activeDelims = oldActiveDelims }
  (Right ils <$ ender) <|> pure (Left (str starterBs <> ils))

pImage :: P Inlines
pImage = do
  asciiChar '!'
  (res, raw) <- withByteString pBracketed
  case res of
    Left ils -> pure (str "!" <> ils)
    Right ils ->
            ((str "!" <>) <$> pAddAttributes (span_ ils))
        <|> (image ils <$> (pDestination <|> pReference raw))
        <|> pure (str "![" <> ils <> str "]")

pAutolink :: P Inlines
pAutolink = do
  asciiChar '<'
  res <- byteStringOf $ skipSome $ skipSatisfyByte (\c -> c /= '>' && c /= '<')
  asciiChar '>'
  let url = B8.filter (\c -> c /= '\n' && c /= '\r') res
  case B8.find (\c -> c == '@' || c == ':' || c == '.') url of
    Just '@' -> pure $ emailLink url
    Just _ -> pure $ urlLink url
    Nothing -> mzero

pLinkOrSpan :: P Inlines
pLinkOrSpan = do
  (res, raw) <- withByteString pBracketed
  case res of
    Left ils -> pure ils
    Right ils ->
            (span_ ils <$ lookahead (asciiChar '{'))
        <|> (link ils <$> (pDestination <|> pReference raw))
        <|> pure (str "[" <> ils <> str "]")

-- We allow balanced pairs of parens inside.
pDestination :: P Target
pDestination = do
  asciiChar '('
  res <- byteStringOf $ pInBalancedParens 0
  asciiChar ')'
  pure $ Direct (snd (handleEscapesAndNewlines res))
 where
  handleEscapesAndNewlines = B8.foldl' go (False, mempty)
  go (esc, bs) '\n' = (esc, bs)
  go (esc, bs) '\r' = (esc, bs)
  go (True, bs) c = (False, bs `B8.snoc` c)
  go (False, bs) '\\' = (True, bs)
  go (False, bs) c = (False, bs `B8.snoc` c)

pInBalancedParens :: Int -> P ()
pInBalancedParens nestlevel =
  (guard (nestlevel == 0) <* lookahead (asciiChar ')')) <|>
    do lev <-   (nestlevel <$ (fails pCloser *>
                               -- but see https://github.com/jgm/djot/discussions/247
                               skipSatisfyByte
                                 (\c -> c /= '(' && c /= ')' && c /= '\\')))
            <|> (nestlevel <$ (asciiChar '\\' <* anyChar))
            <|> ((nestlevel + 1) <$ asciiChar '(')
            <|> ((nestlevel - 1) <$ asciiChar ')')
       pInBalancedParens lev

pReference :: ByteString -> P Target
pReference rawDescription = do
  asciiChar '['
  bs <- byteStringOf $ pAtMost 400 $ skipSatisfyByte
           (\c -> c /= '[' && c /= ']')
  asciiChar ']'
  let label = normalizeLabel $
              if B.null bs
                 then B.drop 1 $ B.dropEnd 1
                               $ B8.filter (/= '\n') rawDescription
                 else bs
  pure $ Reference label

pAtMost :: Int -> P () -> P ()
pAtMost n pa = optional_ (pa *> when (n > 0) (pAtMost ( n - 1 ) pa))

pOpenDoubleQuote :: P ()
pOpenDoubleQuote = do
  lbrace <- (True <$ asciiChar '{') <|> pure False
  asciiChar '"'
  rbrace <- (True <$ asciiChar '}') <|> pure False
  guard $ lbrace || not rbrace

pCloseDoubleQuote :: P ()
pCloseDoubleQuote = do
  mblastc <- peekBack
  let whitespaceBefore = maybe True isWs mblastc
  lbrace <- (True <$ asciiChar '{') <|> pure False
  asciiChar '"'
  rbrace <- (True <$ asciiChar '}') <|> pure False
  whitespaceAfter <- (True <$ lookahead (skipSatisfyByte isWs)) <|> pure False
  guard $ not lbrace && (rbrace || not whitespaceBefore || whitespaceAfter)

pDoubleQuote :: P Inlines
pDoubleQuote = (do
  pOpenDoubleQuote
  contents <- mconcat <$> many (fails pCloseDoubleQuote *> pInline)
  (doubleQuoted contents <$ pCloseDoubleQuote)
    <|> pure (openDoubleQuote <> contents))
 <|> (closeDoubleQuote <$ asciiChar '"')

openDoubleQuote, closeDoubleQuote :: Inlines
openDoubleQuote = str "\226\128\156" -- utf8 0x201C
closeDoubleQuote = str "\226\128\157" -- utf8 0x201D

pOpenSingleQuote :: P ()
pOpenSingleQuote = do
  lastc <- fromMaybe '\n' <$> peekBack
  let openContext = lastc == '\t' || lastc == '\r' ||
                    lastc == '\n' || lastc == ' ' ||
                    lastc == '"' || lastc == '\'' ||
                    lastc == '(' || lastc == '[' || lastc == '\0'
  lbrace <- (True <$ asciiChar '{') <|> pure False
  asciiChar '\''
  rbrace <- (True <$ asciiChar '}') <|> pure False
  guard $ lbrace || (openContext && not rbrace)

pCloseSingleQuote :: P ()
pCloseSingleQuote = do
  mblastc <- peekBack
  let whitespaceBefore = maybe True isWs mblastc
  lbrace <- (True <$ asciiChar '{') <|> pure False
  asciiChar '\''
  rbrace <- (True <$ asciiChar '}') <|> pure False
  letterAfter <- (True <$ lookahead (satisfy isLetter)) <|> pure False
  guard $ not lbrace && (rbrace || not (whitespaceBefore || letterAfter))

pSingleQuote :: P Inlines
pSingleQuote = (do
  pOpenSingleQuote
  contents <- mconcat <$> many (fails pCloseSingleQuote *> pInline)
  (singleQuoted contents <$ pCloseSingleQuote)
    <|> pure (closeSingleQuote <> contents))
 <|> (closeSingleQuote <$ asciiChar '\'')

closeSingleQuote :: Inlines
closeSingleQuote = str "\226\128\153" -- utf8 0x2019

pHyphens :: P Inlines
pHyphens = do
  numHyphens <- length <$> some hyphen
  pure $ str $ go numHyphens
    where
     emdash = "\226\128\148" -- utf8 0x2014
     endash = "\226\128\147" -- utf8 0x2013
     hyphen = asciiChar '-' `notFollowedBy` asciiChar '}'
     go 1 = "-"
     go n | n `mod` 3 == 0
          = mconcat (replicate (n `Prelude.div` 3) emdash)
          | n `mod` 2 == 0
          = mconcat (replicate (n `Prelude.div` 2) endash)
          | n `mod` 3 == 2
          = mconcat (replicate (n `Prelude.div` 3) emdash) <> endash
          | n `mod` 3 == 1
          = mconcat (replicate (n `Prelude.div` 3 - 1) emdash) <>
              endash <> endash
          | otherwise
          = emdash <> go (n - 3)

pEllipses :: P Inlines
pEllipses = str "\226\128\166" {- utf8 0x2026 -} <$ byteString "..."

stripEndChunks :: Seq Chunk -> Seq Chunk
stripEndChunks cs =
  case Seq.viewr cs of
    initial Seq.:> c ->
      initial Seq.|> c{ chunkBytes = B8.dropWhileEnd isWs (chunkBytes c) }
    _ -> cs