-- | The lexical structure of an Agda file.
--
-- We parse nested block comments.
--
-- If a block comment of nesting level 0 starts ("{-") at the
-- beginning of a line, it is considered "commenting out".  We then
-- parse until we are back to nesting level 0 and return a Comment.
--
-- If not in a block comment a line starts with "--" it is considered
-- a line of text.
--
-- A line consisting only of whitespace is a blank line.
--
-- Everything else is Agda code.  If it contains a block comment start,
-- we parse until we are back at nesting level 0.

{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE PatternSynonyms #-}

{-# OPTIONS_GHC -Wall #-}
-- {-# OPTIONS_GHC -Wno-missing-signatures #-}
{-# OPTIONS_GHC -Wno-missing-pattern-synonym-signatures #-}

module LexicalStructure
  ( parseItems
  , Item
  , pattern CodeItem
  , pattern CommItem
  , pattern TextItem
  ) where

import Data.Bifunctor
import Data.Char (isSpace)
import Data.Maybe

-- * Item parser

-- | We recognize three different kinds of content in Agda files.
data Ty
  = Code    -- ^ Agda code.
  | Text    -- ^ Single line comments, to be converted to text.
  | Comment -- ^ Commented out parts in block comments.
  deriving (Show, Eq)

-- | The result of parsing is a list of items.
type Item = Item' String

data Item' a = Item Ty a
  deriving (Show, Functor)

-- | Agda code.
pattern CodeItem a = Item Code a

-- | Text from consecutive single line comments.
pattern TextItem a = Item Text a

-- | Commented out code from block comments.
pattern CommItem a = Item Comment a

-- | Entry point for the parser of the lexical structure.

parseItems
  :: RevList Item  -- ^ Accumulator, reversed list.
  -> String        -- ^ Input.
  -> [Item]        -- ^ Parse result.
parseItems acc = \case
  [] -> reverse acc
  s  ->
    case parseItem 0 Nothing s of
      (mi, s') -> parseItems (maybeToList mi ++ acc) s'

type RevString = String
type NumBlankLines = Int

-- | Type synonym for list to mark it as snoc-list.
type RevList a = [a]
type State = Maybe (Item' (RevList String))

-- | Parse one item.

parseItem
  :: NumBlankLines -- ^ Parser state part 1.
  -> State         -- ^ Parser state part 2: what are we parsing?
  -> String        -- ^ Not empty when called from outside.
  -> (Maybe Item, String)
parseItem n it s =
  -- We discard trailing blank lines.
  if null s then done

  -- Peek at next line
  else case (it, line) of

      (_, Nothing)   -> parseItem (n + 1) it s'

      -- Start parsing an item.  We discard initial blank lines.
      (Nothing, _)   -> parseItem 0 (lineToState line) s'

      (Just (Item t ls), Just (Item t' (k, l)))
        | t /= t'   -> done
        | otherwise -> parseItem 0 (Just $ Item t ls') s'
            where ls' = indent k l : replicate n "" ++ ls

  where
  (line, s') = parseLine 0 s
  stateToResult = fmap $ fmap $ unlines . reverse
  done = (stateToResult it, s)

-- * Line parser

type Line = Maybe (Item' (Indentation, String))

-- | Just whitespace.
pattern BlankLine = Nothing

-- | Agda code.
pattern CodeLine n s = Just (CodeItem (n, s))

-- | Single line comment.
pattern TextLine n s = Just (TextItem (n, s))

-- | Commented out.
pattern CommentedOut s = Just (CommItem (0, s))

type Indentation  = Int

lineToState :: Line -> State
lineToState = fmap $ fmap $ (:[]) . uncurry indent

-- UNUSED:
-- lineToItem :: Line -> Maybe Item
-- lineToItem = fmap $ fmap $ uncurry indent

-- lineToItem :: Line -> Maybe Item
-- lineToItem = \case
--   BlankLine      -> Nothing
--   CodeLine n s   -> CodeItem $ indent n s
--   TextLine n s   -> TextItem $ indent n s
--   CommentedOut s -> Comment s

indent :: Indentation -> String -> String
indent n s = replicate n ' ' ++ s

type RespectBlockComment = Bool
type NestingLevel = Int

-- | Parse next line (including block comment if it starts on that line).

parseLine :: Indentation -> String -> (Line, String)
parseLine ind = \case

  -- End of line/file

  []          -> (BlankLine, [])
  '\r':'\n':s -> (BlankLine, s)
  '\r':s      -> (BlankLine, s)
  '\n':s      -> (BlankLine, s)

  -- Space
  ' ':s     -> parseLine (ind + 1) s

  -- -- Pragma
  -- s@('{':'-':'#':_) -> first (CodeLine ind . reverse) $ parseToEOL True [] s

  -- Commenting out
  '{':'-':s | ind == 0, take 1 s /= "#"
            -> first (CommentedOut . dropOneSpace . reverse . trimLeft) $
                 parseBlockComment False 0 [] s

  -- Line comment to text; ingore block comments here
  '-':'-':s -> first (TextLine ind . dropOneSpace . reverse . trimLeft) $
                 parseToEOL False [] s

  -- Other: Agda code
  s         -> first (CodeLine ind . reverse . trimLeft) $
                 parseToEOL True [] s
  where
  dropOneSpace = \case
    ' ':x -> x
    x     -> x


-- | Parse to the end of the line (unless block comment opens; then include it).

parseToEOL
  :: RespectBlockComment
     -- ^ How should we treat a block commenting opening?
     --
     --   If 'False', ignore it and treat it as ordinary text
     --   (e.g. if inside a line comment).
     --
     --   If 'True', respect it: look for the matching closing
     --   and continue after that.
     --
     --   In any case, do not discard the delimiter(s)!
  -> RevString
  -> String
  -> (RevString, String)
parseToEOL b acc = \case

  -- Reached end of line (EOL).
  []            -> (acc, [])
  '\r':'\n':s   -> (acc, s)
  '\r':s        -> (acc, s)
  '\n':s        -> (acc, s)

  -- Skip over block comment.
  '{':'-':s | b -> uncurry (parseToEOL b) $
                     parseBlockComment True 0 ('-':'{':acc) s

  -- Skip over character.
  c:s           -> parseToEOL b (c:acc) s

-- | Skip over nested block comments.

parseBlockComment
  :: Bool
     -- ^ Include final closing block delimiter?
  -> NestingLevel
  -> RevString
     -- ^ Accumulator, reversed.
  -> String
     -- ^ Input
  -> (RevString, String)
     -- ^ Block comment, reversed, containing inner comment delimiters and line breaks; output.
parseBlockComment b n acc = \case

  -- Close comment.
  '-':'}':s
   | n <= 0    -> (if b then '}':'-':acc else acc, s)
   | otherwise -> parseBlockComment b (n - 1) ('}':'-':acc) s

  -- Open comment.
  '{':'-':s    -> parseBlockComment b (n + 1) ('-':'{':acc) s

  []  -> (acc,[])
  c:s -> parseBlockComment b n (c:acc) s


-- * Auxiliary functions.

trimLeft :: String -> String
trimLeft = dropWhile isSpace