{-# LANGUAGE Haskell2010 #-}
-- |
-- Module      : Language.Preprocessor.Unlit
-- Copyright   : (c) 1992 authors of the Haskell 1.2 Report,
--               (c) 2013 Stijn van Drongelen
--
-- License     : BSD-style
-- Maintainer  : rhymoid@gmail.com
-- Stability   : experimental
-- Portability : portable
--

module Language.Preprocessor.Unlit
    (
    -- * @unlit@
      unlit
    , UnlitError (..)
    , unlitLinesWith

    -- * Parsers
    , ghcParsers
    , birdParser
    , latexParser
    , cppParser
    , pandocMarkdownParser

    -- ** Defining custom parsers
    , Parser (..)
    , Classified (..)
    , NeedHug (..)
    , Active (..)

    -- * Splitting and joining lines
    , lines
    , unlines
    ) where

import Prelude hiding (lines, unlines)
import Control.Arrow (first, second, (&&&))
import Data.Char (isSpace)
import Data.Function
import Data.List hiding (lines, unlines)
import Data.Maybe (fromMaybe)
import qualified Data.Text as T

-- | Runs 'unlitLinesWith' on a blob of text, using the default parsers of GHC.
unlit :: T.Text -> ([UnlitError], T.Text)
unlit = second unlines . unlitLinesWith ghcParsers . lines

-- | Parse a list of lines, given a sequence of parsers.
unlitLinesWith :: [Parser T.Text] -> [T.Text] -> ([UnlitError], [T.Text])
unlitLinesWith parsers
    = (checkErrors &&& map unclassify)
    . classify parsers
  where
    checkErrors :: [Classified x] -> [UnlitError]
    checkErrors [] = []
    checkErrors (c:cs) = concat $ zipWith3 adjacentError [1..] (c:cs) (cs ++ [Blank])

adjacentError :: Integer -> Classified x -> Classified x -> [UnlitError]
adjacentError _ (Program NeedHug _ _) Blank = []
adjacentError _ Blank (Program NeedHug _ _) = []
adjacentError ln (Program NeedHug _ _) _ = [NotFollowedByBlank ln]
adjacentError ln _ (Program NeedHug _ _) = [NotPrecededByBlank (succ ln)]
adjacentError _ _ _ = []

data UnlitError
    = NotPrecededByBlank Integer
    | NotFollowedByBlank Integer

data Classified x
    = Program NeedHug Active x  -- ^ Program lines.
    | Comment                   -- ^ Comment lines.
    | Blank                     -- ^ Blank lines.

data NeedHug
    = NeedHug   -- ^ A sequence of program lines needs blank lines around it.
    | Whatever  -- ^ This line doesn't care about adjacent lines.

data Active
    = Active    -- ^ An active line of code.
    | Inactive  -- ^ An inactive line of code.

newtype Parser x = Parser (x -> [x] -> Maybe ([Classified x], Maybe [x]))

classify :: [Parser T.Text] -> [T.Text] -> [Classified T.Text]
classify parsers = go
  where
    go [] = []
    go (x:xs)
        | Just (cs, more) <- checkParsers x xs
        -- TODO Communicate error when 'more' is Nothing.
        = cs ++ go (fromMaybe [] more)
        | T.all isSpace x = Blank : go xs
        | otherwise = Comment : go xs

    checkParsers p1 p2
        = foldr orElse Nothing
        . map (\(Parser f) -> f p1 p2)
        $ parsers

unclassify :: Classified T.Text -> T.Text
unclassify (Program _ Active x) = x
unclassify _ = T.empty

-----

-- | The default parsers used by GHC.
ghcParsers :: [Parser T.Text]
ghcParsers = [latexParser, birdParser, cppParser]

-- | Parser for blocks of code delimited by @\\begin{code}@ and @\\end{code}@
--   or @\\begin{pseudocode}@ and @\\end{pseudocode}@.
latexParser :: Parser T.Text
latexParser = Parser p
  where
    p ln lns
        | Just sbz <- T.pack "\\begin{code}" `T.stripPrefix` sln
        = block sbz Active (T.pack "\\end{code}") lns
        | Just sbz <- T.pack "\\begin{pseudocode}" `T.stripPrefix` sln
        = block sbz Active (T.pack "\\end{pseudocode}") lns
        | otherwise = Nothing
      where
        sln = T.dropWhile isSpace ln

    block sbz act delim
        | not (T.all isSpace sbz)
        -- TODO Communicate error.
        = block T.empty act delim
    block _ act delim = Just . first (Blank:) . go
      where
        go [] = ([], Nothing)
        go (ln:lns)
            | Just _sbz <- delim `T.stripPrefix` sln
            -- TODO Communicate error if not (T.all isSpace sbz)
            = ([Blank], Just lns)
            | otherwise = first (Program Whatever act ln:) (go lns)
          where
            sln = T.dropWhile isSpace ln

-- | Parser for blocks of code prefixed by @>@ or @<@.
birdParser :: Parser T.Text
birdParser = Parser p
  where
    p ln lns
        | T.pack ">" `T.isPrefixOf` sln = result Active
        | T.pack "<" `T.isPrefixOf` sln = result Inactive
        | otherwise = Nothing
      where
        (prefix, sln) = T.span isSpace ln
        rln = T.concat [prefix, T.pack " ", T.drop 1 ln]
        result act = Just ([Program NeedHug act rln], Just lns)

-- | Parser for lines prefixed by @#@.
cppParser :: Parser T.Text
cppParser = Parser p
  where
    p ln lns
        | T.pack "#" `T.isPrefixOf` ln
        = Just ([Program Whatever Active ln], Just lns)
        | otherwise = Nothing

-- | Parser for lines fenced by backticks or tildes.
pandocMarkdownParser :: (T.Text -> Active) -> Parser T.Text
pandocMarkdownParser testAttributes = Parser p
  where
    p ln lns
        | T.pack "```" `T.isPrefixOf` ln
        = block (testAttributes btTail) btFence lns
        | T.pack "~~~" `T.isPrefixOf` ln
        = block (testAttributes tiTail) tiFence lns
        | otherwise = Nothing
      where
        (btFence, btTail) = T.span (== '`') ln
        (tiFence, tiTail) = T.span (== '~') ln

    block act delim = Just . first (Blank:) . go
      where
        go [] = ([], Nothing)
        go (ln:lns)
            | delim `T.isPrefixOf` ln = ([Blank], Just lns)
            | otherwise = first (Program NeedHug act ln:) (go lns)

---------------
-- Utilities --
---------------

-- | Breaks a 'Text' up into a list of 'Text's at newline sequences.
--   The resulting strings do not contain newline sequences.
lines :: T.Text -> [T.Text]
lines = linesWith [T.pack "\r\n", T.pack "\r", T.pack "\n"] -- U+2028, U+2029?

-- | Joins lines of 'Text'. In the resulting string, lines are terminated
--   with a single newline character (@'\\n'@).
unlines :: [T.Text] -> T.Text
unlines = unlinesWith (T.pack "\n")

linesWith :: [T.Text] -> T.Text -> [T.Text]
linesWith delimiters tx = if T.null tx then [] else go tx
  where
    ds = sortBy (compare `on` (negate . T.length)) delimiters
    stripAnyPrefix t = foldr orElse Nothing . map (flip T.stripPrefix t) $ ds
    dh = nub . map (T.head) . filter (not . T.null) $ delimiters

    go t | T.null t = []
         | T.null tt = [th]
         | Just tn <- stripAnyPrefix tt = th : go tn
         | True = case go tt' of
                    [] -> [th']
                    (y:ys) -> T.append th' y:ys
      where
        (th, tt) = T.span (not . (`elem` dh)) t
        (th', tt') = (th' `T.snoc` T.head tt', T.tail tt')

unlinesWith :: T.Text -> [T.Text] -> T.Text
unlinesWith nl = T.concat . map (`T.append` nl)

orElse :: Maybe a -> Maybe a -> Maybe a
orElse Nothing y = y
orElse x _ = x