-- -*- coding: utf-8; mode: haskell; -*-

-- File: tests/Tests/ReferenceLexer.hs
--
-- License:
--     Copyright Neil Mitchell  2011-2017.
--     Copyright Awake Security 2017.
--     All rights reserved.
--
--     Redistribution and use in source and binary forms, with or without
--     modification, are permitted provided that the following conditions are
--     met:
--
--         * Redistributions of source code must retain the above copyright
--           notice, this list of conditions and the following disclaimer.
--
--         * Redistributions in binary form must reproduce the above
--           copyright notice, this list of conditions and the following
--           disclaimer in the documentation and/or other materials provided
--           with the distribution.
--
--         * Neither the name of Neil Mitchell nor the names of other
--           contributors may be used to endorse or promote products derived
--           from this software without specific prior written permission.
--
--     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
--     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
--     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
--     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
--     OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
--     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
--     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
--     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
--     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
--     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
--     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

{-# LANGUAGE FlexibleContexts      #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings     #-}
{-# LANGUAGE PatternGuards         #-}
{-# LANGUAGE RankNTypes            #-}
{-# LANGUAGE ScopedTypeVariables   #-}
{-# LANGUAGE ViewPatterns          #-}

-- |
--   Module      : Tests.ReferenceLexer
--   Copyright   : Copyright 2011-2017 Neil Mitchell
--   License     : BSD3
--   Maintainer  : opensource@awakesecurity.com
--   Stability   : experimental
--
--   Lexing is a slow point, the code below is optimised.
module Tests.ReferenceLexer
  ( -- * Lexing
    lexFile, lexText, lexBS

    -- * Types
  , Lexer.Lexeme (..)
  , Lexer.LName  (..)
  , Lexer.LFile  (..)
  , Lexer.LBind  (..)
  , Lexer.LBuild (..)
  ) where

import           Control.Arrow             (first)
import           Control.Monad.Error.Class (MonadError)

import qualified Data.ByteString.Char8     as BSC8

import           Data.Text                 (Text)
import qualified Data.Text                 as Text
import qualified Data.Text.Encoding        as Text

import           Data.Char                 (isAsciiLower, isAsciiUpper, isDigit)

import           Flow                      ((.>), (|>))

import qualified Language.Ninja.AST        as AST
import qualified Language.Ninja.Errors     as Errors
import qualified Language.Ninja.Misc       as Misc
import qualified Language.Ninja.Mock       as Mock

import qualified Language.Ninja.Lexer      as Lexer

import           Tests.ReferenceLexer.Str0 (Str0 (MkStr0))
import qualified Tests.ReferenceLexer.Str0 as Str0

--------------------------------------------------------------------------------

type Str = BSC8.ByteString

-- | Lex the given file.
lexFile :: (MonadError Errors.ParseError m, Mock.MonadReadFile m)
          => Misc.Path -> m [Lexer.Lexeme ()]
lexFile file = Mock.readFile file >>= lexText

-- | Lex the given 'Text'.
lexText :: (MonadError Errors.ParseError m)
          => Text -> m [Lexer.Lexeme ()]
lexText = Text.encodeUtf8 .> lexBS

-- | Lex the given 'BSC8.ByteString'.
lexBS :: (MonadError Errors.ParseError m)
        => BSC8.ByteString -> m [Lexer.Lexeme ()]
lexBS x = lexerLoop (MkStr0 (BSC8.append x "\n\n\0"))

--------------------------------------------------------------------------------

lexerLoop :: (MonadError Errors.ParseError m) => Str0 -> m [Lexer.Lexeme ()]
lexerLoop c_x
  = case c of
      '\r'                                  -> lexerLoop x0
      '\n'                                  -> lexerLoop x0
      '#'                                   -> lexerLoop $ removeComment x0
      ' '                                   -> lexBind     $ dropSpace x0
      'b'  | Just x1 <- strip "uild "    x0 -> lexBuild    $ dropSpace x1
      'r'  | Just x1 <- strip "ule "     x0 -> lexRule     $ dropSpace x1
      'd'  | Just x1 <- strip "efault "  x0 -> lexDefault  $ dropSpace x1
      'p'  | Just x1 <- strip "ool "     x0 -> lexPool     $ dropSpace x1
      'i'  | Just x1 <- strip "nclude "  x0 -> lexInclude  $ dropSpace x1
      's'  | Just x1 <- strip "ubninja " x0 -> lexSubninja $ dropSpace x1
      '\0'                                  -> pure []
      _                                     -> lexDefine c_x
  where
    removeComment = Str0.dropWhile0 (/= '\n')

    (c, x0) = Str0.list0 c_x

    strip str (MkStr0 x) = let b = BSC8.pack str
                           in if b `BSC8.isPrefixOf` x
                              then Just $ MkStr0 $ BSC8.drop (BSC8.length b) x
                              else Nothing

lexBind :: (MonadError Errors.ParseError m) => Str0 -> m [Lexer.Lexeme ()]
lexBind c_x | (c, x) <- Str0.list0 c_x
  = case c of
      '\r' -> lexerLoop x
      '\n' -> lexerLoop x
      '#'  -> lexerLoop $ Str0.dropWhile0 (/= '\n') x
      '\0' -> pure []
      _    -> lexxBind (Lexer.LexBind ()) c_x

lexBuild :: (MonadError Errors.ParseError m) => Str0 -> m [Lexer.Lexeme ()]
lexBuild x0 = do
  (outputs, x1) <- lexxExprs True x0
  let (rule, x2) = Str0.span0 isVarDot $ dropSpace x1
  (deps, x3) <- lexxExprs False $ dropSpace x2
  x4 <- lexerLoop x3
  let name = Lexer.MkLName () rule
  pure (Lexer.LexBuild () (Lexer.makeLBuild () outputs name deps) : x4)

lexDefault :: (MonadError Errors.ParseError m) => Str0 -> m [Lexer.Lexeme ()]
lexDefault str0 = do
  (files, str1) <- lexxExprs False str0
  str2 <- lexerLoop str1
  pure (Lexer.LexDefault () files : str2)

lexRule, lexPool, lexInclude, lexSubninja, lexDefine
  :: (MonadError Errors.ParseError m) => Str0 -> m [Lexer.Lexeme ()]
lexRule     = lexxName (Lexer.LexRule     ())
lexPool     = lexxName (Lexer.LexPool     ())
lexInclude  = lexxFile (Lexer.LexInclude  ())
lexSubninja = lexxFile (Lexer.LexSubninja ())
lexDefine   = lexxBind (Lexer.LexDefine   ())

lexxBind :: (MonadError Errors.ParseError m)
         => (Lexer.LBind () -> Lexer.Lexeme ()) -> Str0 -> m [Lexer.Lexeme ()]
lexxBind ctor x0 = do
  let (var,  x1) = Str0.span0 isVarDot x0
  let (eq,   x2) = Str0.list0 $ dropSpace x1
  (expr, x3) <- lexxExpr False False $ dropSpace x2
  x4         <- lexerLoop x3
  if (eq == '=')
    then pure (ctor (Lexer.MkLBind () (Lexer.MkLName () var) expr) : x4)
    else Errors.throwLexBindingFailure (Text.pack (show (Str0.take0 100 x0)))

lexxFile :: (MonadError Errors.ParseError m)
         => (Lexer.LFile () -> Lexer.Lexeme ()) -> Str0 -> m [Lexer.Lexeme ()]
lexxFile ctor str0 = do
  (expr, str1) <- lexxExpr False False (dropSpace str0)
  str2         <- lexerLoop str1
  pure (ctor (Lexer.MkLFile expr) : str2)

lexxName :: (MonadError Errors.ParseError m)
         => (Lexer.LName () -> Lexer.Lexeme ()) -> Str0 -> m [Lexer.Lexeme ()]
lexxName ctor x = do
  let (name, rest) = splitLineCont x
  lexemes <- lexerLoop rest
  pure (ctor (Lexer.MkLName () name) : lexemes)

lexxExprs :: (MonadError Errors.ParseError m)
          => Bool -> Str0 -> m ([AST.Expr ()], Str0)
lexxExprs sColon x0 = do
  (a, c_x) <- lexxExpr sColon True x0
  let c  = Str0.head0 c_x
  let x1 = Str0.tail0 c_x
  case c of
    ' '           -> do (exprs, x2) <- lexxExprs sColon $ dropSpace x1
                        pure (a:exprs, x2)
    ':'  | sColon -> pure ([a], x1)
    _    | sColon -> Errors.throwLexExpectedColon
    '\r'          -> pure (a $: dropN x1)
    '\n'          -> pure (a $: x1)
    '\0'          -> pure (a $: c_x)
    _             -> Errors.throwLexUnexpectedSeparator c
  where
    ($:) :: AST.Expr () -> Str0 -> ([AST.Expr ()], Str0)
    (AST.Exprs () []) $: s = ([],     s)
    expr              $: s = ([expr], s)

{-# NOINLINE lexxExpr #-}
lexxExpr :: (MonadError Errors.ParseError m)
         => Bool
         -- ^ @stopColon@
         -> Bool
         -- ^ @stopSpace@
         -> Str0
         -- ^ The input bytestring
         -> m (AST.Expr (), Str0)
         -- ^ The second field will start with one of @" :\n\r"@ or be empty
lexxExpr stopColon stopSpace str0 = first exprs <$> f str0
  where
    exprs :: [AST.Expr ()] -> AST.Expr ()
    exprs [x] = AST.normalizeExpr x
    exprs xs  = AST.normalizeExpr (AST.Exprs () xs)

    special :: Char -> Bool
    special x = let b = x `elem` ['$', '\r', '\n', '\0']
                in case (stopColon, stopSpace) of
                     (True , True ) -> (x <= ':') && or [x == ':', x == ' ', b]
                     (True , False) -> (x <= ':') && or [x == ':',           b]
                     (False, True ) -> (x <= '$') && or [          x == ' ', b]
                     (False, False) -> (x <= '$') && or [                    b]

    f :: (MonadError Errors.ParseError m) => Str0 -> m ([AST.Expr ()], Str0)
    f (Str0.break00 special -> (a, x))
      = if BSC8.null a
        then g x
        else g x >>= \y -> pure (AST.Lit () (Text.decodeUtf8 a) $: y)

    g :: (MonadError Errors.ParseError m) => Str0 -> m ([AST.Expr ()], Str0)
    g x0 | (Str0.head0 x0 /= '$') = pure ([], x0)
    g (Str0.tail0 -> c_x) =
      let (c, x0) = Str0.list0 c_x
      in case c of
           '$'   -> f x0 >>= (AST.Lit () "$" $:) .> pure
           ' '   -> f x0 >>= (AST.Lit () " " $:) .> pure
           ':'   -> f x0 >>= (AST.Lit () ":" $:) .> pure
           '\n'  -> f (dropSpace x0)
           '\r'  -> f (dropSpace (dropN x0))
           '{' | (name, x1) <- Str0.span0 isVarDot x0
               , ('}',  x2) <- Str0.list0 x1
               , not (BSC8.null name)
                 -> f x2 >>= (AST.Var () (Text.decodeUtf8 name) $:) .> pure
           _   | (name, x1) <- Str0.span0 isVar c_x
               , not $ BSC8.null name
                 -> f x1 >>= (AST.Var () (Text.decodeUtf8 name) $:) .> pure
           _     -> Errors.throwLexUnexpectedDollar

    ($:) :: a -> ([a], b) -> ([a], b)
    x $: (xs, y) = (x:xs, y)

splitLineCont :: Str0 -> (Str, Str0)
splitLineCont = go .> first BSC8.concat
  where
    go = splitLineCR .> go'

    go' (a, b) | not (endsDollar a) = ([a], b)
               | otherwise          = let (c, d) = go (dropSpace b)
                                      in (BSC8.init a : c, d)

splitLineCR :: Str0 -> (Str, Str0)
splitLineCR x = if BSC8.isSuffixOf (BSC8.singleton '\r') a
                then (BSC8.init a, dropN b)
                else (a, dropN b)
  where
    (a, b) = Str0.break0 (== '\n') x

isVar :: Char -> Bool
isVar x = [ (x == '-')
          , (x == '_')
          , isAsciiLower x
          , isAsciiUpper x
          , isDigit x
          ] |> or

isVarDot :: Char -> Bool
isVarDot x = [ x == '.'
             , isVar x
             ] |> or

endsDollar :: Str -> Bool
endsDollar = BSC8.isSuffixOf (BSC8.singleton '$')

dropN :: Str0 -> Str0
dropN x = if (Str0.head0 x == '\n')
          then Str0.tail0 x
          else x

dropSpace :: Str0 -> Str0
dropSpace = Str0.dropWhile0 (== ' ')

--------------------------------------------------------------------------------