-- |
-- Module      :  StreamEd
-- Copyright   :  (c) Vitaliy Rukavishnikov
-- License     :  BSD-style (see the file LICENSE)
-- 
-- Maintainer  :  virukav@gmail.com
-- Stability   :  experimental
-- Portability :  non-portable
--
-- The Sed runtime engine

module Hsed.StreamEd where

import System.IO 
import Control.Monad (unless, when, forM_, zipWithM)
import qualified Control.Monad.State as S
import Control.Monad.Trans.Goto
import Data.List (isPrefixOf)
import Data.Char (isPrint)
import Data.Maybe (fromMaybe)
import qualified Data.ByteString.Char8 as B
import Text.Printf (printf)

import Hsed.Parsec (parseSed, sedCmds)
import Hsed.Ast
import Hsed.SedRegex
import Hsed.SedState

type SedEngine a = GotoT a (S.StateT Env IO) a

data Status = EOF | Cont 
   deriving (Eq, Show)

data FlowControl = 
    Next                      -- ^ Apply the next sed command from the script to the pattern space
  | Break                     -- ^ Read the new line to the pattern space and apply sed script  
  | Continue                  -- ^ Reapply the sed script to the current pattern space
  | Goto (Maybe B.ByteString) -- ^ Jump to the marked sed command and apply it to the pattern space   
  | Exit                      -- ^ Quit 
   deriving (Eq, Show)

-- | Compile and execute the sed script
runSed :: [FilePath] -> String -> Env -> IO Env
runSed fs sed env = do
  S.execStateT (runGotoT $ do
       when ("#n" `isPrefixOf` sed) $ 
         S.lift $ set defOutput False 
       S.lift $ compile sed
       execute fs 
   ) env

-- | Parse the Sed commands
compile :: String -> SedState ()
compile cmds = do
  case parseSed sedCmds cmds of
     Right x -> set ast x
     Left e  -> error $ show e ++ " in " ++ cmds
  return ()

-- | Execute the parsed Sed commands against input data
execute :: [FilePath] -> SedEngine ()
execute fs = do
   processFiles fs
   fout <- S.lift $ get fileout
   S.liftIO $ S.mapM_ hClose (map snd fout)

-- | Process the input text files
processFiles :: [FilePath] -> SedEngine ()
processFiles files = do
   if null files then processFile stdin True
    else do
      let len = length files
      let fs = zipWith (\x y -> (x, y == len)) files [1..len]
      S.forM_ fs $ \(file, lastFile) -> do
         h <- S.liftIO $ openFile file ReadMode
         processFile h lastFile
   where
      processFile h lastFile = do
         S.lift $ set curFile (h, lastFile)
         nextLine

-- | Process the next input line from the file
nextLine :: SedEngine ()
nextLine = do
    (res, str) <- S.lift line
    case res of
        EOF  -> return ()
        Cont -> do
          S.lift $ set patternSpace str
          S.lift $ set appendSpace []
          cs <- S.lift $ get ast
          execCmds cs  
          nextLine

-- | Execute sed script
execCmds :: [SedCmd] -> SedEngine ()
execCmds cs = do
    forM_ cs $ \cmd -> do
      sch <- S.lift $ execCmd cmd
      case sch of
        Next -> return ()
        Break -> goto nextLine
        Continue -> goto (execCmds cs >> nextLine)
        Goto lbl -> (S.lift $ get ast) >>= \a -> goto (execCmds (jump a lbl) >> nextLine)
        Exit -> prnPat >> goto (return ())
    prnPat
    where prnPat = S.lift $ printPatSpace >> get appendSpace >>= \a -> mapM_ prnStr a

-- | Transfer control to the command marked with the label
jump :: [SedCmd] -> Maybe Label -> [SedCmd]      
jump cmds = maybe [] (go cmds)
  where 
        go [] _ = []
        go (SedCmd _ fun:cs) str = case fun of
           Group cs' -> go cs' str
           Label x -> if x == str then cs
                       else go cs str
           _ -> go cs str 

-- | Read an input line
line :: SedState (Status, B.ByteString)
line = do
   (h,b) <- get curFile
   p <- S.lift $ hIsEOF h
   if p then return (EOF,B.empty)
     else do 
       str <- S.lift $ B.hGetLine h
       modify curLine (+1)
       isLast <- if h == stdin then return False 
                   else S.lift (hIsEOF h) >>= \eof -> return eof
       if isLast && b then                
           get curLine >>= \l -> set lastLine l >> return (Cont, str)
        else return (Cont, str)  

-- | Execute the Sed function if the address is matched
execCmd :: SedCmd -> SedState FlowControl
execCmd (SedCmd a fun) = do
     b <- matchAddress a
     if b then runCmd fun
      else return Next

-- | Check if the address interval is matched  
matchAddress :: Address -> SedState Bool
matchAddress (Address addr1 addr2 invert) = 
    case (addr1,addr2) of
      (Just x, Nothing) -> matchAddr x x >>= \b -> return $ b /= invert
      (Just x, Just y)  -> matchAddr x y >>= \b -> return $ b /= invert
      _                 -> return $ not invert
    where
      matchAddr :: Addr -> Addr -> SedState Bool
      matchAddr a1 a2 = do 
         lineNum <- get curLine
         patSpace <- get patternSpace
         lastLineNum <- get lastLine 
         case (a1,a2) of
           (LineNumber x, LineNumber y) -> matchRange (x == lineNum) (y == lineNum)
           (LineNumber x, Pat y) -> matchRange (x == lineNum) (matchRE y patSpace)
           (LineNumber x, LastLine) -> matchRange (x == lineNum) (lineNum == lastLineNum)
           (LastLine, _) -> return $ lineNum == lastLineNum
           (Pat x, Pat y) -> matchRange (matchRE x patSpace) (matchRE y patSpace)
           (Pat x, LineNumber y) -> matchRange (matchRE x patSpace) (y == lineNum) 
           (Pat x, LastLine) -> matchRange (matchRE x patSpace) (lineNum == lastLineNum)       
      matchRange :: Bool -> Bool -> SedState Bool
      matchRange b1 b2 = do
         let setRange = set inRange
         range <- get inRange           
         if not range then 
            if b1 && b2 then return True
             else if b1 then setRange True >> return True
                   else return False
          else if b2 then setRange False >> return True
                else return True

-- | Execute the Sed function
runCmd :: SedFun -> SedState FlowControl
runCmd cmd = 
    case cmd of
      Group cs -> group cs
      LineNum -> lineNum
      Append txt -> append txt
      Branch lbl -> branch lbl
      Change txt -> change txt
      DeleteLine -> deleteLine
      DeletePat -> deletePat
      ReplacePat -> replacePat
      AppendPat -> appendPat
      ReplaceHold -> replaceHold
      AppendHold -> appendHold
      Insert txt -> insert txt
      List -> list
      NextLine -> next
      AppendLinePat -> appendLinePat
      PrintPat -> printPat
      WriteUpPat -> writeUpPat
      Quit -> quit
      ReadFile file -> readF file
      Substitute pat repl fs -> substitute pat repl fs
      Test lbl -> test lbl
      WriteFile file -> writeF file
      Exchange -> exchange
      Transform t1 t2 -> transform t1 t2
      Label lbl -> label lbl
      Comment -> comment
      EmptyCmd -> emptyCmd

-- | '{cmd...}' Groups subcommands enclosed in {} (braces)
group :: [SedCmd]  -> SedState FlowControl 
group [] = return Next
group (cmd:xs) = do
    sch <- execCmd cmd
    if sch == Next then
       group xs
     else return sch

-- | '=' Writes the current line number to standard output as a line
lineNum :: SedState FlowControl
lineNum = 
    get curLine >>= 
    (prnStrLn . B.pack . show) >> 
    return Next

-- | 'a\\ntext' Places the text variable in output before reading 
-- the next input line
append :: B.ByteString -> SedState FlowControl
append txt = 
    modify appendSpace (++ [txt,B.pack "\n"]) >> 
    return Next 

-- | 'b label' Transfer control to :label elsewhere in script
branch :: Maybe Label -> SedState FlowControl
branch = return . Goto

-- | 'c\\ntext' Replace the lines with the text variable
change :: B.ByteString -> SedState FlowControl
change txt = do
    range <- get inRange
    unless range $ prnStrLn txt
    return Break

-- | 'd' Delete line(s) from pattern space
deleteLine :: SedState FlowControl
deleteLine = 
    set patternSpace B.empty >> 
    return Break

-- | 'D' Delete first part (up to embedded newline) of multiline pattern space
deletePat :: SedState FlowControl
deletePat = do
    p <- get patternSpace
    let p' = B.drop 1 $ B.dropWhile (/='\n') p
    set patternSpace p'
    return Continue

-- | 'g' Copy contents of hold space into the pattern space
replacePat :: SedState FlowControl
replacePat = 
    get holdSpace >>= \h -> 
    set patternSpace h >> 
    return Next

-- | 'G' Append newline followed by contents of hold space 
-- to contents of the pattern space.
appendPat :: SedState FlowControl
appendPat = 
    get holdSpace >>= \h -> 
    modify patternSpace (`B.append` B.cons '\n' h) >> 
    return Next

-- | 'h' Copy pattern space into hold space
replaceHold :: SedState FlowControl
replaceHold = 
    get patternSpace >>= \p -> 
    set holdSpace p >> 
    return Next

-- | 'H' Append newline and contents of pattern space to contents 
-- of the hold space
appendHold :: SedState FlowControl
appendHold = 
    get patternSpace >>= \p -> 
    modify holdSpace (`B.append` B.cons '\n' p) >> 
    return Next

-- | 'i\\ntext' Writes the text variable to standard output before 
-- reading the next line into the pattern space.
insert :: B.ByteString -> SedState FlowControl
insert txt = prnStrLn txt >> return Next

-- | 't label' Jump to line if successful substitutions have been made
test :: Maybe Label -> SedState FlowControl
test lbl = 
    get subst >>= \s -> 
    if s then return $ Goto lbl 
     else return Next

-- | 's/pattern/replacement/[flags]' Substitute replacement for pattern
substitute :: B.ByteString -> B.ByteString -> Flags -> SedState FlowControl
substitute pat repl fs = do
  let (gn, p, w) = getFlags fs 
  patSpace <- get patternSpace
  let (repl', b) = sedSubRegex pat patSpace repl gn
  set subst b
  when b $ do
    set patternSpace repl'
    when p $ get patternSpace >>= \ps -> prnStrLn ps
    unless (null w) $ writeF w >> return ()
  return Next
   where
     getFlags :: Flags -> (Int, Bool, FilePath)
     getFlags (Flags o f) = (occurr, printPat, file) where
        (occurr, printPat) = case o of
           Nothing -> (1, False)
           Just (OccurrencePrint x y) -> (occ x, y)
           Just (PrintOccurrence x y) -> (occ y, x)
        occ x = case x of
           Nothing -> 1
           Just ReplaceAll -> 0
           Just (Replace n) -> n        
        file = fromMaybe "" f

-- | 'n' Read next line of input into pattern space. 
next :: SedState FlowControl
next = do
    printPatSpace
    (res,str) <- line
    set patternSpace str
    if res == EOF then return Break else return Next

-- | 'l' List the contents of the pattern space, showing 
-- nonprinting characters as ASCII codes
list :: SedState FlowControl
list = do
    patSpace <- get patternSpace
    S.forM_ (B.unpack patSpace) $ \ch ->
      if isPrint ch then prnChar ch
       else case lookup ch esc of
             Nothing -> do 
                 prnChar '\\'
                 prnPrintf ch
             Just x -> prnStr (B.pack x)
    prnChar '\n'
    return Next
    where esc = zip "\\\a\b\f\r\t\v"
                    ["\\\\","\\a", "\\b", "\\f", "\\r", "\\t", "\\v"]

-- | 'x' Exchange contents of the pattern space with the 
-- contents of the hold space
exchange :: SedState FlowControl
exchange = do
    hold <- get holdSpace
    pat  <- get patternSpace
    set holdSpace pat
    set patternSpace hold
    return Next

-- | 'N' Append next input line to contents of pattern space
appendLinePat :: SedState FlowControl
appendLinePat = do
    (res,ln) <- line
    if res == EOF then return Break
      else do
       let suffix = B.append (B.pack "\n") ln
       modify patternSpace (`B.append` suffix)
       return Next

-- | 'p' Print the lines
printPat :: SedState FlowControl
printPat = 
     get patternSpace >>= \p -> 
     prnStrLn p >> 
     return Next

-- | 'P' Print first part (up to embedded newline) of 
-- multiline pattern space
writeUpPat :: SedState FlowControl
writeUpPat = 
     get patternSpace >>= 
     (prnStrLn . B.takeWhile (/='\n')) >> 
     return Next

-- | 'q' Quit
quit :: SedState FlowControl
quit = return Exit

-- | 'y/abc/xyz' Transform each character by position in string abc 
-- to its equivalent in string xyz
transform :: B.ByteString -> B.ByteString -> SedState FlowControl
transform t1 t2 = do
    when (B.length t1 /= B.length t2) $ 
      error "Transform strings are not the same length"
    patSpace <- get patternSpace
    let tr = B.map go patSpace
    set patternSpace tr 
    return Next
    where go ch = fromMaybe ch (lookup ch (B.zip t1 t2))

-- | 'w file' Append contents of pattern space to file
writeF :: FilePath -> SedState FlowControl
writeF file = do
    fout <- get fileout
    patSpace <- get patternSpace
    let printFileout h = S.lift $ B.hPutStrLn h patSpace
    case lookup file fout of
       Nothing -> do
          h <- S.lift $ openFile file WriteMode
          modify fileout (++ [(file,h)])
          printFileout h
       Just h -> printFileout h
    return Next

-- | 'r' Read contents of file and append after the contents of the 
-- pattern space
readF :: FilePath -> SedState FlowControl
readF file = do
    cont <- S.lift $ B.readFile file `catch` \_ -> return B.empty
    modify appendSpace (++ [cont])
    return Next

-- | Skip label, comment and empty command
label _ = return Next
comment  = return Next
emptyCmd = return Next

-- | Print the pattern space to the standard output
printPatSpace :: SedState ()
printPatSpace = do
   out <- get defOutput
   when out $ get patternSpace >>= \p -> prnStrLn p

-- | Check if the current line in the pattern space is the last line
isLastLine :: SedState Bool
isLastLine = do
    l <- get lastLine
    cur <- get curLine
    return $ l == cur

-- | Writes the string to the standard output or save the string in the memory buffer
prnStr :: B.ByteString -> SedState ()
prnStr str = do
   useMem <- get useMemSpace
   if useMem then modify memorySpace (`B.append` str) 
     else S.lift $ B.putStr str

-- | The same as prnStr, but adds a newline character
prnStrLn :: B.ByteString -> SedState ()
prnStrLn str = prnStr $ B.snoc str '\n'

-- | The same as prnStr, but for char
prnChar :: Char -> SedState ()
prnChar c = prnStr $ B.singleton c

-- | Print the character as three-digit octal number
prnPrintf :: Char -> SedState ()
prnPrintf c = do
    let str = printf "%03o" c :: String 
    prnStr $ B.pack str