{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Text.Regex
-- Copyright   :  (c) Chris Kuklewicz 2006, derived from (c) The University of Glasgow 2001
-- License     :  BSD-style (see the file LICENSE)
--
-- Maintainer  :  hvr@gnu.org
-- Stability   :  experimental
-- Portability :  non-portable (regex-base needs MPTC+FD)
--
-- Regular expression matching.  Uses the POSIX regular expression
-- interface in "Text.Regex.Posix".
--
---------------------------------------------------------------------------

--
-- Modified by Chris Kuklewicz to be a thin layer over the regex-posix
-- package, and moved into a regex-compat package.
--
module Text.Regex (
    -- * Regular expressions
    Regex,
    mkRegex,
    mkRegexWithOpts,
    matchRegex,
    matchRegexAll,
    subRegex,
    splitRegex
  ) where

import Data.Array((!))
import Data.Bits((.|.))
import Text.Regex.Base(RegexMaker(makeRegexOpts),defaultExecOpt,RegexLike(matchAll,matchAllText),RegexContext(matchM),MatchText)
import Text.Regex.Posix(Regex,compNewline,compIgnoreCase,compExtended)

-- | Makes a regular expression with the default options (multi-line,
-- case-sensitive).  The syntax of regular expressions is
-- otherwise that of @egrep@ (i.e. POSIX \"extended\" regular
-- expressions).
mkRegex :: String -> Regex
mkRegex s = makeRegexOpts opt defaultExecOpt s
  where opt = compExtended .|. compNewline

-- | Makes a regular expression, where the multi-line and
-- case-sensitive options can be changed from the default settings.
mkRegexWithOpts
   :: String  -- ^ The regular expression to compile
   -> Bool    -- ^ 'True' @\<=>@ @\'^\'@ and @\'$\'@ match the beginning and
              -- end of individual lines respectively, and @\'.\'@ does /not/
              -- match the newline character.
   -> Bool    -- ^ 'True' @\<=>@ matching is case-sensitive
   -> Regex   -- ^ Returns: the compiled regular expression

mkRegexWithOpts s single_line case_sensitive
  = let opt = (if single_line then (compNewline .|.) else id) .
              (if case_sensitive then id else (compIgnoreCase .|.)) $
              compExtended
    in makeRegexOpts opt defaultExecOpt s

-- | Match a regular expression against a string
matchRegex
   :: Regex     -- ^ The regular expression
   -> String    -- ^ The string to match against
   -> Maybe [String]    -- ^ Returns: @'Just' strs@ if the match succeeded
                        -- (and @strs@ is the list of subexpression matches),
                        -- or 'Nothing' otherwise.
matchRegex p str = fmap (\(_,_,_,str) -> str) (matchRegexAll p str)

-- | Match a regular expression against a string, returning more information
-- about the match.
matchRegexAll
   :: Regex     -- ^ The regular expression
   -> String    -- ^ The string to match against
   -> Maybe ( String, String, String, [String] )
                -- ^ Returns: 'Nothing' if the match failed, or:
                --
                -- >  Just ( everything before match,
                -- >         portion matched,
                -- >         everything after the match,
                -- >         subexpression matches )

matchRegexAll p str = matchM p str

{- | Replaces every occurance of the given regexp with the replacement string.

In the replacement string, @\"\\1\"@ refers to the first substring;
@\"\\2\"@ to the second, etc; and @\"\\0\"@ to the entire match.
@\"\\\\\\\\\"@ will insert a literal backslash.

This does not advance if the regex matches an empty string.  This
misfeature is here to match the behavior of the the original
Text.Regex API.
-}

subRegex :: Regex                          -- ^ Search pattern
         -> String                         -- ^ Input string
         -> String                         -- ^ Replacement text
         -> String                         -- ^ Output string
subRegex _ "" _ = ""
subRegex regexp inp repl =
  let compile _i str [] = \ _m ->  (str++)
      compile i str (("\\",(off,len)):rest) =
        let i' = off+len
            pre = take (off-i) str
            str' = drop (i'-i) str
        in if null str' then \ _m -> (pre ++) . ('\\':)
             else \  m -> (pre ++) . ('\\' :) . compile i' str' rest m
      compile i str ((xstr,(off,len)):rest) =
        let i' = off+len
            pre = take (off-i) str
            str' = drop (i'-i) str
            x = read xstr
        in if null str' then \ m -> (pre++) . ((fst (m!x))++)
             else \ m -> (pre++) . ((fst (m!x))++) . compile i' str' rest m
      compiled :: MatchText String -> String -> String
      compiled = compile 0 repl findrefs where
        -- bre matches a backslash then capture either a backslash or some digits
        bre = mkRegex "\\\\(\\\\|[0-9]+)"
        findrefs = map (\m -> (fst (m!1),snd (m!0))) (matchAllText bre repl)
      go _i str [] = str
      go i str (m:ms) =
        let (_,(off,len)) = m!0
            i' = off+len
            pre = take (off-i) str
            str' = drop (i'-i) str
        in if null str' then pre ++ (compiled m "")
             else pre ++ (compiled m (go i' str' ms))
  in go 0 inp (matchAllText regexp inp)

{- | Splits a string based on a regular expression.  The regular expression
should identify one delimiter.

This does not advance and produces an infinite list of [] if the regex
matches an empty string.  This misfeature is here to match the
behavior of the the original Text.Regex API.
-}

splitRegex :: Regex -> String -> [String]
splitRegex _ [] = []
splitRegex delim strIn =
  let matches = map (!0) (matchAll delim strIn)
      go _i str [] = str : []
      go i str ((off,len):rest) =
        let i' = off+len
            firstline = take (off-i) str
            remainder = drop (i'-i) str
        in seq i' $
           if null remainder then [firstline,""]
             else firstline : go i' remainder rest
  in go 0 strIn matches

{-

-- These are the older versions which failed on (correct answer:)
-- let r = mkRegex "^(.)" in subRegex2 r "abc\ndef" "|\\1"
-- "|abc\n|def"

subRegex :: Regex                          -- ^ Search pattern
      -> String                         -- ^ Input string
      -> String                         -- ^ Replacement text
      -> String                         -- ^ Output string
subRegex _ "" _ = ""
subRegex regexp inp repl =
  let -- bre matches a backslash then capture either a backslash or some digits
      bre = mkRegex "\\\\(\\\\|[0-9]+)"
      lookup _ [] _ = []
      lookup [] _ _ = []
      lookup match repl groups =
        case matchRegexAll bre repl of
          Nothing -> repl
          Just (lead, _, trail, bgroups) ->
            let newval =
                 if (head bgroups) == "\\"
                   then "\\"
                   else let index :: Int
                            index = (read (head bgroups)) - 1
                        in if index == -1
                             then match
                             else groups !! index
            in lead ++ newval ++ lookup match trail groups
  in case matchRegexAll regexp inp of
       Nothing -> inp
       Just (lead, match, trail, groups) ->
         lead ++ lookup match repl groups ++ (subRegex regexp trail repl)

splitRegex :: Regex -> String -> [String]
splitRegex _ [] = []
splitRegex delim strIn = loop strIn where
  loop str = case matchOnceText delim str of
                Nothing -> [str]
                Just (firstline, _, remainder) ->
                  if null remainder
                    then [firstline,""]
                    else firstline : loop remainder

-}