{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveGeneric #-}

-- Copyright (c) Jean-Philippe Bernardy 2008
module Yi.Regex
  ( SearchOption(..), makeSearchOptsM
  , SearchExp(..), searchString, searchRegex, emptySearch
  , emptyRegex
  , regexEscapeString
  , reversePattern
  , module Text.Regex.TDFA
  ) where

import Data.Bifunctor (first)
import Data.Binary
import GHC.Generics (Generic)
import Yi.Buffer.Basic (Direction(..))

import Text.Regex.TDFA ( Regex, CompOption(..), caseSensitive, multiline
                       , defaultCompOpt, defaultExecOpt, makeRegexOptsM
                       , matchOnceText, makeRegex, RegexLike(matchAll)
                       , AllTextSubmatches(..), (=~))
import Text.Regex.TDFA.Pattern (Pattern(..), DoPa(..), showPattern)
import Text.Regex.TDFA.ReadRegex(parseRegex)
import Text.Regex.TDFA.TDFA(patternToRegex)

-- input string, regexexp, backward regex.
data SearchExp = SearchExp
    { SearchExp -> String
seInput        :: String
    , SearchExp -> Regex
seCompiled     :: Regex
    , SearchExp -> Regex
seBackCompiled :: Regex
    , SearchExp -> [SearchOption]
seOptions      :: [SearchOption]
    }

searchString :: SearchExp -> String
searchString :: SearchExp -> String
searchString = SearchExp -> String
seInput

searchRegex :: Direction -> SearchExp -> Regex
searchRegex :: Direction -> SearchExp -> Regex
searchRegex Direction
Forward = SearchExp -> Regex
seCompiled
searchRegex Direction
Backward = SearchExp -> Regex
seBackCompiled

--
-- What would be interesting would be to implement our own general
-- mechanism to allow users to supply a regex function of any kind, and
-- search with that. This removes the restriction on strings be valid
-- under regex(3).
--

data SearchOption
    = IgnoreCase   -- ^ Compile for matching that ignores char case
    | NoNewLine    -- ^ Compile for newline-insensitive matching
    | QuoteRegex   -- ^ Treat the input not as a regex but as a literal string to search for.
    deriving (SearchOption -> SearchOption -> Bool
(SearchOption -> SearchOption -> Bool)
-> (SearchOption -> SearchOption -> Bool) -> Eq SearchOption
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: SearchOption -> SearchOption -> Bool
$c/= :: SearchOption -> SearchOption -> Bool
== :: SearchOption -> SearchOption -> Bool
$c== :: SearchOption -> SearchOption -> Bool
Eq, (forall x. SearchOption -> Rep SearchOption x)
-> (forall x. Rep SearchOption x -> SearchOption)
-> Generic SearchOption
forall x. Rep SearchOption x -> SearchOption
forall x. SearchOption -> Rep SearchOption x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep SearchOption x -> SearchOption
$cfrom :: forall x. SearchOption -> Rep SearchOption x
Generic)

instance Binary SearchOption

searchOpt :: SearchOption -> CompOption -> CompOption
searchOpt :: SearchOption -> CompOption -> CompOption
searchOpt SearchOption
IgnoreCase = \CompOption
o->CompOption
o{caseSensitive :: Bool
caseSensitive = Bool
False}
searchOpt SearchOption
NoNewLine = \CompOption
o->CompOption
o{multiline :: Bool
multiline = Bool
False}
searchOpt SearchOption
QuoteRegex = CompOption -> CompOption
forall a. a -> a
id

makeSearchOptsM :: [SearchOption] -> String -> Either String SearchExp
makeSearchOptsM :: [SearchOption] -> String -> Either String SearchExp
makeSearchOptsM [SearchOption]
opts String
re = (\(Pattern, (GroupIndex, DoPa))
p->SearchExp :: String -> Regex -> Regex -> [SearchOption] -> SearchExp
SearchExp { seInput :: String
seInput        = String
re
                                         , seCompiled :: Regex
seCompiled     = (Pattern, (GroupIndex, DoPa)) -> Regex
compile (Pattern, (GroupIndex, DoPa))
p
                                         , seBackCompiled :: Regex
seBackCompiled = (Pattern, (GroupIndex, DoPa)) -> Regex
compile ((Pattern, (GroupIndex, DoPa)) -> Regex)
-> (Pattern, (GroupIndex, DoPa)) -> Regex
forall a b. (a -> b) -> a -> b
$ (Pattern, (GroupIndex, DoPa)) -> (Pattern, (GroupIndex, DoPa))
forall t. (Pattern, (t, DoPa)) -> (Pattern, (t, DoPa))
reversePattern (Pattern, (GroupIndex, DoPa))
p
                                         , seOptions :: [SearchOption]
seOptions      = [SearchOption]
opts
                                         }) ((Pattern, (GroupIndex, DoPa)) -> SearchExp)
-> Either String (Pattern, (GroupIndex, DoPa))
-> Either String SearchExp
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Either String (Pattern, (GroupIndex, DoPa))
pattern
    where searchOpts :: [SearchOption] -> CompOption -> CompOption
searchOpts = (SearchOption
 -> (CompOption -> CompOption) -> CompOption -> CompOption)
-> (CompOption -> CompOption)
-> [SearchOption]
-> CompOption
-> CompOption
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((CompOption -> CompOption)
-> (CompOption -> CompOption) -> CompOption -> CompOption
forall b c a. (b -> c) -> (a -> b) -> a -> c
(.) ((CompOption -> CompOption)
 -> (CompOption -> CompOption) -> CompOption -> CompOption)
-> (SearchOption -> CompOption -> CompOption)
-> SearchOption
-> (CompOption -> CompOption)
-> CompOption
-> CompOption
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SearchOption -> CompOption -> CompOption
searchOpt) CompOption -> CompOption
forall a. a -> a
id
          compile :: (Pattern, (GroupIndex, DoPa)) -> Regex
compile (Pattern, (GroupIndex, DoPa))
source = (Pattern, (GroupIndex, DoPa)) -> CompOption -> ExecOption -> Regex
patternToRegex (Pattern, (GroupIndex, DoPa))
source ([SearchOption] -> CompOption -> CompOption
searchOpts [SearchOption]
opts CompOption
forall regex compOpt execOpt.
RegexOptions regex compOpt execOpt =>
compOpt
defaultCompOpt) ExecOption
forall regex compOpt execOpt.
RegexOptions regex compOpt execOpt =>
execOpt
defaultExecOpt
          pattern :: Either String (Pattern, (GroupIndex, DoPa))
pattern = if SearchOption
QuoteRegex SearchOption -> [SearchOption] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [SearchOption]
opts
                          then (Pattern, (GroupIndex, DoPa))
-> Either String (Pattern, (GroupIndex, DoPa))
forall a b. b -> Either a b
Right (String -> (Pattern, (GroupIndex, DoPa))
forall t. Num t => String -> (Pattern, (t, DoPa))
literalPattern String
re)
                          else (ParseError -> String)
-> Either ParseError (Pattern, (GroupIndex, DoPa))
-> Either String (Pattern, (GroupIndex, DoPa))
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first ParseError -> String
forall a. Show a => a -> String
show (String -> Either ParseError (Pattern, (GroupIndex, DoPa))
parseRegex String
re)

instance Binary SearchExp where
  get :: Get SearchExp
get = do String
re   <- Get String
forall t. Binary t => Get t
get
           [SearchOption]
opts <- Get [SearchOption]
forall t. Binary t => Get t
get
           SearchExp -> Get SearchExp
forall (m :: * -> *) a. Monad m => a -> m a
return (SearchExp -> Get SearchExp) -> SearchExp -> Get SearchExp
forall a b. (a -> b) -> a -> b
$ case [SearchOption] -> String -> Either String SearchExp
makeSearchOptsM [SearchOption]
opts String
re of
                      Left String
err -> String -> SearchExp
forall a. HasCallStack => String -> a
error String
err
                      Right SearchExp
se -> SearchExp
se
  put :: SearchExp -> Put
put (SearchExp { seInput :: SearchExp -> String
seInput   = String
re,
                   seOptions :: SearchExp -> [SearchOption]
seOptions = [SearchOption]
opts, Regex
seBackCompiled :: Regex
seCompiled :: Regex
seBackCompiled :: SearchExp -> Regex
seCompiled :: SearchExp -> Regex
.. }) = do String -> Put
forall t. Binary t => t -> Put
put String
re
                                                [SearchOption] -> Put
forall t. Binary t => t -> Put
put [SearchOption]
opts

-- | Return an escaped (for parseRegex use) version of the string.
regexEscapeString :: String -> String
regexEscapeString :: String -> String
regexEscapeString String
source = Pattern -> String
showPattern (Pattern -> String) -> (String -> Pattern) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Pattern
literalPattern' (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ String
source

-- | Return a pattern that matches its argument.
literalPattern :: (Num t) => String -> (Pattern, (t, DoPa))
literalPattern :: String -> (Pattern, (t, DoPa))
literalPattern String
source = (String -> Pattern
literalPattern' String
source, (t
0,GroupIndex -> DoPa
DoPa GroupIndex
0))

literalPattern' :: String -> Pattern
literalPattern' :: String -> Pattern
literalPattern' = [Pattern] -> Pattern
PConcat ([Pattern] -> Pattern)
-> (String -> [Pattern]) -> String -> Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Pattern) -> String -> [Pattern]
forall a b. (a -> b) -> [a] -> [b]
map (DoPa -> Char -> Pattern
PChar (GroupIndex -> DoPa
DoPa GroupIndex
0))

-- | Reverse a pattern. Note that the submatches will be reversed as well.
reversePattern :: (Pattern, (t, DoPa)) -> (Pattern, (t, DoPa))
reversePattern :: (Pattern, (t, DoPa)) -> (Pattern, (t, DoPa))
reversePattern (Pattern
pattern,(t, DoPa)
rest) = (Pattern -> Pattern
rev Pattern
pattern, (t, DoPa)
rest)
    where rev :: Pattern -> Pattern
rev (PConcat [Pattern]
l)      = [Pattern] -> Pattern
PConcat ([Pattern] -> [Pattern]
forall a. [a] -> [a]
reverse ((Pattern -> Pattern) -> [Pattern] -> [Pattern]
forall a b. (a -> b) -> [a] -> [b]
map Pattern -> Pattern
rev [Pattern]
l))
          rev (PCarat  DoPa
dp)     = DoPa -> Pattern
PDollar DoPa
dp
          rev (PDollar DoPa
dp)     = DoPa -> Pattern
PCarat  DoPa
dp
          rev (PEscape DoPa
dp Char
'<') = DoPa -> Char -> Pattern
PEscape DoPa
dp Char
'>'
          rev (PEscape DoPa
dp Char
'>') = DoPa -> Char -> Pattern
PEscape DoPa
dp Char
'<'
          rev (PGroup Maybe GroupIndex
a Pattern
x)     = Maybe GroupIndex -> Pattern -> Pattern
PGroup Maybe GroupIndex
a (Pattern -> Pattern
rev Pattern
x)
          rev (POr [Pattern]
l)          = [Pattern] -> Pattern
POr ((Pattern -> Pattern) -> [Pattern] -> [Pattern]
forall a b. (a -> b) -> [a] -> [b]
map Pattern -> Pattern
rev [Pattern]
l)
          rev (PQuest Pattern
x)       = Pattern -> Pattern
PQuest (Pattern -> Pattern
rev Pattern
x)
          rev (PPlus Pattern
x)        = Pattern -> Pattern
PPlus (Pattern -> Pattern
rev Pattern
x)
          rev (PStar Bool
b Pattern
x)      = Bool -> Pattern -> Pattern
PStar Bool
b (Pattern -> Pattern
rev Pattern
x)
          rev (PBound GroupIndex
i Maybe GroupIndex
m Pattern
x)   = GroupIndex -> Maybe GroupIndex -> Pattern -> Pattern
PBound GroupIndex
i Maybe GroupIndex
m (Pattern -> Pattern
rev Pattern
x)
          rev (PNonCapture Pattern
x)  = Pattern -> Pattern
PNonCapture (Pattern -> Pattern
rev Pattern
x)
          rev (PNonEmpty Pattern
x)    = Pattern -> Pattern
PNonEmpty (Pattern -> Pattern
rev Pattern
x)
          rev Pattern
x = Pattern
x

{-
Chris K Commentary:

I have one DIRE WARNING and one suggestion.

The DIRE WARNING is against using the reversed Pattern to find captured subexpressions.
It will work perfectly to find the longest match but give nonsense for captures.  In
particular matching text "abc" with "(.)*" forward returns the 1st capture as "c".
Searching "cba" with the reverse of "(.)*", which is identical, returns the 1st capture as "a".

Enough changes to the matching engine could allow for the reversed search on the
reversed text to return the same captures as the the forward search on the forward
text.  Rather than that tricky complexity, if you need the substring captures you
can use the reversed pattern to find a whole match and then run the forward pattern
on that substring.

The one suggestion is that the DoPa are irrelevant to the matching — they are there to
allow a person to understand how the output of each stage of the regex-tdfa code relates
to the input pattern.

-}

emptySearch :: SearchExp
emptySearch :: SearchExp
emptySearch = String -> Regex -> Regex -> [SearchOption] -> SearchExp
SearchExp String
"" Regex
emptyRegex Regex
emptyRegex []


-- | The regular expression that matches nothing.
emptyRegex :: Regex
Just Regex
emptyRegex = CompOption -> ExecOption -> String -> Maybe Regex
forall regex compOpt execOpt source (m :: * -> *).
(RegexMaker regex compOpt execOpt source, MonadFail m) =>
compOpt -> execOpt -> source -> m regex
makeRegexOptsM CompOption
forall regex compOpt execOpt.
RegexOptions regex compOpt execOpt =>
compOpt
defaultCompOpt ExecOption
forall regex compOpt execOpt.
RegexOptions regex compOpt execOpt =>
execOpt
defaultExecOpt String
"[[:empty:]]"