{-# LANGUAGE CPP                 #-}
{-# LANGUAGE DeriveDataTypeable  #-}
{-# LANGUAGE DeriveGeneric       #-}
{-# LANGUAGE OverloadedStrings   #-}
{-# LANGUAGE ScopedTypeVariables #-}

module Skylighting.Regex (
                Regex
              , RegexException
              , RE(..)
              , compileRegex
              , matchRegex
              , convertOctalEscapes
              ) where

import qualified Control.Exception as E
import Data.Aeson
import Data.Binary (Binary)
import qualified Data.ByteString.Base64 as Base64
import qualified Data.ByteString.Char8 as BS
import Data.ByteString.UTF8 (toString)
import Data.Data
import qualified Data.Text as Text
import qualified Data.Text.Encoding as TE
import GHC.Generics (Generic)
import System.IO.Unsafe (unsafePerformIO)
import Text.Printf
import Text.Regex.PCRE.ByteString
#if !MIN_VERSION_base(4,13,0)
import Control.Monad.Fail (MonadFail)
#endif

-- | An exception in compiling or executing a regex.
newtype RegexException = RegexException String
      deriving (Int -> RegexException -> ShowS
[RegexException] -> ShowS
RegexException -> String
(Int -> RegexException -> ShowS)
-> (RegexException -> String)
-> ([RegexException] -> ShowS)
-> Show RegexException
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [RegexException] -> ShowS
$cshowList :: [RegexException] -> ShowS
show :: RegexException -> String
$cshow :: RegexException -> String
showsPrec :: Int -> RegexException -> ShowS
$cshowsPrec :: Int -> RegexException -> ShowS
Show, Typeable, (forall x. RegexException -> Rep RegexException x)
-> (forall x. Rep RegexException x -> RegexException)
-> Generic RegexException
forall x. Rep RegexException x -> RegexException
forall x. RegexException -> Rep RegexException x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep RegexException x -> RegexException
$cfrom :: forall x. RegexException -> Rep RegexException x
Generic)

instance E.Exception RegexException

-- | A representation of a regular expression.
data RE = RE{
    RE -> ByteString
reString        :: BS.ByteString
  , RE -> Bool
reCaseSensitive :: Bool
} deriving (Int -> RE -> ShowS
[RE] -> ShowS
RE -> String
(Int -> RE -> ShowS)
-> (RE -> String) -> ([RE] -> ShowS) -> Show RE
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [RE] -> ShowS
$cshowList :: [RE] -> ShowS
show :: RE -> String
$cshow :: RE -> String
showsPrec :: Int -> RE -> ShowS
$cshowsPrec :: Int -> RE -> ShowS
Show, ReadPrec [RE]
ReadPrec RE
Int -> ReadS RE
ReadS [RE]
(Int -> ReadS RE)
-> ReadS [RE] -> ReadPrec RE -> ReadPrec [RE] -> Read RE
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [RE]
$creadListPrec :: ReadPrec [RE]
readPrec :: ReadPrec RE
$creadPrec :: ReadPrec RE
readList :: ReadS [RE]
$creadList :: ReadS [RE]
readsPrec :: Int -> ReadS RE
$creadsPrec :: Int -> ReadS RE
Read, Eq RE
Eq RE
-> (RE -> RE -> Ordering)
-> (RE -> RE -> Bool)
-> (RE -> RE -> Bool)
-> (RE -> RE -> Bool)
-> (RE -> RE -> Bool)
-> (RE -> RE -> RE)
-> (RE -> RE -> RE)
-> Ord RE
RE -> RE -> Bool
RE -> RE -> Ordering
RE -> RE -> RE
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: RE -> RE -> RE
$cmin :: RE -> RE -> RE
max :: RE -> RE -> RE
$cmax :: RE -> RE -> RE
>= :: RE -> RE -> Bool
$c>= :: RE -> RE -> Bool
> :: RE -> RE -> Bool
$c> :: RE -> RE -> Bool
<= :: RE -> RE -> Bool
$c<= :: RE -> RE -> Bool
< :: RE -> RE -> Bool
$c< :: RE -> RE -> Bool
compare :: RE -> RE -> Ordering
$ccompare :: RE -> RE -> Ordering
$cp1Ord :: Eq RE
Ord, RE -> RE -> Bool
(RE -> RE -> Bool) -> (RE -> RE -> Bool) -> Eq RE
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: RE -> RE -> Bool
$c/= :: RE -> RE -> Bool
== :: RE -> RE -> Bool
$c== :: RE -> RE -> Bool
Eq, Typeable RE
DataType
Constr
Typeable RE
-> (forall (c :: * -> *).
    (forall d b. Data d => c (d -> b) -> d -> c b)
    -> (forall g. g -> c g) -> RE -> c RE)
-> (forall (c :: * -> *).
    (forall b r. Data b => c (b -> r) -> c r)
    -> (forall r. r -> c r) -> Constr -> c RE)
-> (RE -> Constr)
-> (RE -> DataType)
-> (forall (t :: * -> *) (c :: * -> *).
    Typeable t =>
    (forall d. Data d => c (t d)) -> Maybe (c RE))
-> (forall (t :: * -> * -> *) (c :: * -> *).
    Typeable t =>
    (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c RE))
-> ((forall b. Data b => b -> b) -> RE -> RE)
-> (forall r r'.
    (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> RE -> r)
-> (forall r r'.
    (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> RE -> r)
-> (forall u. (forall d. Data d => d -> u) -> RE -> [u])
-> (forall u. Int -> (forall d. Data d => d -> u) -> RE -> u)
-> (forall (m :: * -> *).
    Monad m =>
    (forall d. Data d => d -> m d) -> RE -> m RE)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> RE -> m RE)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> RE -> m RE)
-> Data RE
RE -> DataType
RE -> Constr
(forall b. Data b => b -> b) -> RE -> RE
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> RE -> c RE
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c RE
forall a.
Typeable a
-> (forall (c :: * -> *).
    (forall d b. Data d => c (d -> b) -> d -> c b)
    -> (forall g. g -> c g) -> a -> c a)
-> (forall (c :: * -> *).
    (forall b r. Data b => c (b -> r) -> c r)
    -> (forall r. r -> c r) -> Constr -> c a)
-> (a -> Constr)
-> (a -> DataType)
-> (forall (t :: * -> *) (c :: * -> *).
    Typeable t =>
    (forall d. Data d => c (t d)) -> Maybe (c a))
-> (forall (t :: * -> * -> *) (c :: * -> *).
    Typeable t =>
    (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c a))
-> ((forall b. Data b => b -> b) -> a -> a)
-> (forall r r'.
    (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall r r'.
    (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall u. (forall d. Data d => d -> u) -> a -> [u])
-> (forall u. Int -> (forall d. Data d => d -> u) -> a -> u)
-> (forall (m :: * -> *).
    Monad m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> Data a
forall u. Int -> (forall d. Data d => d -> u) -> RE -> u
forall u. (forall d. Data d => d -> u) -> RE -> [u]
forall r r'.
(r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> RE -> r
forall r r'.
(r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> RE -> r
forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> RE -> m RE
forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> RE -> m RE
forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c RE
forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> RE -> c RE
forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c RE)
forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c RE)
$cRE :: Constr
$tRE :: DataType
gmapMo :: (forall d. Data d => d -> m d) -> RE -> m RE
$cgmapMo :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> RE -> m RE
gmapMp :: (forall d. Data d => d -> m d) -> RE -> m RE
$cgmapMp :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> RE -> m RE
gmapM :: (forall d. Data d => d -> m d) -> RE -> m RE
$cgmapM :: forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> RE -> m RE
gmapQi :: Int -> (forall d. Data d => d -> u) -> RE -> u
$cgmapQi :: forall u. Int -> (forall d. Data d => d -> u) -> RE -> u
gmapQ :: (forall d. Data d => d -> u) -> RE -> [u]
$cgmapQ :: forall u. (forall d. Data d => d -> u) -> RE -> [u]
gmapQr :: (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> RE -> r
$cgmapQr :: forall r r'.
(r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> RE -> r
gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> RE -> r
$cgmapQl :: forall r r'.
(r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> RE -> r
gmapT :: (forall b. Data b => b -> b) -> RE -> RE
$cgmapT :: (forall b. Data b => b -> b) -> RE -> RE
dataCast2 :: (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c RE)
$cdataCast2 :: forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c RE)
dataCast1 :: (forall d. Data d => c (t d)) -> Maybe (c RE)
$cdataCast1 :: forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c RE)
dataTypeOf :: RE -> DataType
$cdataTypeOf :: RE -> DataType
toConstr :: RE -> Constr
$ctoConstr :: RE -> Constr
gunfold :: (forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c RE
$cgunfold :: forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c RE
gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> RE -> c RE
$cgfoldl :: forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> RE -> c RE
$cp1Data :: Typeable RE
Data, Typeable, (forall x. RE -> Rep RE x)
-> (forall x. Rep RE x -> RE) -> Generic RE
forall x. Rep RE x -> RE
forall x. RE -> Rep RE x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep RE x -> RE
$cfrom :: forall x. RE -> Rep RE x
Generic)

instance Binary RE

instance ToJSON RE where
  toJSON :: RE -> Value
toJSON RE
re = [Pair] -> Value
object [ Text
"reString"        Text -> Text -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Text -> v -> kv
.= ByteString -> Text
encodeToText (RE -> ByteString
reString RE
re)
                     , Text
"reCaseSensitive" Text -> Bool -> Pair
forall kv v. (KeyValue kv, ToJSON v) => Text -> v -> kv
.= RE -> Bool
reCaseSensitive RE
re ]
instance FromJSON RE where
  parseJSON :: Value -> Parser RE
parseJSON = String -> (Object -> Parser RE) -> Value -> Parser RE
forall a. String -> (Object -> Parser a) -> Value -> Parser a
withObject String
"RE" ((Object -> Parser RE) -> Value -> Parser RE)
-> (Object -> Parser RE) -> Value -> Parser RE
forall a b. (a -> b) -> a -> b
$ \Object
v ->
    ByteString -> Bool -> RE
RE (ByteString -> Bool -> RE)
-> Parser ByteString -> Parser (Bool -> RE)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ((Object
v Object -> Text -> Parser Text
forall a. FromJSON a => Object -> Text -> Parser a
.: Text
"reString") Parser Text -> (Text -> Parser ByteString) -> Parser ByteString
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Text -> Parser ByteString
forall (m :: * -> *).
(Monad m, MonadFail m) =>
Text -> m ByteString
decodeFromText)
       Parser (Bool -> RE) -> Parser Bool -> Parser RE
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
v Object -> Text -> Parser Bool
forall a. FromJSON a => Object -> Text -> Parser a
.: Text
"reCaseSensitive"

-- | Compile a PCRE regex.  If the first parameter is True, the regex is
-- case-sensitive, otherwise caseless.  The regex is compiled from
-- a bytestring interpreted as UTF-8.  If the regex cannot be compiled,
-- a 'RegexException' is thrown.
compileRegex :: Bool -> BS.ByteString -> Regex
compileRegex :: Bool -> ByteString -> Regex
compileRegex Bool
caseSensitive ByteString
regexpStr =
  let opts :: CompOption
opts = CompOption
compAnchored CompOption -> CompOption -> CompOption
forall a. Num a => a -> a -> a
+ CompOption
compUTF8 CompOption -> CompOption -> CompOption
forall a. Num a => a -> a -> a
+
               if Bool
caseSensitive then CompOption
0 else CompOption
compCaseless
  in  case IO (Either (Int, String) Regex) -> Either (Int, String) Regex
forall a. IO a -> a
unsafePerformIO (IO (Either (Int, String) Regex) -> Either (Int, String) Regex)
-> IO (Either (Int, String) Regex) -> Either (Int, String) Regex
forall a b. (a -> b) -> a -> b
$ CompOption
-> ExecOption -> ByteString -> IO (Either (Int, String) Regex)
compile CompOption
opts (ExecOption
execNotEmpty) ByteString
regexpStr of
            Left (Int
off,String
msg) -> RegexException -> Regex
forall a e. Exception e => e -> a
E.throw (RegexException -> Regex) -> RegexException -> Regex
forall a b. (a -> b) -> a -> b
$ String -> RegexException
RegexException (String -> RegexException) -> String -> RegexException
forall a b. (a -> b) -> a -> b
$
                        String
"Error compiling regex /" String -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> String
toString ByteString
regexpStr String -> ShowS
forall a. [a] -> [a] -> [a]
++
                        String
"/ at offset " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
off String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"\n" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
msg
            Right Regex
r -> Regex
r

-- | Convert octal escapes to the form pcre wants.  Note:
-- need at least pcre 8.34 for the form \o{dddd}.
-- So we prefer \ddd or \x{...}.
convertOctalEscapes :: String -> String
convertOctalEscapes :: ShowS
convertOctalEscapes [] = String
""
convertOctalEscapes (Char
'\\':Char
'0':Char
x:Char
y:Char
z:String
rest)
  | (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isOctalDigit [Char
x,Char
y,Char
z] = Char
'\\'Char -> ShowS
forall a. a -> [a] -> [a]
:Char
xChar -> ShowS
forall a. a -> [a] -> [a]
:Char
yChar -> ShowS
forall a. a -> [a] -> [a]
:Char
zChar -> ShowS
forall a. a -> [a] -> [a]
: ShowS
convertOctalEscapes String
rest
convertOctalEscapes (Char
'\\':Char
x:Char
y:Char
z:String
rest)
  | (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isOctalDigit [Char
x,Char
y,Char
z] =Char
'\\'Char -> ShowS
forall a. a -> [a] -> [a]
:Char
xChar -> ShowS
forall a. a -> [a] -> [a]
:Char
yChar -> ShowS
forall a. a -> [a] -> [a]
:Char
zChar -> ShowS
forall a. a -> [a] -> [a]
: ShowS
convertOctalEscapes String
rest
convertOctalEscapes (Char
'\\':Char
'o':Char
'{':String
zs) =
  case (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'}') String
zs of
       (String
ds, Char
'}':String
rest) | (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isOctalDigit String
ds Bool -> Bool -> Bool
&& Bool -> Bool
not (String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
ds) ->
            case ReadS Int
forall a. Read a => ReadS a
reads (Char
'0'Char -> ShowS
forall a. a -> [a] -> [a]
:Char
'o'Char -> ShowS
forall a. a -> [a] -> [a]
:String
ds) of
                 ((Int
n :: Int,[]):[(Int, String)]
_) ->
                     String -> Int -> String
forall r. PrintfType r => String -> r
printf String
"\\x{%x}" Int
n String -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
convertOctalEscapes String
rest
                 [(Int, String)]
_          -> RegexException -> String
forall a e. Exception e => e -> a
E.throw (RegexException -> String) -> RegexException -> String
forall a b. (a -> b) -> a -> b
$ String -> RegexException
RegexException (String -> RegexException) -> String -> RegexException
forall a b. (a -> b) -> a -> b
$
                                   String
"Unable to read octal number: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
ds
       (String, String)
_  -> Char
'\\'Char -> ShowS
forall a. a -> [a] -> [a]
:Char
'o'Char -> ShowS
forall a. a -> [a] -> [a]
:Char
'{'Char -> ShowS
forall a. a -> [a] -> [a]
: ShowS
convertOctalEscapes String
zs
convertOctalEscapes (Char
x:String
xs) = Char
x Char -> ShowS
forall a. a -> [a] -> [a]
: ShowS
convertOctalEscapes String
xs

isOctalDigit :: Char -> Bool
isOctalDigit :: Char -> Bool
isOctalDigit Char
c = Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'7'

-- | Match a 'Regex' against a bytestring.  Returns 'Nothing' if
-- no match, otherwise 'Just' a nonempty list of bytestrings. The first
-- bytestring in the list is the match, the others the captures, if any.
-- If there are errors in executing the regex, a 'RegexException' is
-- thrown.
matchRegex :: Regex -> BS.ByteString -> Maybe [BS.ByteString]
matchRegex :: Regex -> ByteString -> Maybe [ByteString]
matchRegex Regex
r ByteString
s = case IO
  (Either
     WrapError
     (Maybe (ByteString, ByteString, ByteString, [ByteString])))
-> Either
     WrapError
     (Maybe (ByteString, ByteString, ByteString, [ByteString]))
forall a. IO a -> a
unsafePerformIO (Regex
-> ByteString
-> IO
     (Either
        WrapError
        (Maybe (ByteString, ByteString, ByteString, [ByteString])))
regexec Regex
r ByteString
s) of
                      Right (Just (ByteString
_, ByteString
mat, ByteString
_ , [ByteString]
capts)) ->
                                       [ByteString] -> Maybe [ByteString]
forall a. a -> Maybe a
Just (ByteString
mat ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
capts)
                      Right Maybe (ByteString, ByteString, ByteString, [ByteString])
Nothing    -> Maybe [ByteString]
forall a. Maybe a
Nothing
                      -- treat match error as no match, like Kate: #81
                      Left (ReturnCode
_rc, String
_msg) -> Maybe [ByteString]
forall a. Maybe a
Nothing

-- functions to marshall bytestrings to text

encodeToText :: BS.ByteString -> Text.Text
encodeToText :: ByteString -> Text
encodeToText = ByteString -> Text
TE.decodeUtf8 (ByteString -> Text)
-> (ByteString -> ByteString) -> ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
Base64.encode

decodeFromText :: (Monad m, MonadFail m) => Text.Text -> m BS.ByteString
decodeFromText :: Text -> m ByteString
decodeFromText = (String -> m ByteString)
-> (ByteString -> m ByteString)
-> Either String ByteString
-> m ByteString
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> m ByteString
forall (m :: * -> *) a. MonadFail m => String -> m a
fail ByteString -> m ByteString
forall (m :: * -> *) a. Monad m => a -> m a
return (Either String ByteString -> m ByteString)
-> (Text -> Either String ByteString) -> Text -> m ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Either String ByteString
Base64.decode (ByteString -> Either String ByteString)
-> (Text -> ByteString) -> Text -> Either String ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
TE.encodeUtf8