{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}

module Typst.Regex
  ( RE (..),
    RegexMatch (..),
    replaceRegex,
    splitRegex,
    makeLiteralRE,
    makeRE,
    match,
    matchAll,
    -- re-export
    extract,
  )
where

import qualified Data.Array as Array
import Data.Text (Text)
import qualified Data.Text as T
import Data.Typeable (Typeable)
import Text.Regex.TDFA (Regex, extract)
import qualified Text.Regex.TDFA as TDFA
import qualified Text.Regex.TDFA.Text as TDFA

-- import Debug.Trace

data RE = RE !Text !Regex
  deriving (Typeable)

instance Eq RE where
  RE Text
t1 Regex
_ == :: RE -> RE -> Bool
== RE Text
t2 Regex
_ = Text
t1 forall a. Eq a => a -> a -> Bool
== Text
t2

instance Ord RE where
  compare :: RE -> RE -> Ordering
compare (RE Text
t1 Regex
_) (RE Text
t2 Regex
_) = forall a. Ord a => a -> a -> Ordering
compare Text
t1 Text
t2

instance Show RE where
  show :: RE -> String
show (RE Text
t Regex
_) = String
"/" forall a. Semigroup a => a -> a -> a
<> Text -> String
T.unpack Text
t forall a. Semigroup a => a -> a -> a
<> String
"/"

data RegexMatch = RegexMatch
  { RegexMatch -> Int
matchStart :: Int,
    RegexMatch -> Int
matchEnd :: Int,
    RegexMatch -> Text
matchText :: Text,
    RegexMatch -> [Text]
matchCaptures :: [Text]
  }
  deriving (RegexMatch -> RegexMatch -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: RegexMatch -> RegexMatch -> Bool
$c/= :: RegexMatch -> RegexMatch -> Bool
== :: RegexMatch -> RegexMatch -> Bool
$c== :: RegexMatch -> RegexMatch -> Bool
Eq, Eq RegexMatch
RegexMatch -> RegexMatch -> Bool
RegexMatch -> RegexMatch -> Ordering
RegexMatch -> RegexMatch -> RegexMatch
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: RegexMatch -> RegexMatch -> RegexMatch
$cmin :: RegexMatch -> RegexMatch -> RegexMatch
max :: RegexMatch -> RegexMatch -> RegexMatch
$cmax :: RegexMatch -> RegexMatch -> RegexMatch
>= :: RegexMatch -> RegexMatch -> Bool
$c>= :: RegexMatch -> RegexMatch -> Bool
> :: RegexMatch -> RegexMatch -> Bool
$c> :: RegexMatch -> RegexMatch -> Bool
<= :: RegexMatch -> RegexMatch -> Bool
$c<= :: RegexMatch -> RegexMatch -> Bool
< :: RegexMatch -> RegexMatch -> Bool
$c< :: RegexMatch -> RegexMatch -> Bool
compare :: RegexMatch -> RegexMatch -> Ordering
$ccompare :: RegexMatch -> RegexMatch -> Ordering
Ord, Typeable)

replaceRegex :: RE -> Maybe Int -> (RegexMatch -> Text) -> Text -> Text
replaceRegex :: RE -> Maybe Int -> (RegexMatch -> Text) -> Text -> Text
replaceRegex (RE Text
_ Regex
re) Maybe Int
mbCount RegexMatch -> Text
replaceFn Text
strIn =
  let matches :: [MatchArray]
matches = forall b a. b -> (a -> b) -> Maybe a -> b
maybe forall a. a -> a
id forall a. Int -> [a] -> [a]
take Maybe Int
mbCount forall a b. (a -> b) -> a -> b
$ forall regex source.
RegexLike regex source =>
regex -> source -> [MatchArray]
TDFA.matchAll Regex
re Text
strIn
      getCaptures :: Array i (Int, Int) -> [Text]
getCaptures Array i (Int, Int)
m =
        forall a b. (a -> b) -> [a] -> [b]
map
          (\(Int
off, Int
len) -> forall source. Extract source => (Int, Int) -> source -> source
extract (Int
off, Int
len) Text
strIn)
          (forall a. Int -> [a] -> [a]
drop Int
1 (forall i e. Array i e -> [e]
Array.elems Array i (Int, Int)
m))
      go :: Int -> [Array i (Int, Int)] -> Text
go Int
i [] = Int -> Text -> Text
T.drop Int
i Text
strIn
      go Int
i (Array i (Int, Int)
m : [Array i (Int, Int)]
rest) =
        seq :: forall a b. a -> b -> b
seq Int
i forall a b. (a -> b) -> a -> b
$
          let (Int
off, Int
len) = Array i (Int, Int)
m forall i e. Ix i => Array i e -> i -> e
Array.! i
0
           in ( if Int
off forall a. Ord a => a -> a -> Bool
> Int
i
                  then Int -> Int -> Text -> Text
slice Int
i (Int
off forall a. Num a => a -> a -> a
- Int
i) Text
strIn
                  else forall a. Monoid a => a
mempty
              )
                forall a. Semigroup a => a -> a -> a
<> RegexMatch -> Text
replaceFn
                  RegexMatch
                    { matchStart :: Int
matchStart = Int
off,
                      matchEnd :: Int
matchEnd = Int
off forall a. Num a => a -> a -> a
+ Int
len,
                      matchText :: Text
matchText = forall source. Extract source => (Int, Int) -> source -> source
extract (Int
off, Int
len) Text
strIn,
                      matchCaptures :: [Text]
matchCaptures = forall {i}. Array i (Int, Int) -> [Text]
getCaptures Array i (Int, Int)
m
                    }
                forall a. Semigroup a => a -> a -> a
<> Int -> [Array i (Int, Int)] -> Text
go (Int
off forall a. Num a => a -> a -> a
+ Int
len) [Array i (Int, Int)]
rest
      slice :: Int -> Int -> Text -> Text
slice Int
pos Int
len = Int -> Text -> Text
T.take Int
len forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
T.drop Int
pos
   in forall {i}. (Ix i, Num i) => Int -> [Array i (Int, Int)] -> Text
go Int
0 [MatchArray]
matches

makeRE :: MonadFail m => Text -> m RE
makeRE :: forall (m :: * -> *). MonadFail m => Text -> m RE
makeRE Text
t =
  Text -> Regex -> RE
RE Text
t'
    forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either
      forall (m :: * -> *) a. MonadFail m => String -> m a
fail
      forall (f :: * -> *) a. Applicative f => a -> f a
pure
      (CompOption -> ExecOption -> Text -> Either String Regex
TDFA.compile CompOption
compopts forall regex compOpt execOpt.
RegexOptions regex compOpt execOpt =>
execOpt
TDFA.defaultExecOpt Text
t')
  where
    (Bool
caseSensitive, Text
t') =
      if Text
"(?i)" Text -> Text -> Bool
`T.isPrefixOf` Text
t
        then (Bool
False, String -> Text
T.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
go forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
T.drop Int
4 Text
t)
        else (Bool
True, String -> Text
T.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
go forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack forall a b. (a -> b) -> a -> b
$ Text
t)
    compopts :: CompOption
compopts = forall regex compOpt execOpt.
RegexOptions regex compOpt execOpt =>
compOpt
TDFA.defaultCompOpt {caseSensitive :: Bool
TDFA.caseSensitive = Bool
caseSensitive}
    -- handle things not supported in TFFA (posix) regexes, e.g. \d \w \s, +, ?
    go :: ShowS
go [] = []
    go (Char
'?' : String
cs) = String
"{0,1}" forall a. [a] -> [a] -> [a]
++ ShowS
go String
cs
    go (Char
'+' : String
cs) = String
"{1,}" forall a. [a] -> [a] -> [a]
++ ShowS
go String
cs
    go (Char
'\\' : Char
c : String
cs)
      | Char
c forall a. Eq a => a -> a -> Bool
== Char
'd' = String
"[[:digit:]]" forall a. [a] -> [a] -> [a]
++ ShowS
go String
cs
      | Char
c forall a. Eq a => a -> a -> Bool
== Char
'D' = String
"[^[:digit:]]" forall a. [a] -> [a] -> [a]
++ ShowS
go String
cs
      | Char
c forall a. Eq a => a -> a -> Bool
== Char
's' = String
"[[:space:]]" forall a. [a] -> [a] -> [a]
++ ShowS
go String
cs
      | Char
c forall a. Eq a => a -> a -> Bool
== Char
'S' = String
"[^[:space:]]" forall a. [a] -> [a] -> [a]
++ ShowS
go String
cs
      | Char
c forall a. Eq a => a -> a -> Bool
== Char
'w' = String
"[[:word:]]" forall a. [a] -> [a] -> [a]
++ ShowS
go String
cs
      | Char
c forall a. Eq a => a -> a -> Bool
== Char
'W' = String
"[^[:word:]]" forall a. [a] -> [a] -> [a]
++ ShowS
go String
cs
      | Bool
otherwise = Char
'\\' forall a. a -> [a] -> [a]
: Char
c forall a. a -> [a] -> [a]
: ShowS
go String
cs
    go (Char
c : String
cs) = Char
c forall a. a -> [a] -> [a]
: ShowS
go String
cs

match :: TDFA.RegexContext Regex source target => RE -> source -> target
match :: forall source target.
RegexContext Regex source target =>
RE -> source -> target
match (RE Text
_ Regex
re) source
t = forall regex source target.
RegexContext regex source target =>
regex -> source -> target
TDFA.match Regex
re source
t

matchAll :: TDFA.RegexLike Regex source => RE -> source -> [TDFA.MatchArray]
matchAll :: forall source.
RegexLike Regex source =>
RE -> source -> [MatchArray]
matchAll (RE Text
_ Regex
re) source
t = forall regex source.
RegexLike regex source =>
regex -> source -> [MatchArray]
TDFA.matchAll Regex
re source
t

makeLiteralRE :: MonadFail m => Text -> m RE
makeLiteralRE :: forall (m :: * -> *). MonadFail m => Text -> m RE
makeLiteralRE Text
t
  | Text -> Bool
T.null Text
t = forall (m :: * -> *). MonadFail m => Text -> m RE
makeRE Text
".{0,0}" -- experimentally behaves as typst does
  | Bool
otherwise = forall (m :: * -> *). MonadFail m => Text -> m RE
makeRE forall a b. (a -> b) -> a -> b
$ forall a. (a -> Char -> a) -> a -> Text -> a
T.foldl Text -> Char -> Text
go forall a. Monoid a => a
mempty Text
t
  where
    go :: Text -> Char -> Text
go Text
acc Char
c = if Char -> Bool
isSpecial Char
c then Text
acc forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack [Char
'\\', Char
c] else Text -> Char -> Text
T.snoc Text
acc Char
c
    isSpecial :: Char -> Bool
isSpecial Char
c = Char
c forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
".*?+{}[]\\" :: [Char])

-- from regex-compat but for Text
splitRegex :: RE -> Text -> [Text]
splitRegex :: RE -> Text -> [Text]
splitRegex (RE Text
_ Regex
delim) Text
strIn =
  let matches :: [(Int, Int)]
matches = forall a b. (a -> b) -> [a] -> [b]
map (forall i e. Ix i => Array i e -> i -> e
Array.! Int
0) (forall regex source.
RegexLike regex source =>
regex -> source -> [MatchArray]
TDFA.matchAll Regex
delim Text
strIn)
      go :: Int -> Text -> [(Int, Int)] -> [Text]
go Int
_i Text
str [] = Text
str forall a. a -> [a] -> [a]
: []
      go Int
i Text
str ((Int
off, Int
len) : [(Int, Int)]
rest) =
        let i' :: Int
i' = Int
off forall a. Num a => a -> a -> a
+ Int
len
            firstline :: Text
firstline = Int -> Text -> Text
T.take (Int
off forall a. Num a => a -> a -> a
- Int
i) Text
str
            remainder :: Text
remainder = Int -> Text -> Text
T.drop (Int
i' forall a. Num a => a -> a -> a
- Int
i) Text
str
         in seq :: forall a b. a -> b -> b
seq Int
i' forall a b. (a -> b) -> a -> b
$
              if Text -> Bool
T.null Text
remainder
                then [Text
firstline, Text
""]
                else Text
firstline forall a. a -> [a] -> [a]
: Int -> Text -> [(Int, Int)] -> [Text]
go Int
i' Text
remainder [(Int, Int)]
rest
   in Int -> Text -> [(Int, Int)] -> [Text]
go Int
0 Text
strIn [(Int, Int)]
matches