{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}

module Typst.Regex
  ( RE (..),
    RegexMatch (..),
    replaceRegex,
    splitRegex,
    makeLiteralRE,
    makeRE,
    match,
    matchAll,
    -- re-export
    extract,
  )
where

import qualified Data.Array as Array
import Data.Text (Text)
import qualified Data.Text as T
import Data.Typeable (Typeable)
import Text.Regex.TDFA (Regex, extract)
import qualified Text.Regex.TDFA as TDFA
import qualified Text.Regex.TDFA.Text as TDFA

-- import Debug.Trace

-- | A regular expression. Note that typst-hs does not use the same Regex engine
-- as Typst. See issue [#28](https://github.com/jgm/typst-hs/issues/28).
data RE = RE !Text !Regex
  deriving (Typeable)

instance Eq RE where
  RE Text
t1 Regex
_ == :: RE -> RE -> Bool
== RE Text
t2 Regex
_ = Text
t1 Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
t2

instance Ord RE where
  compare :: RE -> RE -> Ordering
compare (RE Text
t1 Regex
_) (RE Text
t2 Regex
_) = Text -> Text -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Text
t1 Text
t2

instance Show RE where
  show :: RE -> String
show (RE Text
t Regex
_) = String
"/" String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Text -> String
T.unpack Text
t String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
"/"

data RegexMatch = RegexMatch
  { RegexMatch -> Int
matchStart :: Int,
    RegexMatch -> Int
matchEnd :: Int,
    RegexMatch -> Text
matchText :: Text,
    RegexMatch -> [Text]
matchCaptures :: [Text]
  }
  deriving (RegexMatch -> RegexMatch -> Bool
(RegexMatch -> RegexMatch -> Bool)
-> (RegexMatch -> RegexMatch -> Bool) -> Eq RegexMatch
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: RegexMatch -> RegexMatch -> Bool
== :: RegexMatch -> RegexMatch -> Bool
$c/= :: RegexMatch -> RegexMatch -> Bool
/= :: RegexMatch -> RegexMatch -> Bool
Eq, Eq RegexMatch
Eq RegexMatch =>
(RegexMatch -> RegexMatch -> Ordering)
-> (RegexMatch -> RegexMatch -> Bool)
-> (RegexMatch -> RegexMatch -> Bool)
-> (RegexMatch -> RegexMatch -> Bool)
-> (RegexMatch -> RegexMatch -> Bool)
-> (RegexMatch -> RegexMatch -> RegexMatch)
-> (RegexMatch -> RegexMatch -> RegexMatch)
-> Ord RegexMatch
RegexMatch -> RegexMatch -> Bool
RegexMatch -> RegexMatch -> Ordering
RegexMatch -> RegexMatch -> RegexMatch
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: RegexMatch -> RegexMatch -> Ordering
compare :: RegexMatch -> RegexMatch -> Ordering
$c< :: RegexMatch -> RegexMatch -> Bool
< :: RegexMatch -> RegexMatch -> Bool
$c<= :: RegexMatch -> RegexMatch -> Bool
<= :: RegexMatch -> RegexMatch -> Bool
$c> :: RegexMatch -> RegexMatch -> Bool
> :: RegexMatch -> RegexMatch -> Bool
$c>= :: RegexMatch -> RegexMatch -> Bool
>= :: RegexMatch -> RegexMatch -> Bool
$cmax :: RegexMatch -> RegexMatch -> RegexMatch
max :: RegexMatch -> RegexMatch -> RegexMatch
$cmin :: RegexMatch -> RegexMatch -> RegexMatch
min :: RegexMatch -> RegexMatch -> RegexMatch
Ord, Typeable)

replaceRegex :: RE -> Maybe Int -> (RegexMatch -> Text) -> Text -> Text
replaceRegex :: RE -> Maybe Int -> (RegexMatch -> Text) -> Text -> Text
replaceRegex (RE Text
_ Regex
re) Maybe Int
mbCount RegexMatch -> Text
replaceFn Text
strIn =
  let matches :: [MatchArray]
matches = ([MatchArray] -> [MatchArray])
-> (Int -> [MatchArray] -> [MatchArray])
-> Maybe Int
-> [MatchArray]
-> [MatchArray]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [MatchArray] -> [MatchArray]
forall a. a -> a
id Int -> [MatchArray] -> [MatchArray]
forall a. Int -> [a] -> [a]
take Maybe Int
mbCount ([MatchArray] -> [MatchArray]) -> [MatchArray] -> [MatchArray]
forall a b. (a -> b) -> a -> b
$ Regex -> Text -> [MatchArray]
forall regex source.
RegexLike regex source =>
regex -> source -> [MatchArray]
TDFA.matchAll Regex
re Text
strIn
      getCaptures :: Array i (Int, Int) -> [Text]
getCaptures Array i (Int, Int)
m =
        ((Int, Int) -> Text) -> [(Int, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map
          (\(Int
off, Int
len) -> (Int, Int) -> Text -> Text
forall source. Extract source => (Int, Int) -> source -> source
extract (Int
off, Int
len) Text
strIn)
          (Int -> [(Int, Int)] -> [(Int, Int)]
forall a. Int -> [a] -> [a]
drop Int
1 (Array i (Int, Int) -> [(Int, Int)]
forall i e. Array i e -> [e]
Array.elems Array i (Int, Int)
m))
      go :: Int -> [Array i (Int, Int)] -> Text
go Int
i [] = Int -> Text -> Text
T.drop Int
i Text
strIn
      go Int
i (Array i (Int, Int)
m : [Array i (Int, Int)]
rest) =
        Int -> Text -> Text
forall a b. a -> b -> b
seq Int
i (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$
          let (Int
off, Int
len) = Array i (Int, Int)
m Array i (Int, Int) -> i -> (Int, Int)
forall i e. Ix i => Array i e -> i -> e
Array.! i
0
           in ( if Int
off Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
i
                  then Int -> Int -> Text -> Text
slice Int
i (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i) Text
strIn
                  else Text
forall a. Monoid a => a
mempty
              )
                Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> RegexMatch -> Text
replaceFn
                  RegexMatch
                    { matchStart :: Int
matchStart = Int
off,
                      matchEnd :: Int
matchEnd = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len,
                      matchText :: Text
matchText = (Int, Int) -> Text -> Text
forall source. Extract source => (Int, Int) -> source -> source
extract (Int
off, Int
len) Text
strIn,
                      matchCaptures :: [Text]
matchCaptures = Array i (Int, Int) -> [Text]
forall {i}. Array i (Int, Int) -> [Text]
getCaptures Array i (Int, Int)
m
                    }
                Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Int -> [Array i (Int, Int)] -> Text
go (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len) [Array i (Int, Int)]
rest
      slice :: Int -> Int -> Text -> Text
slice Int
pos Int
len = Int -> Text -> Text
T.take Int
len (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
T.drop Int
pos
   in Int -> [MatchArray] -> Text
forall {i}. (Ix i, Num i) => Int -> [Array i (Int, Int)] -> Text
go Int
0 [MatchArray]
matches

makeRE :: MonadFail m => Text -> m RE
makeRE :: forall (m :: * -> *). MonadFail m => Text -> m RE
makeRE Text
t =
  Text -> Regex -> RE
RE Text
t'
    (Regex -> RE) -> m Regex -> m RE
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (String -> m Regex)
-> (Regex -> m Regex) -> Either String Regex -> m Regex
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either
      String -> m Regex
forall a. String -> m a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail
      Regex -> m Regex
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure
      (CompOption -> ExecOption -> Text -> Either String Regex
TDFA.compile CompOption
compopts ExecOption
forall regex compOpt execOpt.
RegexOptions regex compOpt execOpt =>
execOpt
TDFA.defaultExecOpt Text
t')
  where
    (Bool
caseSensitive, Text
t') =
      if Text
"(?i)" Text -> Text -> Bool
`T.isPrefixOf` Text
t
        then (Bool
False, String -> Text
T.pack (String -> Text) -> (Text -> String) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bool -> ShowS
go Bool
False ShowS -> (Text -> String) -> Text -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
T.drop Int
4 Text
t)
        else (Bool
True, String -> Text
T.pack (String -> Text) -> (Text -> String) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bool -> ShowS
go Bool
False ShowS -> (Text -> String) -> Text -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text
t)
    compopts :: CompOption
compopts = CompOption
forall regex compOpt execOpt.
RegexOptions regex compOpt execOpt =>
compOpt
TDFA.defaultCompOpt {TDFA.caseSensitive = caseSensitive}

    -- Handle things not supported in TFFA posix regexes, e.g. \d \w \s, +, ?
    -- Note that we have to track whether we're in a character class, because
    -- the expansions will be different in that case.  The first
    -- parameter of `go` is True if in a character class.
    go :: Bool -> ShowS
go Bool
_ [] = []
    go Bool
True (Char
']' : String
cs) = Char
']' Char -> ShowS
forall a. a -> [a] -> [a]
: Bool -> ShowS
go Bool
False String
cs
    go Bool
False (Char
'[' : String
cs) = Char
'[' Char -> ShowS
forall a. a -> [a] -> [a]
:
      case String
cs of
        Char
'^':Char
']':String
ds -> Char
'^' Char -> ShowS
forall a. a -> [a] -> [a]
: Char
']' Char -> ShowS
forall a. a -> [a] -> [a]
: Bool -> ShowS
go Bool
True String
ds
        Char
'^':Char
'\\':Char
']':String
ds -> Char
'^' Char -> ShowS
forall a. a -> [a] -> [a]
: Char
']' Char -> ShowS
forall a. a -> [a] -> [a]
: Bool -> ShowS
go Bool
True String
ds
        Char
']':String
ds -> Char
']' Char -> ShowS
forall a. a -> [a] -> [a]
: Bool -> ShowS
go Bool
True String
ds
        Char
'\\':Char
']':String
ds -> Char
']' Char -> ShowS
forall a. a -> [a] -> [a]
: Bool -> ShowS
go Bool
True String
ds
        String
_ -> Bool -> ShowS
go Bool
True String
cs
    go Bool
False (Char
'?' : String
cs) = String
"{0,1}" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Bool -> ShowS
go Bool
False String
cs
    go Bool
False (Char
'+' : String
cs) = String
"{1,}" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Bool -> ShowS
go Bool
False String
cs
    go Bool
inCharClass (Char
'\\' : Char
c : String
cs)
      = let f :: ShowS
f = if Bool
inCharClass
                   then ShowS
forall a. a -> a
id
                   else \String
x -> String
"[" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
x String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"]"
            r :: String
r = case Char
c of
                  Char
'd' -> ShowS
f String
"[:digit:]"
                  Char
'D' -> ShowS
f String
"^[:digit:]"
                  Char
's' -> ShowS
f String
"[:space:]"
                  Char
'S' -> ShowS
f String
"^[:space:]"
                  Char
'w' -> ShowS
f String
"[:word:]"
                  Char
'W' -> ShowS
f String
"^[:word:]"
                  Char
_ -> [Char
'\\', Char
c]
        in String
r String -> ShowS
forall a. [a] -> [a] -> [a]
++ Bool -> ShowS
go Bool
inCharClass String
cs
    go Bool
inCharClass (Char
c : String
cs) = Char
c Char -> ShowS
forall a. a -> [a] -> [a]
: Bool -> ShowS
go Bool
inCharClass String
cs

match :: TDFA.RegexContext Regex source target => RE -> source -> target
match :: forall source target.
RegexContext Regex source target =>
RE -> source -> target
match (RE Text
_ Regex
re) source
t = Regex -> source -> target
forall regex source target.
RegexContext regex source target =>
regex -> source -> target
TDFA.match Regex
re source
t

matchAll :: TDFA.RegexLike Regex source => RE -> source -> [TDFA.MatchArray]
matchAll :: forall source.
RegexLike Regex source =>
RE -> source -> [MatchArray]
matchAll (RE Text
_ Regex
re) source
t = Regex -> source -> [MatchArray]
forall regex source.
RegexLike regex source =>
regex -> source -> [MatchArray]
TDFA.matchAll Regex
re source
t

makeLiteralRE :: MonadFail m => Text -> m RE
makeLiteralRE :: forall (m :: * -> *). MonadFail m => Text -> m RE
makeLiteralRE Text
t
  | Text -> Bool
T.null Text
t = Text -> m RE
forall (m :: * -> *). MonadFail m => Text -> m RE
makeRE Text
".{0,0}" -- experimentally behaves as typst does
  | Bool
otherwise = Text -> m RE
forall (m :: * -> *). MonadFail m => Text -> m RE
makeRE (Text -> m RE) -> Text -> m RE
forall a b. (a -> b) -> a -> b
$ (Text -> Char -> Text) -> Text -> Text -> Text
forall a. (a -> Char -> a) -> a -> Text -> a
T.foldl Text -> Char -> Text
go Text
forall a. Monoid a => a
mempty Text
t
  where
    go :: Text -> Char -> Text
go Text
acc Char
c = if Char -> Bool
isSpecial Char
c then Text
acc Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack [Char
'\\', Char
c] else Text -> Char -> Text
T.snoc Text
acc Char
c
    isSpecial :: Char -> Bool
isSpecial Char
c = Char
c Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
".*?+(){}[]|\\^$" :: [Char])

-- from regex-compat but for Text
splitRegex :: RE -> Text -> [Text]
splitRegex :: RE -> Text -> [Text]
splitRegex (RE Text
_ Regex
delim) Text
strIn =
  let matches :: [(Int, Int)]
matches = (MatchArray -> (Int, Int)) -> [MatchArray] -> [(Int, Int)]
forall a b. (a -> b) -> [a] -> [b]
map (MatchArray -> Int -> (Int, Int)
forall i e. Ix i => Array i e -> i -> e
Array.! Int
0) (Regex -> Text -> [MatchArray]
forall regex source.
RegexLike regex source =>
regex -> source -> [MatchArray]
TDFA.matchAll Regex
delim Text
strIn)
      go :: Int -> Text -> [(Int, Int)] -> [Text]
go Int
_i Text
str [] = Text
str Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: []
      go Int
i Text
str ((Int
off, Int
len) : [(Int, Int)]
rest) =
        let i' :: Int
i' = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len
            firstline :: Text
firstline = Int -> Text -> Text
T.take (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i) Text
str
            remainder :: Text
remainder = Int -> Text -> Text
T.drop (Int
i' Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i) Text
str
         in Int -> [Text] -> [Text]
forall a b. a -> b -> b
seq Int
i' ([Text] -> [Text]) -> [Text] -> [Text]
forall a b. (a -> b) -> a -> b
$
              if Text -> Bool
T.null Text
remainder
                then [Text
firstline, Text
""]
                else Text
firstline Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: Int -> Text -> [(Int, Int)] -> [Text]
go Int
i' Text
remainder [(Int, Int)]
rest
   in Int -> Text -> [(Int, Int)] -> [Text]
go Int
0 Text
strIn [(Int, Int)]
matches