module Language.Lexer.Tlex.Plugin.Encoding.UTF8 (
    charSetPUtf8,
) where

import           Language.Lexer.Tlex.Prelude

import qualified Data.CharSet                                   as CharSet
import qualified Data.IntSet                                    as IntSet
import qualified Language.Lexer.Tlex.Data.EnumMap               as EnumMap
import qualified Language.Lexer.Tlex.Data.EnumSet               as EnumSet
import qualified Language.Lexer.Tlex.Data.NonEmptyEnumStringSet as NonEmptyEnumStringSet
import qualified Language.Lexer.Tlex.Plugin.Encoding.CharSetP   as CharSetP
import qualified Language.Lexer.Tlex.Syntax                     as Tlex


charSetPUtf8 :: CharSetP.CharSetEncoder m => CharSetP.CharSetP m
charSetPUtf8 :: CharSetP m
charSetPUtf8 = CharSetP :: forall (m :: * -> *). (CharSet -> m Pattern) -> CharSetP m
CharSetP.CharSetP
        { $sel:charSetEncodingP:CharSetP :: CharSet -> m Pattern
CharSetP.charSetEncodingP = \case
            CharSet.CharSet Bool
True  ByteSet
_ IntSet
is -> IntSet -> m Pattern
forall (m :: * -> *). CharSetEncoder m => IntSet -> m Pattern
goStraight IntSet
is
            CharSet.CharSet Bool
False ByteSet
_ IntSet
is -> IntSet -> m Pattern
forall (m :: * -> *). CharSetEncoder m => IntSet -> m Pattern
goComplement IntSet
is
        }
    where
        goStraight :: IntSet -> m Pattern
goStraight IntSet
is = do
            NonEmptyEnumStringSet Word8
bsSet <- IntSet -> m (NonEmptyEnumStringSet Word8)
forall (m :: * -> *).
CharSetEncoder m =>
IntSet -> m (NonEmptyEnumStringSet Word8)
charSetToByteStringSetUtf8 IntSet
is
            Pattern -> m Pattern
forall (f :: * -> *) a. Applicative f => a -> f a
pure do NonEmptyEnumStringSet Word8 -> Pattern
forall e. Enum e => NonEmptyEnumStringSet e -> Pattern e
straightP NonEmptyEnumStringSet Word8
bsSet

        straightP :: NonEmptyEnumStringSet e -> Pattern e
straightP NonEmptyEnumStringSet e
s =
            let singleByteP :: Pattern e
singleByteP = EnumSet e -> Pattern e
forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP do
                    NonEmptyEnumStringSet e -> EnumSet e
forall k (a :: k). NonEmptyEnumStringSet a -> EnumSet a
NonEmptyEnumStringSet.singleEnums NonEmptyEnumStringSet e
s
            in [Pattern e] -> Pattern e
forall e. Enum e => [Pattern e] -> Pattern e
Tlex.orP do
                Pattern e
singleBytePPattern e -> [Pattern e] -> [Pattern e]
forall a. a -> [a] -> [a]
:
                    [ [e] -> Pattern e
forall e. Enum e => [e] -> Pattern e
Tlex.enumsP [e
c] Pattern e -> Pattern e -> Pattern e
forall a. Semigroup a => a -> a -> a
<> NonEmptyEnumStringSet e -> Pattern e
straightP NonEmptyEnumStringSet e
s'
                    | (e
c, NonEmptyEnumStringSet e
s') <- EnumMap e (NonEmptyEnumStringSet e)
-> [(e, NonEmptyEnumStringSet e)]
forall k a. Enum k => EnumMap k a -> [(k, a)]
EnumMap.assocs do
                        NonEmptyEnumStringSet e -> EnumMap e (NonEmptyEnumStringSet e)
forall k (a :: k).
NonEmptyEnumStringSet a -> EnumMap a (NonEmptyEnumStringSet a)
NonEmptyEnumStringSet.enumStrings NonEmptyEnumStringSet e
s
                    ]

        goComplement :: IntSet -> m Pattern
goComplement IntSet
is = do
            NonEmptyEnumStringSet Word8
bsSet <- IntSet -> m (NonEmptyEnumStringSet Word8)
forall (m :: * -> *).
CharSetEncoder m =>
IntSet -> m (NonEmptyEnumStringSet Word8)
charSetToByteStringSetUtf8 IntSet
is
            Pattern -> m Pattern
forall (f :: * -> *) a. Applicative f => a -> f a
pure do NonEmptyEnumStringSet Word8 -> Pattern
complementPFromEnumStrings NonEmptyEnumStringSet Word8
bsSet

charSetToByteStringSetUtf8 :: CharSetP.CharSetEncoder m
    => IntSet.IntSet -> m (NonEmptyEnumStringSet.NonEmptyEnumStringSet Word8)
charSetToByteStringSetUtf8 :: IntSet -> m (NonEmptyEnumStringSet Word8)
charSetToByteStringSetUtf8 IntSet
is = (NonEmptyEnumStringSet Word8
 -> Int -> m (NonEmptyEnumStringSet Word8))
-> NonEmptyEnumStringSet Word8
-> [Int]
-> m (NonEmptyEnumStringSet Word8)
forall (t :: * -> *) (m :: * -> *) b a.
(Foldable t, Monad m) =>
(b -> a -> m b) -> b -> t a -> m b
foldM
        do \NonEmptyEnumStringSet Word8
s Int
c -> NonEmptyEnumStringSet Word8
-> Int -> m (NonEmptyEnumStringSet Word8)
forall (f :: * -> *).
CharSetEncoder f =>
NonEmptyEnumStringSet Word8
-> Int -> f (NonEmptyEnumStringSet Word8)
foldStep NonEmptyEnumStringSet Word8
s Int
c
        do NonEmptyEnumStringSet Word8
forall a. Enum a => NonEmptyEnumStringSet a
NonEmptyEnumStringSet.empty
        do IntSet -> [Int]
IntSet.toAscList IntSet
is
    where
        foldStep :: NonEmptyEnumStringSet Word8
-> Int -> f (NonEmptyEnumStringSet Word8)
foldStep NonEmptyEnumStringSet Word8
s Int
c = if
            | Int
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x7F -> NonEmptyEnumStringSet Word8 -> f (NonEmptyEnumStringSet Word8)
forall (f :: * -> *) a. Applicative f => a -> f a
pure do
                Word8 -> NonEmptyEnumStringSet Word8 -> NonEmptyEnumStringSet Word8
forall a.
Enum a =>
a -> NonEmptyEnumStringSet a -> NonEmptyEnumStringSet a
NonEmptyEnumStringSet.insertSingleByte
                    do Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
c
                    do NonEmptyEnumStringSet Word8
s
            | Int
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x7FF ->
                let (Word8
c', [Word8]
l) = Int -> Int -> (Word8, [Word8])
stringTails Int
c Int
1
                in NonEmptyEnumStringSet Word8 -> f (NonEmptyEnumStringSet Word8)
forall (f :: * -> *) a. Applicative f => a -> f a
pure do
                    NonEmpty Word8
-> NonEmptyEnumStringSet Word8 -> NonEmptyEnumStringSet Word8
forall a.
Enum a =>
NonEmpty a -> NonEmptyEnumStringSet a -> NonEmptyEnumStringSet a
NonEmptyEnumStringSet.insert
                        do (Word8
0xC0 Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
c') Word8 -> [Word8] -> NonEmpty Word8
forall a. a -> [a] -> NonEmpty a
:| [Word8]
l
                        do NonEmptyEnumStringSet Word8
s
            | Int
0xD800 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
c Bool -> Bool -> Bool
&& Int
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xDFFF -> do
                EncodeWarning -> f ()
forall (m :: * -> *). CharSetEncoder m => EncodeWarning -> m ()
CharSetP.reportEncodeWarning
                    do Char -> EncodeWarning
CharSetP.NotSupportedChar do Int -> Char
forall a. Enum a => Int -> a
toEnum Int
c
                NonEmptyEnumStringSet Word8 -> f (NonEmptyEnumStringSet Word8)
forall (f :: * -> *) a. Applicative f => a -> f a
pure NonEmptyEnumStringSet Word8
s
            | Int
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xFFFF ->
                let (Word8
c', [Word8]
l) = Int -> Int -> (Word8, [Word8])
stringTails Int
c Int
2
                in NonEmptyEnumStringSet Word8 -> f (NonEmptyEnumStringSet Word8)
forall (f :: * -> *) a. Applicative f => a -> f a
pure do
                    NonEmpty Word8
-> NonEmptyEnumStringSet Word8 -> NonEmptyEnumStringSet Word8
forall a.
Enum a =>
NonEmpty a -> NonEmptyEnumStringSet a -> NonEmptyEnumStringSet a
NonEmptyEnumStringSet.insert
                        do (Word8
0xE0 Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
c') Word8 -> [Word8] -> NonEmpty Word8
forall a. a -> [a] -> NonEmpty a
:| [Word8]
l
                        do NonEmptyEnumStringSet Word8
s
            | Bool
otherwise ->
                let (Word8
c', [Word8]
l) = Int -> Int -> (Word8, [Word8])
stringTails Int
c Int
3
                in NonEmptyEnumStringSet Word8 -> f (NonEmptyEnumStringSet Word8)
forall (f :: * -> *) a. Applicative f => a -> f a
pure do
                    NonEmpty Word8
-> NonEmptyEnumStringSet Word8 -> NonEmptyEnumStringSet Word8
forall a.
Enum a =>
NonEmpty a -> NonEmptyEnumStringSet a -> NonEmptyEnumStringSet a
NonEmptyEnumStringSet.insert
                        do (Word8
0xF0 Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
c') Word8 -> [Word8] -> NonEmpty Word8
forall a. a -> [a] -> NonEmpty a
:| [Word8]
l
                        do NonEmptyEnumStringSet Word8
s

        stringTails :: Int -> Int -> (Word8, [Word8])
        stringTails :: Int -> Int -> (Word8, [Word8])
stringTails Int
c Int
n = Int -> [Word8] -> Int -> (Word8, [Word8])
forall t t a a.
(Integral t, Num t, Num a, Num a, Eq t) =>
t -> [a] -> t -> (a, [a])
stringTails' Int
c [] Int
n
        stringTails' :: t -> [a] -> t -> (a, [a])
stringTails' t
c [a]
l = \case
            t
0 -> (t -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral t
c, [a]
l)
            t
n ->
                let (t
c', t
x) = t -> t -> (t, t)
forall a. Integral a => a -> a -> (a, a)
quotRem t
c t
0x40
                    x' :: a
x' = t -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral do t
0x80 t -> t -> t
forall a. Num a => a -> a -> a
+ t
x
                in t -> [a] -> t -> (a, [a])
stringTails' t
c'
                    do a
x' a -> [a] -> [a]
forall a. a -> [a] -> [a]
: [a]
l
                    do t
n t -> t -> t
forall a. Num a => a -> a -> a
- t
1

complementPFromEnumStrings
    :: NonEmptyEnumStringSet.NonEmptyEnumStringSet Word8 -> Tlex.Pattern Word8
complementPFromEnumStrings :: NonEmptyEnumStringSet Word8 -> Pattern
complementPFromEnumStrings NonEmptyEnumStringSet Word8
ess0 = [Pattern] -> Pattern
forall e. Enum e => [Pattern e] -> Pattern e
Tlex.orP
        [ [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr1es] []
        , [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr2es, EnumSet Word8
seqes] [Pattern
seqesP]
        , [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr3p1o1es, EnumSet Word8
pr3p1o2es, EnumSet Word8
seqes]
            [ EnumSet Word8 -> Pattern
forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr3p1o1es
            , EnumSet Word8 -> Pattern
forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr3p1o2es
            , Pattern
seqesP
            ]
        , [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr3p2es, EnumSet Word8
seqes, EnumSet Word8
seqes]
            [ EnumSet Word8 -> Pattern
forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr3p2es
            , Pattern
seqesP
            , Pattern
seqesP
            ]
        , [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr3p3o1es, EnumSet Word8
pr3p3o2es, EnumSet Word8
seqes]
            [ EnumSet Word8 -> Pattern
forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr3p3o1es
            , EnumSet Word8 -> Pattern
forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr3p3o2es
            , Pattern
seqesP
            ]
        , [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr3p4es, EnumSet Word8
seqes, EnumSet Word8
seqes]
            [ EnumSet Word8 -> Pattern
forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr3p4es
            , Pattern
seqesP
            , Pattern
seqesP
            ]
        , [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr4p1o1es, EnumSet Word8
pr4p1o2es, EnumSet Word8
seqes, EnumSet Word8
seqes]
            [ EnumSet Word8 -> Pattern
forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr4p1o1es
            , EnumSet Word8 -> Pattern
forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr4p1o2es
            , Pattern
seqesP
            , Pattern
seqesP
            ]
        , [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr4p2es, EnumSet Word8
seqes, EnumSet Word8
seqes, EnumSet Word8
seqes]
            [ EnumSet Word8 -> Pattern
forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr4p1o1es
            , Pattern
seqesP
            , Pattern
seqesP
            , Pattern
seqesP
            ]
        , [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr4p3o1es, EnumSet Word8
pr4p3o2es, EnumSet Word8
seqes, EnumSet Word8
seqes]
            [ EnumSet Word8 -> Pattern
forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr4p3o1es
            , EnumSet Word8 -> Pattern
forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr4p3o2es
            , Pattern
seqesP
            , Pattern
seqesP
            ]
        ]
    where
        seqes :: EnumSet Word8
seqes = [Word8] -> EnumSet Word8
forall a. Enum a => [a] -> EnumSet a
EnumSet.fromList @Word8 [Word8
0x80..Word8
0xBF]
        seqesP :: Pattern
seqesP = EnumSet Word8 -> Pattern
forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
seqes

        pr1es :: EnumSet Word8
pr1es = [Word8] -> EnumSet Word8
forall a. Enum a => [a] -> EnumSet a
EnumSet.fromList @Word8 [Word8
0x00..Word8
0x7F]
        pr2es :: EnumSet Word8
pr2es = [Word8] -> EnumSet Word8
forall a. Enum a => [a] -> EnumSet a
EnumSet.fromList @Word8 [Word8
0xC2..Word8
0xDF]
        pr3p1o1es :: EnumSet Word8
pr3p1o1es = [Word8] -> EnumSet Word8
forall a. Enum a => [a] -> EnumSet a
EnumSet.fromList @Word8 [Word8
0xE0]
        pr3p1o2es :: EnumSet Word8
pr3p1o2es = [Word8] -> EnumSet Word8
forall a. Enum a => [a] -> EnumSet a
EnumSet.fromList @Word8 [Word8
0xA0..Word8
0xBF]
        pr3p2es :: EnumSet Word8
pr3p2es = [Word8] -> EnumSet Word8
forall a. Enum a => [a] -> EnumSet a
EnumSet.fromList @Word8 [Word8
0xE1..Word8
0xEC]
        pr3p3o1es :: EnumSet Word8
pr3p3o1es = [Word8] -> EnumSet Word8
forall a. Enum a => [a] -> EnumSet a
EnumSet.fromList @Word8 [Word8
0xED]
        pr3p3o2es :: EnumSet Word8
pr3p3o2es = [Word8] -> EnumSet Word8
forall a. Enum a => [a] -> EnumSet a
EnumSet.fromList @Word8 [Word8
0x80..Word8
0x9F]
        pr3p4es :: EnumSet Word8
pr3p4es = [Word8] -> EnumSet Word8
forall a. Enum a => [a] -> EnumSet a
EnumSet.fromList @Word8 [Word8
0xEE..Word8
0xEF]
        pr4p1o1es :: EnumSet Word8
pr4p1o1es = [Word8] -> EnumSet Word8
forall a. Enum a => [a] -> EnumSet a
EnumSet.fromList @Word8 [Word8
0xF0]
        pr4p1o2es :: EnumSet Word8
pr4p1o2es = [Word8] -> EnumSet Word8
forall a. Enum a => [a] -> EnumSet a
EnumSet.fromList @Word8 [Word8
0x90..Word8
0xBF]
        pr4p2es :: EnumSet Word8
pr4p2es = [Word8] -> EnumSet Word8
forall a. Enum a => [a] -> EnumSet a
EnumSet.fromList @Word8 [Word8
0xF1..Word8
0xF3]
        pr4p3o1es :: EnumSet Word8
pr4p3o1es = [Word8] -> EnumSet Word8
forall a. Enum a => [a] -> EnumSet a
EnumSet.fromList @Word8 [Word8
0xF4]
        pr4p3o2es :: EnumSet Word8
pr4p3o2es = [Word8] -> EnumSet Word8
forall a. Enum a => [a] -> EnumSet a
EnumSet.fromList @Word8 [Word8
0x80..Word8
0x8F]

        go :: [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8]
bess [Pattern]
restPs = [EnumSet Word8]
-> [Pattern] -> NonEmptyEnumStringSet Word8 -> Pattern
forall e.
Enum e =>
[EnumSet e] -> [Pattern e] -> NonEmptyEnumStringSet e -> Pattern e
go' [EnumSet Word8]
bess [Pattern]
restPs NonEmptyEnumStringSet Word8
ess0

        go' :: [EnumSet e] -> [Pattern e] -> NonEmptyEnumStringSet e -> Pattern e
go' [EnumSet e]
bess [Pattern e]
restPs NonEmptyEnumStringSet e
ess = case [EnumSet e]
bess of
            []    -> Pattern e
forall a. Monoid a => a
mempty
            [EnumSet e
bes] -> EnumSet e -> Pattern e
forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP
                do EnumSet e
bes EnumSet e -> EnumSet e -> EnumSet e
forall a. Enum a => EnumSet a -> EnumSet a -> EnumSet a
`EnumSet.difference` NonEmptyEnumStringSet e -> EnumSet e
forall k (a :: k). NonEmptyEnumStringSet a -> EnumSet a
NonEmptyEnumStringSet.singleEnums NonEmptyEnumStringSet e
ess
            EnumSet e
bes:[EnumSet e]
bess2 ->
                let mess :: EnumMap e (NonEmptyEnumStringSet e)
mess = NonEmptyEnumStringSet e -> EnumMap e (NonEmptyEnumStringSet e)
forall k (a :: k).
NonEmptyEnumStringSet a -> EnumMap a (NonEmptyEnumStringSet a)
NonEmptyEnumStringSet.enumStrings NonEmptyEnumStringSet e
ess
                    (EnumSet e
nes, EnumSet e
ces) = (e -> Bool) -> EnumSet e -> (EnumSet e, EnumSet e)
forall a.
Enum a =>
(a -> Bool) -> EnumSet a -> (EnumSet a, EnumSet a)
EnumSet.partition
                        do \e
be -> e -> EnumMap e (NonEmptyEnumStringSet e) -> Bool
forall k a. Enum k => k -> EnumMap k a -> Bool
EnumMap.member e
be EnumMap e (NonEmptyEnumStringSet e)
mess
                        EnumSet e
bes
                    cesP :: Pattern e
cesP = EnumSet e -> Pattern e
forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet e
ces Pattern e -> Pattern e -> Pattern e
forall a. Semigroup a => a -> a -> a
<> [Pattern e] -> Pattern e
forall a. Monoid a => [a] -> a
mconcat [Pattern e]
restPs
                in [Pattern e] -> Pattern e
forall e. Enum e => [Pattern e] -> Pattern e
Tlex.orP do
                    Pattern e
cesPPattern e -> [Pattern e] -> [Pattern e]
forall a. a -> [a] -> [a]
:
                        [ [EnumSet e] -> [Pattern e] -> NonEmptyEnumStringSet e -> Pattern e
go' [EnumSet e]
bess2 [Pattern e]
nrestPs NonEmptyEnumStringSet e
ness
                        | e
ne <- EnumSet e -> [e]
forall a. Enum a => EnumSet a -> [a]
EnumSet.toList EnumSet e
nes
                        , let ness :: NonEmptyEnumStringSet e
ness = case e
-> EnumMap e (NonEmptyEnumStringSet e)
-> Maybe (NonEmptyEnumStringSet e)
forall k a. Enum k => k -> EnumMap k a -> Maybe a
EnumMap.lookup e
ne EnumMap e (NonEmptyEnumStringSet e)
mess of
                                Just NonEmptyEnumStringSet e
x  -> NonEmptyEnumStringSet e
x
                                Maybe (NonEmptyEnumStringSet e)
Nothing -> [Char] -> NonEmptyEnumStringSet e
forall a. HasCallStack => [Char] -> a
error [Char]
"unreachable"
                        , let nrestPs :: [Pattern e]
nrestPs = case [Pattern e]
restPs of
                                []   -> []
                                Pattern e
_:[Pattern e]
xs -> [Pattern e]
xs
                        ]