module Language.Lexer.Tlex.Plugin.Encoding.UTF8 (
    charSetPUtf8,
) where

import           Language.Lexer.Tlex.Prelude

import qualified Data.CharSet                                   as CharSet
import qualified Data.EnumMap.Strict                            as EnumMap
import qualified Data.EnumSet                                   as EnumSet
import qualified Data.IntSet                                    as IntSet
import qualified Language.Lexer.Tlex.Data.NonEmptyEnumStringSet as NonEmptyEnumStringSet
import qualified Language.Lexer.Tlex.Plugin.Encoding.CharSetP   as CharSetP
import qualified Language.Lexer.Tlex.Syntax                     as Tlex


charSetPUtf8 :: CharSetP.CharSetEncoder m => CharSetP.CharSetP m
charSetPUtf8 :: forall (m :: * -> *). CharSetEncoder m => CharSetP m
charSetPUtf8 = CharSetP.CharSetP
        { $sel:charSetEncodingP:CharSetP :: CharSet -> m Pattern
CharSetP.charSetEncodingP = \case
            CharSet.CharSet Bool
True  ByteSet
_ IntSet
is -> forall {m :: * -> *}. CharSetEncoder m => IntSet -> m Pattern
goStraight IntSet
is
            CharSet.CharSet Bool
False ByteSet
_ IntSet
is -> forall {m :: * -> *}. CharSetEncoder m => IntSet -> m Pattern
goComplement IntSet
is
        }
    where
        goStraight :: IntSet -> m Pattern
goStraight IntSet
is = do
            NonEmptyEnumStringSet Word8
bsSet <- forall (m :: * -> *).
CharSetEncoder m =>
IntSet -> m (NonEmptyEnumStringSet Word8)
charSetToByteStringSetUtf8 IntSet
is
            forall (f :: * -> *) a. Applicative f => a -> f a
pure do forall {k}. Enum k => NonEmptyEnumStringSet k -> Pattern k
straightP NonEmptyEnumStringSet Word8
bsSet

        straightP :: NonEmptyEnumStringSet k -> Pattern k
straightP NonEmptyEnumStringSet k
s =
            let singleByteP :: Pattern k
singleByteP = forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP do
                    forall a. NonEmptyEnumStringSet a -> EnumSet a
NonEmptyEnumStringSet.singleEnums NonEmptyEnumStringSet k
s
            in forall unit. Enum unit => [Pattern unit] -> Pattern unit
Tlex.orP do
                Pattern k
singleBytePforall a. a -> [a] -> [a]
:
                    [ forall e. Enum e => [e] -> Pattern e
Tlex.enumsP [k
c] forall a. Semigroup a => a -> a -> a
<> NonEmptyEnumStringSet k -> Pattern k
straightP NonEmptyEnumStringSet k
s'
                    | (k
c, NonEmptyEnumStringSet k
s') <- forall k a. Enum k => EnumMap k a -> [(k, a)]
EnumMap.assocs do
                        forall a.
NonEmptyEnumStringSet a -> EnumMap a (NonEmptyEnumStringSet a)
NonEmptyEnumStringSet.enumStrings NonEmptyEnumStringSet k
s
                    ]

        goComplement :: IntSet -> m Pattern
goComplement IntSet
is = do
            NonEmptyEnumStringSet Word8
bsSet <- forall (m :: * -> *).
CharSetEncoder m =>
IntSet -> m (NonEmptyEnumStringSet Word8)
charSetToByteStringSetUtf8 IntSet
is
            forall (f :: * -> *) a. Applicative f => a -> f a
pure do NonEmptyEnumStringSet Word8 -> Pattern
complementPFromEnumStrings NonEmptyEnumStringSet Word8
bsSet

charSetToByteStringSetUtf8 :: CharSetP.CharSetEncoder m
    => IntSet.IntSet -> m (NonEmptyEnumStringSet.NonEmptyEnumStringSet Word8)
charSetToByteStringSetUtf8 :: forall (m :: * -> *).
CharSetEncoder m =>
IntSet -> m (NonEmptyEnumStringSet Word8)
charSetToByteStringSetUtf8 IntSet
is = forall (t :: * -> *) (m :: * -> *) b a.
(Foldable t, Monad m) =>
(b -> a -> m b) -> b -> t a -> m b
foldM
        do \NonEmptyEnumStringSet Word8
s Int
c -> forall {f :: * -> *}.
CharSetEncoder f =>
NonEmptyEnumStringSet Word8
-> Int -> f (NonEmptyEnumStringSet Word8)
foldStep NonEmptyEnumStringSet Word8
s Int
c
        do forall a. Enum a => NonEmptyEnumStringSet a
NonEmptyEnumStringSet.empty
        do IntSet -> [Int]
IntSet.toAscList IntSet
is
    where
        foldStep :: NonEmptyEnumStringSet Word8
-> Int -> f (NonEmptyEnumStringSet Word8)
foldStep NonEmptyEnumStringSet Word8
s Int
c = if
            | Int
c forall a. Ord a => a -> a -> Bool
<= Int
0x7F -> forall (f :: * -> *) a. Applicative f => a -> f a
pure do
                forall a.
Enum a =>
a -> NonEmptyEnumStringSet a -> NonEmptyEnumStringSet a
NonEmptyEnumStringSet.insertSingleByte
                    do forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
c
                    do NonEmptyEnumStringSet Word8
s
            | Int
c forall a. Ord a => a -> a -> Bool
<= Int
0x7FF ->
                let (Word8
c', [Word8]
l) = Int -> Int -> (Word8, [Word8])
stringTails Int
c Int
1
                in forall (f :: * -> *) a. Applicative f => a -> f a
pure do
                    forall a.
Enum a =>
NonEmpty a -> NonEmptyEnumStringSet a -> NonEmptyEnumStringSet a
NonEmptyEnumStringSet.insert
                        do (Word8
0xC0 forall a. Num a => a -> a -> a
+ Word8
c') forall a. a -> [a] -> NonEmpty a
:| [Word8]
l
                        do NonEmptyEnumStringSet Word8
s
            | Int
0xD800 forall a. Ord a => a -> a -> Bool
<= Int
c Bool -> Bool -> Bool
&& Int
c forall a. Ord a => a -> a -> Bool
<= Int
0xDFFF -> do
                forall (m :: * -> *). CharSetEncoder m => EncodeWarning -> m ()
CharSetP.reportEncodeWarning
                    do Char -> EncodeWarning
CharSetP.NotSupportedChar do forall a. Enum a => Int -> a
toEnum Int
c
                forall (f :: * -> *) a. Applicative f => a -> f a
pure NonEmptyEnumStringSet Word8
s
            | Int
c forall a. Ord a => a -> a -> Bool
<= Int
0xFFFF ->
                let (Word8
c', [Word8]
l) = Int -> Int -> (Word8, [Word8])
stringTails Int
c Int
2
                in forall (f :: * -> *) a. Applicative f => a -> f a
pure do
                    forall a.
Enum a =>
NonEmpty a -> NonEmptyEnumStringSet a -> NonEmptyEnumStringSet a
NonEmptyEnumStringSet.insert
                        do (Word8
0xE0 forall a. Num a => a -> a -> a
+ Word8
c') forall a. a -> [a] -> NonEmpty a
:| [Word8]
l
                        do NonEmptyEnumStringSet Word8
s
            | Bool
otherwise ->
                let (Word8
c', [Word8]
l) = Int -> Int -> (Word8, [Word8])
stringTails Int
c Int
3
                in forall (f :: * -> *) a. Applicative f => a -> f a
pure do
                    forall a.
Enum a =>
NonEmpty a -> NonEmptyEnumStringSet a -> NonEmptyEnumStringSet a
NonEmptyEnumStringSet.insert
                        do (Word8
0xF0 forall a. Num a => a -> a -> a
+ Word8
c') forall a. a -> [a] -> NonEmpty a
:| [Word8]
l
                        do NonEmptyEnumStringSet Word8
s

        stringTails :: Int -> Int -> (Word8, [Word8])
        stringTails :: Int -> Int -> (Word8, [Word8])
stringTails Int
c Int
n = forall {t} {t} {a} {a}.
(Integral t, Num t, Num a, Num a, Eq t) =>
t -> [a] -> t -> (a, [a])
stringTails' Int
c [] Int
n
        stringTails' :: t -> [a] -> t -> (a, [a])
stringTails' t
c [a]
l = \case
            t
0 -> (forall a b. (Integral a, Num b) => a -> b
fromIntegral t
c, [a]
l)
            t
n ->
                let (t
c', t
x) = forall a. Integral a => a -> a -> (a, a)
quotRem t
c t
0x40
                    x' :: a
x' = forall a b. (Integral a, Num b) => a -> b
fromIntegral do t
0x80 forall a. Num a => a -> a -> a
+ t
x
                in t -> [a] -> t -> (a, [a])
stringTails' t
c'
                    do a
x' forall a. a -> [a] -> [a]
: [a]
l
                    do t
n forall a. Num a => a -> a -> a
- t
1

complementPFromEnumStrings
    :: NonEmptyEnumStringSet.NonEmptyEnumStringSet Word8 -> Tlex.Pattern Word8
complementPFromEnumStrings :: NonEmptyEnumStringSet Word8 -> Pattern
complementPFromEnumStrings NonEmptyEnumStringSet Word8
ess0 = forall unit. Enum unit => [Pattern unit] -> Pattern unit
Tlex.orP
        [ [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr1es] []
        , [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr2es, EnumSet Word8
seqes] [Pattern
seqesP]
        , [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr3p1o1es, EnumSet Word8
pr3p1o2es, EnumSet Word8
seqes]
            [ forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr3p1o1es
            , forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr3p1o2es
            , Pattern
seqesP
            ]
        , [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr3p2es, EnumSet Word8
seqes, EnumSet Word8
seqes]
            [ forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr3p2es
            , Pattern
seqesP
            , Pattern
seqesP
            ]
        , [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr3p3o1es, EnumSet Word8
pr3p3o2es, EnumSet Word8
seqes]
            [ forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr3p3o1es
            , forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr3p3o2es
            , Pattern
seqesP
            ]
        , [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr3p4es, EnumSet Word8
seqes, EnumSet Word8
seqes]
            [ forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr3p4es
            , Pattern
seqesP
            , Pattern
seqesP
            ]
        , [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr4p1o1es, EnumSet Word8
pr4p1o2es, EnumSet Word8
seqes, EnumSet Word8
seqes]
            [ forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr4p1o1es
            , forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr4p1o2es
            , Pattern
seqesP
            , Pattern
seqesP
            ]
        , [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr4p2es, EnumSet Word8
seqes, EnumSet Word8
seqes, EnumSet Word8
seqes]
            [ forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr4p1o1es
            , Pattern
seqesP
            , Pattern
seqesP
            , Pattern
seqesP
            ]
        , [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr4p3o1es, EnumSet Word8
pr4p3o2es, EnumSet Word8
seqes, EnumSet Word8
seqes]
            [ forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr4p3o1es
            , forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr4p3o2es
            , Pattern
seqesP
            , Pattern
seqesP
            ]
        ]
    where
        seqes :: EnumSet Word8
seqes = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0x80..Word8
0xBF]
        seqesP :: Pattern
seqesP = forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
seqes

        pr1es :: EnumSet Word8
pr1es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0x00..Word8
0x7F]
        pr2es :: EnumSet Word8
pr2es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0xC2..Word8
0xDF]
        pr3p1o1es :: EnumSet Word8
pr3p1o1es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0xE0]
        pr3p1o2es :: EnumSet Word8
pr3p1o2es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0xA0..Word8
0xBF]
        pr3p2es :: EnumSet Word8
pr3p2es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0xE1..Word8
0xEC]
        pr3p3o1es :: EnumSet Word8
pr3p3o1es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0xED]
        pr3p3o2es :: EnumSet Word8
pr3p3o2es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0x80..Word8
0x9F]
        pr3p4es :: EnumSet Word8
pr3p4es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0xEE..Word8
0xEF]
        pr4p1o1es :: EnumSet Word8
pr4p1o1es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0xF0]
        pr4p1o2es :: EnumSet Word8
pr4p1o2es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0x90..Word8
0xBF]
        pr4p2es :: EnumSet Word8
pr4p2es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0xF1..Word8
0xF3]
        pr4p3o1es :: EnumSet Word8
pr4p3o1es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0xF4]
        pr4p3o2es :: EnumSet Word8
pr4p3o2es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0x80..Word8
0x8F]

        go :: [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8]
bess [Pattern]
restPs = forall {k}.
Enum k =>
[EnumSet k] -> [Pattern k] -> NonEmptyEnumStringSet k -> Pattern k
go' [EnumSet Word8]
bess [Pattern]
restPs NonEmptyEnumStringSet Word8
ess0

        go' :: [EnumSet k] -> [Pattern k] -> NonEmptyEnumStringSet k -> Pattern k
go' [EnumSet k]
bess [Pattern k]
restPs NonEmptyEnumStringSet k
ess = case [EnumSet k]
bess of
            []    -> forall a. Monoid a => a
mempty
            [EnumSet k
bes] -> forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP
                do EnumSet k
bes forall k. EnumSet k -> EnumSet k -> EnumSet k
`EnumSet.difference` forall a. NonEmptyEnumStringSet a -> EnumSet a
NonEmptyEnumStringSet.singleEnums NonEmptyEnumStringSet k
ess
            EnumSet k
bes:[EnumSet k]
bess2 ->
                let mess :: EnumMap k (NonEmptyEnumStringSet k)
mess = forall a.
NonEmptyEnumStringSet a -> EnumMap a (NonEmptyEnumStringSet a)
NonEmptyEnumStringSet.enumStrings NonEmptyEnumStringSet k
ess
                    (EnumSet k
nes, EnumSet k
ces) = forall k.
Enum k =>
(k -> Bool) -> EnumSet k -> (EnumSet k, EnumSet k)
EnumSet.partition
                        do \k
be -> forall k a. Enum k => k -> EnumMap k a -> Bool
EnumMap.member k
be EnumMap k (NonEmptyEnumStringSet k)
mess
                        EnumSet k
bes
                    cesP :: Pattern k
cesP = forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet k
ces forall a. Semigroup a => a -> a -> a
<> forall a. Monoid a => [a] -> a
mconcat [Pattern k]
restPs
                in forall unit. Enum unit => [Pattern unit] -> Pattern unit
Tlex.orP do
                    Pattern k
cesPforall a. a -> [a] -> [a]
:
                        [ [EnumSet k] -> [Pattern k] -> NonEmptyEnumStringSet k -> Pattern k
go' [EnumSet k]
bess2 [Pattern k]
nrestPs NonEmptyEnumStringSet k
ness
                        | k
ne <- forall k. Enum k => EnumSet k -> [k]
EnumSet.toList EnumSet k
nes
                        , let ness :: NonEmptyEnumStringSet k
ness = case forall k a. Enum k => k -> EnumMap k a -> Maybe a
EnumMap.lookup k
ne EnumMap k (NonEmptyEnumStringSet k)
mess of
                                Just NonEmptyEnumStringSet k
x  -> NonEmptyEnumStringSet k
x
                                Maybe (NonEmptyEnumStringSet k)
Nothing -> forall a. HasCallStack => [Char] -> a
error [Char]
"unreachable"
                        , let nrestPs :: [Pattern k]
nrestPs = case [Pattern k]
restPs of
                                []   -> []
                                Pattern k
_:[Pattern k]
xs -> [Pattern k]
xs
                        ]