module CharSet (
setSingleton,
Encoding(..),
Byte,
ByteSet,
byteSetSingleton,
byteRanges,
byteSetRange,
CharSet,
emptyCharSet,
charSetSingleton,
charSet,
charSetMinus,
charSetComplement,
charSetRange,
charSetUnion,
charSetQuote,
setUnions,
byteSetToArray,
byteSetElems,
byteSetElem
) where
import Data.Array
import Data.Ranged
import Data.Word
import Data.Maybe (catMaybes)
import Data.Char (chr,ord)
import UTF8
type Byte = Word8
type CharSet = RSet Char
type ByteSet = RSet Byte
type Utf8Range = Span [Byte]
data Encoding = Latin1 | UTF8
emptyCharSet :: CharSet
emptyCharSet :: CharSet
emptyCharSet = CharSet
forall a. DiscreteOrdered a => RSet a
rSetEmpty
byteSetElem :: ByteSet -> Byte -> Bool
byteSetElem :: ByteSet -> Byte -> Bool
byteSetElem = ByteSet -> Byte -> Bool
forall v. DiscreteOrdered v => RSet v -> v -> Bool
rSetHas
charSetSingleton :: Char -> CharSet
charSetSingleton :: Char -> CharSet
charSetSingleton = Char -> CharSet
forall v. DiscreteOrdered v => v -> RSet v
rSingleton
setSingleton :: DiscreteOrdered a => a -> RSet a
setSingleton :: a -> RSet a
setSingleton = a -> RSet a
forall v. DiscreteOrdered v => v -> RSet v
rSingleton
charSet :: [Char] -> CharSet
charSet :: [Char] -> CharSet
charSet = [CharSet] -> CharSet
forall a. DiscreteOrdered a => [RSet a] -> RSet a
setUnions ([CharSet] -> CharSet)
-> ([Char] -> [CharSet]) -> [Char] -> CharSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> CharSet) -> [Char] -> [CharSet]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Char -> CharSet
charSetSingleton
charSetMinus :: CharSet -> CharSet -> CharSet
charSetMinus :: CharSet -> CharSet -> CharSet
charSetMinus = CharSet -> CharSet -> CharSet
forall v. DiscreteOrdered v => RSet v -> RSet v -> RSet v
rSetDifference
charSetUnion :: CharSet -> CharSet -> CharSet
charSetUnion :: CharSet -> CharSet -> CharSet
charSetUnion = CharSet -> CharSet -> CharSet
forall v. DiscreteOrdered v => RSet v -> RSet v -> RSet v
rSetUnion
setUnions :: DiscreteOrdered a => [RSet a] -> RSet a
setUnions :: [RSet a] -> RSet a
setUnions = (RSet a -> RSet a -> RSet a) -> RSet a -> [RSet a] -> RSet a
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr RSet a -> RSet a -> RSet a
forall v. DiscreteOrdered v => RSet v -> RSet v -> RSet v
rSetUnion RSet a
forall a. DiscreteOrdered a => RSet a
rSetEmpty
charSetComplement :: CharSet -> CharSet
charSetComplement :: CharSet -> CharSet
charSetComplement = CharSet -> CharSet
forall a. DiscreteOrdered a => RSet a -> RSet a
rSetNegation
charSetRange :: Char -> Char -> CharSet
charSetRange :: Char -> Char -> CharSet
charSetRange Char
c1 Char
c2 = [Range Char] -> CharSet
forall v. DiscreteOrdered v => [Range v] -> RSet v
makeRangedSet [Boundary Char -> Boundary Char -> Range Char
forall v. Boundary v -> Boundary v -> Range v
Range (Char -> Boundary Char
forall a. a -> Boundary a
BoundaryBelow Char
c1) (Char -> Boundary Char
forall a. a -> Boundary a
BoundaryAbove Char
c2)]
byteSetToArray :: ByteSet -> Array Byte Bool
byteSetToArray :: ByteSet -> Array Byte Bool
byteSetToArray ByteSet
set = (Byte, Byte) -> [(Byte, Bool)] -> Array Byte Bool
forall i e. Ix i => (i, i) -> [(i, e)] -> Array i e
array ((Byte, Bool) -> Byte
forall a b. (a, b) -> a
fst ([(Byte, Bool)] -> (Byte, Bool)
forall a. [a] -> a
head [(Byte, Bool)]
ass), (Byte, Bool) -> Byte
forall a b. (a, b) -> a
fst ([(Byte, Bool)] -> (Byte, Bool)
forall a. [a] -> a
last [(Byte, Bool)]
ass)) [(Byte, Bool)]
ass
where ass :: [(Byte, Bool)]
ass = [(Byte
c,ByteSet -> Byte -> Bool
forall v. DiscreteOrdered v => RSet v -> v -> Bool
rSetHas ByteSet
set Byte
c) | Byte
c <- [Byte
0..Byte
0xff]]
byteSetElems :: ByteSet -> [Byte]
byteSetElems :: ByteSet -> [Byte]
byteSetElems ByteSet
set = [Byte
c | Byte
c <- [Byte
0 .. Byte
0xff], ByteSet -> Byte -> Bool
forall v. DiscreteOrdered v => RSet v -> v -> Bool
rSetHas ByteSet
set Byte
c]
charToRanges :: Encoding -> CharSet -> [Utf8Range]
charToRanges :: Encoding -> CharSet -> [Utf8Range]
charToRanges Encoding
Latin1 =
(Span Char -> Utf8Range) -> [Span Char] -> [Utf8Range]
forall a b. (a -> b) -> [a] -> [b]
map ((Char -> [Byte]) -> Span Char -> Utf8Range
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((Byte -> [Byte] -> [Byte]
forall a. a -> [a] -> [a]
: [])(Byte -> [Byte]) -> (Char -> Byte) -> Char -> [Byte]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.Int -> Byte
forall a b. (Integral a, Num b) => a -> b
fromIntegral(Int -> Byte) -> (Char -> Int) -> Char -> Byte
forall b c a. (b -> c) -> (a -> b) -> a -> c
.Char -> Int
ord))
([Span Char] -> [Utf8Range])
-> (CharSet -> [Span Char]) -> CharSet -> [Utf8Range]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Maybe (Span Char)] -> [Span Char]
forall a. [Maybe a] -> [a]
catMaybes
([Maybe (Span Char)] -> [Span Char])
-> (CharSet -> [Maybe (Span Char)]) -> CharSet -> [Span Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Range Char -> Maybe (Span Char))
-> [Range Char] -> [Maybe (Span Char)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Bool -> Range Char -> Maybe (Span Char)
charRangeToCharSpan Bool
False)
([Range Char] -> [Maybe (Span Char)])
-> (CharSet -> [Range Char]) -> CharSet -> [Maybe (Span Char)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CharSet -> [Range Char]
forall v. RSet v -> [Range v]
rSetRanges
charToRanges Encoding
UTF8 =
[[Utf8Range]] -> [Utf8Range]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
([[Utf8Range]] -> [Utf8Range])
-> (CharSet -> [[Utf8Range]]) -> CharSet -> [Utf8Range]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Utf8Range -> [Utf8Range]) -> [Utf8Range] -> [[Utf8Range]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Utf8Range -> [Utf8Range]
toUtfRange
([Utf8Range] -> [[Utf8Range]])
-> (CharSet -> [Utf8Range]) -> CharSet -> [[Utf8Range]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Span Char -> Utf8Range) -> [Span Char] -> [Utf8Range]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((Char -> [Byte]) -> Span Char -> Utf8Range
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Char -> [Byte]
UTF8.encode)
([Span Char] -> [Utf8Range])
-> (CharSet -> [Span Char]) -> CharSet -> [Utf8Range]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Maybe (Span Char)] -> [Span Char]
forall a. [Maybe a] -> [a]
catMaybes
([Maybe (Span Char)] -> [Span Char])
-> (CharSet -> [Maybe (Span Char)]) -> CharSet -> [Span Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Range Char -> Maybe (Span Char))
-> [Range Char] -> [Maybe (Span Char)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Bool -> Range Char -> Maybe (Span Char)
charRangeToCharSpan Bool
True)
([Range Char] -> [Maybe (Span Char)])
-> (CharSet -> [Range Char]) -> CharSet -> [Maybe (Span Char)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CharSet -> [Range Char]
forall v. RSet v -> [Range v]
rSetRanges
toUtfRange :: Span [Byte] -> [Span [Byte]]
toUtfRange :: Utf8Range -> [Utf8Range]
toUtfRange (Span [Byte]
x [Byte]
y) = [Byte] -> [Byte] -> [Utf8Range]
fix [Byte]
x [Byte]
y
fix :: [Byte] -> [Byte] -> [Span [Byte]]
fix :: [Byte] -> [Byte] -> [Utf8Range]
fix [Byte]
x [Byte]
y
| [Byte] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Byte]
x Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== [Byte] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Byte]
y = [[Byte] -> [Byte] -> Utf8Range
forall a. a -> a -> Span a
Span [Byte]
x [Byte]
y]
| [Byte] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Byte]
x Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = [Byte] -> [Byte] -> Utf8Range
forall a. a -> a -> Span a
Span [Byte]
x [Byte
0x7F] Utf8Range -> [Utf8Range] -> [Utf8Range]
forall a. a -> [a] -> [a]
: [Byte] -> [Byte] -> [Utf8Range]
fix [Byte
0xC2,Byte
0x80] [Byte]
y
| [Byte] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Byte]
x Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
2 = [Byte] -> [Byte] -> Utf8Range
forall a. a -> a -> Span a
Span [Byte]
x [Byte
0xDF,Byte
0xBF] Utf8Range -> [Utf8Range] -> [Utf8Range]
forall a. a -> [a] -> [a]
: [Byte] -> [Byte] -> [Utf8Range]
fix [Byte
0xE0,Byte
0x80,Byte
0x80] [Byte]
y
| [Byte] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Byte]
x Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
3 = [Byte] -> [Byte] -> Utf8Range
forall a. a -> a -> Span a
Span [Byte]
x [Byte
0xEF,Byte
0xBF,Byte
0xBF] Utf8Range -> [Utf8Range] -> [Utf8Range]
forall a. a -> [a] -> [a]
: [Byte] -> [Byte] -> [Utf8Range]
fix [Byte
0xF0,Byte
0x80,Byte
0x80,Byte
0x80] [Byte]
y
| Bool
otherwise = [Char] -> [Utf8Range]
forall a. HasCallStack => [Char] -> a
error [Char]
"fix: incorrect input given"
byteRangeToBytePair :: Span [Byte] -> ([Byte],[Byte])
byteRangeToBytePair :: Utf8Range -> ([Byte], [Byte])
byteRangeToBytePair (Span [Byte]
x [Byte]
y) = ([Byte]
x,[Byte]
y)
data Span a = Span a a
instance Functor Span where
fmap :: (a -> b) -> Span a -> Span b
fmap a -> b
f (Span a
x a
y) = b -> b -> Span b
forall a. a -> a -> Span a
Span (a -> b
f a
x) (a -> b
f a
y)
charRangeToCharSpan :: Bool -> Range Char -> Maybe (Span Char)
charRangeToCharSpan :: Bool -> Range Char -> Maybe (Span Char)
charRangeToCharSpan Bool
_ (Range Boundary Char
BoundaryAboveAll Boundary Char
_) = Maybe (Span Char)
forall a. Maybe a
Nothing
charRangeToCharSpan Bool
_ (Range Boundary Char
_ Boundary Char
BoundaryBelowAll) = Maybe (Span Char)
forall a. Maybe a
Nothing
charRangeToCharSpan Bool
uni (Range Boundary Char
x Boundary Char
y) = Span Char -> Maybe (Span Char)
forall a. a -> Maybe a
Just (Char -> Char -> Span Char
forall a. a -> a -> Span a
Span (Boundary Char -> Char
l Boundary Char
x) (Boundary Char -> Char
h Boundary Char
y))
where l :: Boundary Char -> Char
l Boundary Char
b = case Boundary Char
b of
Boundary Char
BoundaryBelowAll -> Char
'\0'
BoundaryBelow Char
a -> Char
a
BoundaryAbove Char
a -> Char -> Char
forall a. Enum a => a -> a
succ Char
a
Boundary Char
BoundaryAboveAll -> [Char] -> Char
forall a. HasCallStack => [Char] -> a
error [Char]
"panic: charRangeToCharSpan"
h :: Boundary Char -> Char
h Boundary Char
b = case Boundary Char
b of
Boundary Char
BoundaryBelowAll -> [Char] -> Char
forall a. HasCallStack => [Char] -> a
error [Char]
"panic: charRangeToCharSpan"
BoundaryBelow Char
a -> Char -> Char
forall a. Enum a => a -> a
pred Char
a
BoundaryAbove Char
a -> Char
a
Boundary Char
BoundaryAboveAll | Bool
uni -> Int -> Char
chr Int
0x10ffff
| Bool
otherwise -> Int -> Char
chr Int
0xff
byteRanges :: Encoding -> CharSet -> [([Byte],[Byte])]
byteRanges :: Encoding -> CharSet -> [([Byte], [Byte])]
byteRanges Encoding
enc = (Utf8Range -> ([Byte], [Byte]))
-> [Utf8Range] -> [([Byte], [Byte])]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Utf8Range -> ([Byte], [Byte])
byteRangeToBytePair ([Utf8Range] -> [([Byte], [Byte])])
-> (CharSet -> [Utf8Range]) -> CharSet -> [([Byte], [Byte])]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Encoding -> CharSet -> [Utf8Range]
charToRanges Encoding
enc
byteSetRange :: Byte -> Byte -> ByteSet
byteSetRange :: Byte -> Byte -> ByteSet
byteSetRange Byte
c1 Byte
c2 = [Range Byte] -> ByteSet
forall v. DiscreteOrdered v => [Range v] -> RSet v
makeRangedSet [Boundary Byte -> Boundary Byte -> Range Byte
forall v. Boundary v -> Boundary v -> Range v
Range (Byte -> Boundary Byte
forall a. a -> Boundary a
BoundaryBelow Byte
c1) (Byte -> Boundary Byte
forall a. a -> Boundary a
BoundaryAbove Byte
c2)]
byteSetSingleton :: Byte -> ByteSet
byteSetSingleton :: Byte -> ByteSet
byteSetSingleton = Byte -> ByteSet
forall v. DiscreteOrdered v => v -> RSet v
rSingleton
instance DiscreteOrdered Word8 where
adjacent :: Byte -> Byte -> Bool
adjacent Byte
x Byte
y = Byte
x Byte -> Byte -> Byte
forall a. Num a => a -> a -> a
+ Byte
1 Byte -> Byte -> Bool
forall a. Eq a => a -> a -> Bool
== Byte
y
adjacentBelow :: Byte -> Maybe Byte
adjacentBelow Byte
0 = Maybe Byte
forall a. Maybe a
Nothing
adjacentBelow Byte
x = Byte -> Maybe Byte
forall a. a -> Maybe a
Just (Byte
xByte -> Byte -> Byte
forall a. Num a => a -> a -> a
-Byte
1)
charSetQuote :: CharSet -> String
charSetQuote :: CharSet -> [Char]
charSetQuote CharSet
s = [Char]
"(\\c -> " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ([Char] -> [Char] -> [Char]) -> [Char] -> [[Char]] -> [Char]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\[Char]
x [Char]
y -> [Char]
x [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" || " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
y) [Char]
"False" ((Range Char -> [Char]) -> [Range Char] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map Range Char -> [Char]
forall a. Show a => Range a -> [Char]
quoteRange (CharSet -> [Range Char]
forall v. RSet v -> [Range v]
rSetRanges CharSet
s)) [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
")"
where quoteRange :: Range a -> [Char]
quoteRange (Range Boundary a
l Boundary a
h) = Boundary a -> [Char]
forall a. Show a => Boundary a -> [Char]
quoteL Boundary a
l [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" && " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Boundary a -> [Char]
forall a. Show a => Boundary a -> [Char]
quoteH Boundary a
h
quoteL :: Boundary a -> [Char]
quoteL (BoundaryAbove a
a) = [Char]
"c > " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ a -> [Char]
forall a. Show a => a -> [Char]
show a
a
quoteL (BoundaryBelow a
a) = [Char]
"c >= " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ a -> [Char]
forall a. Show a => a -> [Char]
show a
a
quoteL (Boundary a
BoundaryAboveAll) = [Char]
"False"
quoteL (Boundary a
BoundaryBelowAll) = [Char]
"True"
quoteH :: Boundary a -> [Char]
quoteH (BoundaryAbove a
a) = [Char]
"c <= " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ a -> [Char]
forall a. Show a => a -> [Char]
show a
a
quoteH (BoundaryBelow a
a) = [Char]
"c < " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ a -> [Char]
forall a. Show a => a -> [Char]
show a
a
quoteH (Boundary a
BoundaryAboveAll) = [Char]
"True"
quoteH (Boundary a
BoundaryBelowAll) = [Char]
"False"