{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
#if __GLASGOW_HASKELL__ >=704
{-# LANGUAGE Safe #-}
#elif __GLASGOW_HASKELL__ >=702
{-# LANGUAGE Trustworthy #-}
#endif
module RERE.CharSet (
CharSet,
empty,
universe,
singleton,
insert,
union,
intersection,
complement,
difference,
size,
null,
member,
fromList,
toList,
fromIntervalList,
toIntervalList,
) where
import Prelude hiding (null)
import Data.Char (chr, ord)
import Data.List (foldl', sortBy)
import Data.String (IsString (..))
#if MIN_VERSION_containers(0,5,0)
import qualified Data.IntMap.Strict as IM
#else
import qualified Data.IntMap as IM
#endif
newtype CharSet = CS { unCS :: IM.IntMap Int }
deriving (Eq, Ord)
instance IsString CharSet where
fromString = fromList
instance Show CharSet where
showsPrec d cs
| size cs < 20
= showsPrec d (toList cs)
| otherwise
= showParen (d > 10)
$ showString "CS.fromIntervalList "
. showsPrec 11 (toIntervalList cs)
empty :: CharSet
empty = CS IM.empty
universe :: CharSet
universe = CS $ IM.singleton 0 0x10ffff
null :: CharSet -> Bool
null (CS cs) = IM.null cs
size :: CharSet -> Int
size (CS m) = foldl' (\ !acc (lo, hi) -> acc + (hi - lo) + 1) 0 (IM.toList m)
singleton :: Char -> CharSet
singleton c = CS (IM.singleton (ord c) (ord c))
member :: Char -> CharSet -> Bool
#if MIN_VERSION_containers(0,5,0)
member c (CS m) = case IM.lookupLE i m of
Nothing -> False
Just (_, hi) -> i <= hi
where
#else
member c (CS m) = go (IM.toList m)
where
go [] = False
go ((x,y):zs) = (x <= i && i <= y) || go zs
#endif
i = ord c
insert :: Char -> CharSet -> CharSet
insert c (CS m) = normalise (IM.insert (ord c) (ord c) m)
union :: CharSet -> CharSet -> CharSet
union (CS xs) (CS ys) = normalise (IM.unionWith max xs ys)
intersection :: CharSet -> CharSet -> CharSet
intersection (CS xs) (CS ys) = CS $
IM.fromList (intersectRangeList (IM.toList xs) (IM.toList ys))
intersectRangeList :: Ord a => [(a, a)] -> [(a, a)] -> [(a, a)]
intersectRangeList aset@((x,y):as) bset@((u,v):bs)
| y < u = intersectRangeList as bset
| v < x = intersectRangeList aset bs
| y < v = (max x u, y) : intersectRangeList as bset
| otherwise = (max x u, v) : intersectRangeList aset bs
intersectRangeList _ [] = []
intersectRangeList [] _ = []
complement :: CharSet -> CharSet
complement (CS xs) = CS $ IM.fromList $ complementRangeList (IM.toList xs)
complementRangeList' :: Int -> [(Int, Int)] -> [(Int, Int)]
complementRangeList' x ((u,v):s) = (x,pred u) : complementRangeList'' v s
complementRangeList' x [] = [(x,0x10ffff)]
complementRangeList'' :: Int -> [(Int, Int)] -> [(Int, Int)]
complementRangeList'' x s
| x == 0x10ffff = []
| otherwise = complementRangeList' (succ x) s
complementRangeList :: [(Int, Int)] -> [(Int, Int)]
complementRangeList s@((x,y):s')
| x == 0 = complementRangeList'' y s'
| otherwise = complementRangeList' 0 s
complementRangeList [] = [(0, 0x10ffff)]
difference :: CharSet -> CharSet -> CharSet
difference xs ys = intersection xs (complement ys)
fromList :: String -> CharSet
fromList = normalise . foldl' (\ acc c -> IM.insert (ord c) (ord c) acc) IM.empty
toList :: CharSet -> String
toList = concatMap (uncurry enumFromTo) . toIntervalList
toIntervalList :: CharSet -> [(Char, Char)]
toIntervalList (CS m) = [ (chr lo, chr hi) | (lo, hi) <- IM.toList m ]
fromIntervalList :: [(Char,Char)] -> CharSet
fromIntervalList xs = normalise' $ sortBy (\a b -> compare (fst a) (fst b))
[ (ord lo, ord hi)
| (lo, hi) <- xs
, lo <= hi
]
normalise :: IM.IntMap Int -> CharSet
normalise = normalise'. IM.toList
normalise' :: [(Int,Int)] -> CharSet
normalise' = CS . IM.fromList . go where
go :: [(Int,Int)] -> [(Int,Int)]
go [] = []
go ((x,y):zs) = go' x y zs
go' :: Int -> Int -> [(Int, Int)] -> [(Int, Int)]
go' lo hi [] = [(lo, hi)]
go' lo hi ws0@((u,v):ws)
| u <= succ hi = go' lo (max v hi) ws
| otherwise = (lo,hi) : go ws0