{-# LANGUAGE CPP #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module Text.Collate.Collator
  ( Collator(..)
  , SortKey(..)
  , renderSortKey
  , VariableWeighting(..)
  , rootCollator
  , collatorLang
  , CollatorOptions(..)
  , setVariableWeighting
  , setFrenchAccents
  , setUpperBeforeLower
  , setNormalization
  , collator
  , defaultCollatorOptions
  , collatorFor
  , mkCollator
  )
where

import Text.Collate.Lang
import Text.Collate.Tailorings
import Text.Collate.Collation (getCollationElements, Collation(..),
                               CollationElement(..))
import Text.Collate.Normalize (toNFD)
import Data.Word (Word16)
import Data.String
import qualified Data.Text as T
import Data.Text (Text)
import Data.Ord (comparing)
import Data.Char (ord)
import Data.List (intercalate)
import Text.Printf (printf)
import Language.Haskell.TH.Quote (QuasiQuoter(..))
#if MIN_VERSION_base(4,11,0)
#else
import Data.Semigroup (Semigroup(..))
#endif

-- | 'VariableWeighting' affects how punctuation is treated.
-- See <http://www.unicode.org/reports/tr10/#Variable_Weighting>.
data VariableWeighting =
    NonIgnorable   -- ^ Don't ignore punctuation (Deluge < deluge-)
  | Blanked -- ^ Completely ignore punctuation (Deluge = deluge-)
  | Shifted -- ^ Consider punctuation at lower priority
           -- (de-luge < delu-ge < deluge < deluge- < Deluge)
  | ShiftTrimmed -- ^ Variant of Shifted (deluge < de-luge < delu-ge)
  deriving (Int -> VariableWeighting -> ShowS
[VariableWeighting] -> ShowS
VariableWeighting -> String
(Int -> VariableWeighting -> ShowS)
-> (VariableWeighting -> String)
-> ([VariableWeighting] -> ShowS)
-> Show VariableWeighting
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [VariableWeighting] -> ShowS
$cshowList :: [VariableWeighting] -> ShowS
show :: VariableWeighting -> String
$cshow :: VariableWeighting -> String
showsPrec :: Int -> VariableWeighting -> ShowS
$cshowsPrec :: Int -> VariableWeighting -> ShowS
Show, VariableWeighting -> VariableWeighting -> Bool
(VariableWeighting -> VariableWeighting -> Bool)
-> (VariableWeighting -> VariableWeighting -> Bool)
-> Eq VariableWeighting
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: VariableWeighting -> VariableWeighting -> Bool
$c/= :: VariableWeighting -> VariableWeighting -> Bool
== :: VariableWeighting -> VariableWeighting -> Bool
$c== :: VariableWeighting -> VariableWeighting -> Bool
Eq, Eq VariableWeighting
Eq VariableWeighting
-> (VariableWeighting -> VariableWeighting -> Ordering)
-> (VariableWeighting -> VariableWeighting -> Bool)
-> (VariableWeighting -> VariableWeighting -> Bool)
-> (VariableWeighting -> VariableWeighting -> Bool)
-> (VariableWeighting -> VariableWeighting -> Bool)
-> (VariableWeighting -> VariableWeighting -> VariableWeighting)
-> (VariableWeighting -> VariableWeighting -> VariableWeighting)
-> Ord VariableWeighting
VariableWeighting -> VariableWeighting -> Bool
VariableWeighting -> VariableWeighting -> Ordering
VariableWeighting -> VariableWeighting -> VariableWeighting
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: VariableWeighting -> VariableWeighting -> VariableWeighting
$cmin :: VariableWeighting -> VariableWeighting -> VariableWeighting
max :: VariableWeighting -> VariableWeighting -> VariableWeighting
$cmax :: VariableWeighting -> VariableWeighting -> VariableWeighting
>= :: VariableWeighting -> VariableWeighting -> Bool
$c>= :: VariableWeighting -> VariableWeighting -> Bool
> :: VariableWeighting -> VariableWeighting -> Bool
$c> :: VariableWeighting -> VariableWeighting -> Bool
<= :: VariableWeighting -> VariableWeighting -> Bool
$c<= :: VariableWeighting -> VariableWeighting -> Bool
< :: VariableWeighting -> VariableWeighting -> Bool
$c< :: VariableWeighting -> VariableWeighting -> Bool
compare :: VariableWeighting -> VariableWeighting -> Ordering
$ccompare :: VariableWeighting -> VariableWeighting -> Ordering
$cp1Ord :: Eq VariableWeighting
Ord)

data CollatorOptions =
  CollatorOptions
  { CollatorOptions -> Maybe Lang
optLang               :: Maybe Lang -- ^ 'Lang' used for tailoring.
      -- Note that because of fallback rules, this may be somewhat
      -- different from the 'Lang' passed to 'collatorFor'.  This 'Lang'
      -- won't contain unicode extensions used to set options, but
      -- it will specify the collation if a non-default collation is being used.
  , CollatorOptions -> VariableWeighting
optVariableWeighting  :: VariableWeighting  -- ^ Method for handling
      -- variable elements (see <http://www.unicode.org/reports/tr10/>,
      -- Tables 11 and 12).
  , CollatorOptions -> Bool
optFrenchAccents      :: Bool -- ^ If True, secondary weights are scanned
      -- in reverse order, so we get the sorting
      -- "cote côte coté côté" instead of "cote coté côte côté"
  , CollatorOptions -> Bool
optUpperBeforeLower   :: Bool -- ^ Sort uppercase letters before lower
  , CollatorOptions -> Bool
optNormalize          :: Bool -- ^ If True, strings are normalized
      -- to NFD before collation elements are constructed.  If the input
      -- is already normalized, this option can be set to False for
      -- better performance.
  } deriving (Int -> CollatorOptions -> ShowS
[CollatorOptions] -> ShowS
CollatorOptions -> String
(Int -> CollatorOptions -> ShowS)
-> (CollatorOptions -> String)
-> ([CollatorOptions] -> ShowS)
-> Show CollatorOptions
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CollatorOptions] -> ShowS
$cshowList :: [CollatorOptions] -> ShowS
show :: CollatorOptions -> String
$cshow :: CollatorOptions -> String
showsPrec :: Int -> CollatorOptions -> ShowS
$cshowsPrec :: Int -> CollatorOptions -> ShowS
Show, CollatorOptions -> CollatorOptions -> Bool
(CollatorOptions -> CollatorOptions -> Bool)
-> (CollatorOptions -> CollatorOptions -> Bool)
-> Eq CollatorOptions
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CollatorOptions -> CollatorOptions -> Bool
$c/= :: CollatorOptions -> CollatorOptions -> Bool
== :: CollatorOptions -> CollatorOptions -> Bool
$c== :: CollatorOptions -> CollatorOptions -> Bool
Eq, Eq CollatorOptions
Eq CollatorOptions
-> (CollatorOptions -> CollatorOptions -> Ordering)
-> (CollatorOptions -> CollatorOptions -> Bool)
-> (CollatorOptions -> CollatorOptions -> Bool)
-> (CollatorOptions -> CollatorOptions -> Bool)
-> (CollatorOptions -> CollatorOptions -> Bool)
-> (CollatorOptions -> CollatorOptions -> CollatorOptions)
-> (CollatorOptions -> CollatorOptions -> CollatorOptions)
-> Ord CollatorOptions
CollatorOptions -> CollatorOptions -> Bool
CollatorOptions -> CollatorOptions -> Ordering
CollatorOptions -> CollatorOptions -> CollatorOptions
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: CollatorOptions -> CollatorOptions -> CollatorOptions
$cmin :: CollatorOptions -> CollatorOptions -> CollatorOptions
max :: CollatorOptions -> CollatorOptions -> CollatorOptions
$cmax :: CollatorOptions -> CollatorOptions -> CollatorOptions
>= :: CollatorOptions -> CollatorOptions -> Bool
$c>= :: CollatorOptions -> CollatorOptions -> Bool
> :: CollatorOptions -> CollatorOptions -> Bool
$c> :: CollatorOptions -> CollatorOptions -> Bool
<= :: CollatorOptions -> CollatorOptions -> Bool
$c<= :: CollatorOptions -> CollatorOptions -> Bool
< :: CollatorOptions -> CollatorOptions -> Bool
$c< :: CollatorOptions -> CollatorOptions -> Bool
compare :: CollatorOptions -> CollatorOptions -> Ordering
$ccompare :: CollatorOptions -> CollatorOptions -> Ordering
$cp1Ord :: Eq CollatorOptions
Ord)

showWordList :: [Word16] -> String
showWordList :: [Word16] -> String
showWordList [Word16]
ws =
    String
"[" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
intercalate String
","
            ((Word16 -> String) -> [Word16] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (String -> Int -> String
forall r. PrintfType r => String -> r
printf String
"0x%04X" (Int -> String) -> (Word16 -> Int) -> Word16 -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Word16 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral :: Word16 -> Int)) [Word16]
ws) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"]"

newtype SortKey = SortKey [Word16]
  deriving (SortKey -> SortKey -> Bool
(SortKey -> SortKey -> Bool)
-> (SortKey -> SortKey -> Bool) -> Eq SortKey
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: SortKey -> SortKey -> Bool
$c/= :: SortKey -> SortKey -> Bool
== :: SortKey -> SortKey -> Bool
$c== :: SortKey -> SortKey -> Bool
Eq, Eq SortKey
Eq SortKey
-> (SortKey -> SortKey -> Ordering)
-> (SortKey -> SortKey -> Bool)
-> (SortKey -> SortKey -> Bool)
-> (SortKey -> SortKey -> Bool)
-> (SortKey -> SortKey -> Bool)
-> (SortKey -> SortKey -> SortKey)
-> (SortKey -> SortKey -> SortKey)
-> Ord SortKey
SortKey -> SortKey -> Bool
SortKey -> SortKey -> Ordering
SortKey -> SortKey -> SortKey
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: SortKey -> SortKey -> SortKey
$cmin :: SortKey -> SortKey -> SortKey
max :: SortKey -> SortKey -> SortKey
$cmax :: SortKey -> SortKey -> SortKey
>= :: SortKey -> SortKey -> Bool
$c>= :: SortKey -> SortKey -> Bool
> :: SortKey -> SortKey -> Bool
$c> :: SortKey -> SortKey -> Bool
<= :: SortKey -> SortKey -> Bool
$c<= :: SortKey -> SortKey -> Bool
< :: SortKey -> SortKey -> Bool
$c< :: SortKey -> SortKey -> Bool
compare :: SortKey -> SortKey -> Ordering
$ccompare :: SortKey -> SortKey -> Ordering
$cp1Ord :: Eq SortKey
Ord)

instance Show SortKey where
 show :: SortKey -> String
show (SortKey [Word16]
ws) = String
"SortKey " String -> ShowS
forall a. [a] -> [a] -> [a]
++ [Word16] -> String
showWordList [Word16]
ws

-- | Render sort key in the manner used in the CLDR collation test data:
-- the character '|' is used to separate the levels of the key and
-- corresponds to a 0 in the actual sort key.
renderSortKey :: SortKey -> String
renderSortKey :: SortKey -> String
renderSortKey (SortKey [Word16]
ws) = String
"[" String -> ShowS
forall a. [a] -> [a] -> [a]
++ [Word16] -> String
tohexes [Word16]
ws String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"]"
 where
  tohexes :: [Word16] -> String
tohexes = [String] -> String
unwords ([String] -> String)
-> ([Word16] -> [String]) -> [Word16] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Word16 -> String) -> [Word16] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map Word16 -> String
forall t p.
(Eq t, Num t, IsString p, PrintfArg t, PrintfType p) =>
t -> p
tohex
  tohex :: t -> p
tohex t
0 = p
"|"
  tohex t
x = String -> t -> p
forall r. PrintfType r => String -> r
printf String
"%04X" t
x

-- Note that & b < q <<< Q is the same as & b < q, & q <<< Q
-- Another syntactic shortcut is:
-- & a <* bcd-gp-s => & a < b < c < d < e < f < g < p < q < r < s
-- & a =* bB => & a = b = B (without that, we have a contraction)
-- &[before 2] a << b => sorts sorts b before a


data Collator =
  Collator
  { -- | Compare two 'Text's
    Collator -> Text -> Text -> Ordering
collate               :: Text -> Text -> Ordering
    -- | Compare two strings of any type that can be unpacked
    -- lazily into a list of 'Char's.
  , Collator -> forall a. Eq a => (a -> String) -> a -> a -> Ordering
collateWithUnpacker   :: forall a. Eq a => (a -> [Char]) -> a -> a -> Ordering
    -- | The sort key used to compare a 'Text'
  , Collator -> Text -> SortKey
sortKey               :: Text -> SortKey
    -- | The options used for this 'Collator'
  , Collator -> CollatorOptions
collatorOptions       :: CollatorOptions
    -- | The collation table used for this 'Collator'
  , Collator -> Collation
collatorCollation     :: Collation
  }

instance IsString Collator where
 fromString :: String -> Collator
fromString = Lang -> Collator
collatorFor (Lang -> Collator) -> (String -> Lang) -> String -> Collator
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Lang
forall a. IsString a => String -> a
fromString

-- | Default collator based on DUCET table (@allkeys.txt@).
rootCollator :: Collator
rootCollator :: Collator
rootCollator = CollatorOptions -> Collation -> Collator
mkCollator CollatorOptions
defaultCollatorOptions Collation
ducetCollation

{-# DEPRECATED collatorLang "Use (optLang . collatorOptions)" #-}
-- | 'Lang' used for tailoring. Because of fallback rules, this may be somewhat
-- different from the 'Lang' passed to 'collatorFor'.  This 'Lang'
-- won't contain unicode extensions used to set options, but
-- it will specify the collation if a non-default collation is being used.
collatorLang :: Collator -> Maybe Lang
collatorLang :: Collator -> Maybe Lang
collatorLang = CollatorOptions -> Maybe Lang
optLang (CollatorOptions -> Maybe Lang)
-> (Collator -> CollatorOptions) -> Collator -> Maybe Lang
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Collator -> CollatorOptions
collatorOptions

modifyCollatorOptions :: (CollatorOptions -> CollatorOptions)
                      -> Collator -> Collator
modifyCollatorOptions :: (CollatorOptions -> CollatorOptions) -> Collator -> Collator
modifyCollatorOptions CollatorOptions -> CollatorOptions
f Collator
coll =
  CollatorOptions -> Collation -> Collator
mkCollator (CollatorOptions -> CollatorOptions
f (CollatorOptions -> CollatorOptions)
-> CollatorOptions -> CollatorOptions
forall a b. (a -> b) -> a -> b
$ Collator -> CollatorOptions
collatorOptions Collator
coll) (Collator -> Collation
collatorCollation Collator
coll)

-- | Set method for handling variable elements (punctuation
-- and spaces): see <http://www.unicode.org/reports/tr10/>,
-- Tables 11 and 12.
setVariableWeighting :: VariableWeighting -> Collator -> Collator
setVariableWeighting :: VariableWeighting -> Collator -> Collator
setVariableWeighting VariableWeighting
w =
  (CollatorOptions -> CollatorOptions) -> Collator -> Collator
modifyCollatorOptions (\CollatorOptions
o -> CollatorOptions
o{ optVariableWeighting :: VariableWeighting
optVariableWeighting = VariableWeighting
w })

-- | The Unicode Collation Algorithm expects input to be normalized
-- into its canonical decomposition (NFD). By default, collators perform
-- this normalization. If your input is already normalized, you can increase
-- performance by disabling this step: @setNormalization False@.
setNormalization :: Bool -> Collator -> Collator
setNormalization :: Bool -> Collator -> Collator
setNormalization Bool
normalize =
  (CollatorOptions -> CollatorOptions) -> Collator -> Collator
modifyCollatorOptions (\CollatorOptions
o -> CollatorOptions
o{ optNormalize :: Bool
optNormalize = Bool
normalize })

-- | @setFrenchAccents True@ causes secondary weights to be scanned
-- in reverse order, so we get the sorting
-- @cote côte coté côté@ instead of @cote coté côte côté@.
-- The default is usually @False@, except for @fr-CA@ where it is @True@.
setFrenchAccents :: Bool -> Collator -> Collator
setFrenchAccents :: Bool -> Collator -> Collator
setFrenchAccents Bool
frAccents =
  (CollatorOptions -> CollatorOptions) -> Collator -> Collator
modifyCollatorOptions (\CollatorOptions
o -> CollatorOptions
o{ optFrenchAccents :: Bool
optFrenchAccents = Bool
frAccents })

-- | Most collations default to sorting lowercase letters before
-- uppercase (exceptions: @mt@, @da@, @cu@).  To select the opposite
-- behavior, use @setUpperBeforeLower True@.
setUpperBeforeLower :: Bool -> Collator -> Collator
setUpperBeforeLower :: Bool -> Collator -> Collator
setUpperBeforeLower Bool
upperBefore =
  (CollatorOptions -> CollatorOptions) -> Collator -> Collator
modifyCollatorOptions (\CollatorOptions
o -> CollatorOptions
o{ optUpperBeforeLower :: Bool
optUpperBeforeLower = Bool
upperBefore })

-- | Create a collator at compile time based on a BCP 47 language
-- tag: e.g., @[collator|es-u-co-trad|]@.  Requires the @QuasiQuotes@ extension.
collator :: QuasiQuoter
collator :: QuasiQuoter
collator = QuasiQuoter :: (String -> Q Exp)
-> (String -> Q Pat)
-> (String -> Q Type)
-> (String -> Q [Dec])
-> QuasiQuoter
QuasiQuoter
  { quoteExp :: String -> Q Exp
quoteExp = \String
langtag -> do
      case Text -> Either String Lang
parseLang (String -> Text
T.pack String
langtag) of
        Left String
e -> do
          String -> Q Exp
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Q Exp) -> String -> Q Exp
forall a b. (a -> b) -> a -> b
$ String
"Could not parse BCP47 tag " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
langtag String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
e
        Right Lang
lang ->
          case Lang -> [(Lang, Collation)] -> Maybe (Lang, Collation)
forall a. Lang -> [(Lang, a)] -> Maybe (Lang, a)
lookupLang Lang
lang [(Lang, Collation)]
tailorings of
            Maybe (Lang, Collation)
Nothing     -> [| rootCollator |]
            Just (Lang
_, Collation
_) -> [| collatorFor lang |]
  , quotePat :: String -> Q Pat
quotePat = String -> Q Pat
forall a. HasCallStack => a
undefined
  , quoteType :: String -> Q Type
quoteType = String -> Q Type
forall a. HasCallStack => a
undefined
  , quoteDec :: String -> Q [Dec]
quoteDec = String -> Q [Dec]
forall a. HasCallStack => a
undefined
  }

-- | Default 'CollatorOptions'.
defaultCollatorOptions :: CollatorOptions
defaultCollatorOptions :: CollatorOptions
defaultCollatorOptions =
  CollatorOptions :: Maybe Lang
-> VariableWeighting -> Bool -> Bool -> Bool -> CollatorOptions
CollatorOptions
  { optLang :: Maybe Lang
optLang              = Maybe Lang
forall a. Maybe a
Nothing
  , optVariableWeighting :: VariableWeighting
optVariableWeighting = VariableWeighting
NonIgnorable
  , optFrenchAccents :: Bool
optFrenchAccents     = Bool
False
  , optUpperBeforeLower :: Bool
optUpperBeforeLower  = Bool
False
  , optNormalize :: Bool
optNormalize         = Bool
True
  }

-- | Returns a collator based on a BCP 47 language tag.
-- If no exact match is found, we try to find the best match
-- (falling back to the root collation if nothing else succeeds).
-- If something other than the default collation for a language
-- is desired, the @co@ keyword of the unicode extensions can be
-- used (e.g. @es-u-co-trad@ for traditional Spanish).
-- Other unicode extensions affect the collator options:
--
-- - The @kb@ keyword has the same effect as
--   'setFrenchAccents' (e.g. @fr-FR-u-kb-true@).
-- - The @ka@ keyword has the same effect as 'setVariableWeight'
--   (e.g. @fr-FR-u-kb-ka-shifted@ or @en-u-ka-noignore@).
-- - The @kf@ keyword has the same effect as 'setUpperBeforeLower'
--   (e.g. @fr-u-kf-upper@ or @fr-u-kf-lower@).
-- - The @kk@ keyword has the same effect as 'setNormalization'
--   (e.g. @fr-u-kk-false@).
collatorFor :: Lang -> Collator
collatorFor :: Lang -> Collator
collatorFor Lang
lang = CollatorOptions -> Collation -> Collator
mkCollator CollatorOptions
opts Collation
collation
  where
    opts :: CollatorOptions
opts = CollatorOptions
defaultCollatorOptions{
             optLang :: Maybe Lang
optLang          = Maybe Lang
langUsed,
             optFrenchAccents :: Bool
optFrenchAccents =
               case Text -> [(Text, [(Text, Text)])] -> Maybe [(Text, Text)]
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"u" [(Text, [(Text, Text)])]
exts Maybe [(Text, Text)]
-> ([(Text, Text)] -> Maybe Text) -> Maybe Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Text -> [(Text, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"kb" of
                 Just Text
""       -> Bool
True
                                       -- true is default attribute value
                 Just Text
"true"   -> Bool
True
                 Just Text
_        -> Bool
False
                 Maybe Text
Nothing       -> Lang -> Text
langLanguage Lang
lang Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"cu" Bool -> Bool -> Bool
||
                   (Lang -> Text
langLanguage Lang
lang Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"fr" Bool -> Bool -> Bool
&& Lang -> Maybe Text
langRegion Lang
lang Maybe Text -> Maybe Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"CA"),
             optVariableWeighting :: VariableWeighting
optVariableWeighting =
               case Text -> [(Text, [(Text, Text)])] -> Maybe [(Text, Text)]
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"u" [(Text, [(Text, Text)])]
exts Maybe [(Text, Text)]
-> ([(Text, Text)] -> Maybe Text) -> Maybe Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Text -> [(Text, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"ka" of
                 Just Text
""         -> VariableWeighting
NonIgnorable
                 Just Text
"noignore" -> VariableWeighting
NonIgnorable
                 Just Text
"shifted"  -> VariableWeighting
Shifted
                 Maybe Text
Nothing | Lang -> Text
langLanguage Lang
lang Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"th"
                                 -> VariableWeighting
Shifted
                 Maybe Text
_               -> VariableWeighting
NonIgnorable,
             optUpperBeforeLower :: Bool
optUpperBeforeLower =
               case Text -> [(Text, [(Text, Text)])] -> Maybe [(Text, Text)]
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"u" [(Text, [(Text, Text)])]
exts Maybe [(Text, Text)]
-> ([(Text, Text)] -> Maybe Text) -> Maybe Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Text -> [(Text, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"kf" of
                 Just Text
""         -> Bool
True
                 Just Text
"upper"    -> Bool
True
                 Just Text
_          -> Bool
False
                 Maybe Text
Nothing         -> Lang -> Text
langLanguage Lang
lang Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"mt" Bool -> Bool -> Bool
||
                                    Lang -> Text
langLanguage Lang
lang Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"da" Bool -> Bool -> Bool
||
                                    Lang -> Text
langLanguage Lang
lang Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"cu",
             optNormalize :: Bool
optNormalize =
               case Text -> [(Text, [(Text, Text)])] -> Maybe [(Text, Text)]
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"u" [(Text, [(Text, Text)])]
exts Maybe [(Text, Text)]
-> ([(Text, Text)] -> Maybe Text) -> Maybe Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Text -> [(Text, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"kk" of
                 Just Text
""         -> Bool
True
                 Just Text
"true"     -> Bool
True
                 Just Text
"false"    -> Bool
False
                 Maybe Text
_               -> Bool
True }
    (Maybe Lang
langUsed, Collation
collation) =
      case Lang -> [(Lang, Collation)] -> Maybe (Lang, Collation)
forall a. Lang -> [(Lang, a)] -> Maybe (Lang, a)
lookupLang Lang
lang [(Lang, Collation)]
tailorings of
        Maybe (Lang, Collation)
Nothing            -> (Maybe Lang
forall a. Maybe a
Nothing, Collation
ducetCollation)
        Just (Lang
l,Collation
tailoring) -> (Lang -> Maybe Lang
forall a. a -> Maybe a
Just Lang
l, Collation
ducetCollation Collation -> Collation -> Collation
forall a. Semigroup a => a -> a -> a
<> Collation
tailoring)
    exts :: [(Text, [(Text, Text)])]
exts = Lang -> [(Text, [(Text, Text)])]
langExtensions Lang
lang

-- | Returns a collator constructed using the collation and
-- variable weighting specified in the options.
mkCollator :: CollatorOptions -> Collation -> Collator
mkCollator :: CollatorOptions -> Collation -> Collator
mkCollator CollatorOptions
opts Collation
collation =
  Collator :: (Text -> Text -> Ordering)
-> (forall a. Eq a => (a -> String) -> a -> a -> Ordering)
-> (Text -> SortKey)
-> CollatorOptions
-> Collation
-> Collator
Collator { collate :: Text -> Text -> Ordering
collate = \Text
x Text
y -> if Text
x Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
y  -- optimization
                                  then Ordering
EQ
                                  else (Text -> SortKey) -> Text -> Text -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing Text -> SortKey
sortKey' Text
x Text
y
           , collateWithUnpacker :: forall a. Eq a => (a -> String) -> a -> a -> Ordering
collateWithUnpacker
                     = \a -> String
unpack a
x a
y
                            -> if a
x a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
y
                                  then Ordering
EQ
                                  else (a -> SortKey) -> a -> a -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing ([Int] -> SortKey
sortKeyFromCodePoints' ([Int] -> SortKey) -> (a -> [Int]) -> a -> SortKey
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Int) -> String -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map Char -> Int
ord (String -> [Int]) -> (a -> String) -> a -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> String
unpack)
                                         a
x a
y
           , sortKey :: Text -> SortKey
sortKey = Text -> SortKey
sortKey'
           , collatorOptions :: CollatorOptions
collatorOptions = CollatorOptions
opts
           , collatorCollation :: Collation
collatorCollation = Collation
collation
           }
 where
  sortKey' :: Text -> SortKey
sortKey' =
      [Int] -> SortKey
sortKeyFromCodePoints'
    ([Int] -> SortKey) -> (Text -> [Int]) -> Text -> SortKey
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> [Int] -> [Int]) -> [Int] -> Text -> [Int]
forall a. (Char -> a -> a) -> a -> Text -> a
T.foldr ((:) (Int -> [Int] -> [Int]) -> (Char -> Int) -> Char -> [Int] -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
ord) []
  sortKeyFromCodePoints' :: [Int] -> SortKey
sortKeyFromCodePoints' =
      CollatorOptions -> [CollationElement] -> SortKey
mkSortKey CollatorOptions
opts
    ([CollationElement] -> SortKey)
-> ([Int] -> [CollationElement]) -> [Int] -> SortKey
forall b c a. (b -> c) -> (a -> b) -> a -> c
. VariableWeighting -> [CollationElement] -> [CollationElement]
handleVariable (CollatorOptions -> VariableWeighting
optVariableWeighting CollatorOptions
opts)
    ([CollationElement] -> [CollationElement])
-> ([Int] -> [CollationElement]) -> [Int] -> [CollationElement]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Collation -> [Int] -> [CollationElement]
getCollationElements Collation
collation
    ([Int] -> [CollationElement])
-> ([Int] -> [Int]) -> [Int] -> [CollationElement]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. if CollatorOptions -> Bool
optNormalize CollatorOptions
opts
         then [Int] -> [Int]
toNFD
         else [Int] -> [Int]
forall a. a -> a
id

handleVariable :: VariableWeighting -> [CollationElement] -> [CollationElement]
handleVariable :: VariableWeighting -> [CollationElement] -> [CollationElement]
handleVariable VariableWeighting
NonIgnorable = [CollationElement] -> [CollationElement]
forall a. a -> a
id
handleVariable VariableWeighting
Blanked = Bool -> Bool -> [CollationElement] -> [CollationElement]
doVariable Bool
False Bool
False
handleVariable VariableWeighting
Shifted = Bool -> Bool -> [CollationElement] -> [CollationElement]
doVariable Bool
True Bool
False
handleVariable VariableWeighting
ShiftTrimmed = VariableWeighting -> [CollationElement] -> [CollationElement]
handleVariable VariableWeighting
Shifted

doVariable :: Bool -> Bool -> [CollationElement] -> [CollationElement]
doVariable :: Bool -> Bool -> [CollationElement] -> [CollationElement]
doVariable Bool
_useL4 Bool
_afterVariable [] = []
doVariable Bool
useL4 Bool
afterVariable (CollationElement
e:[CollationElement]
es)
  | CollationElement -> Bool
collationVariable CollationElement
e
    =   CollationElement
e{ collationL1 :: Word16
collationL1 = Word16
0, collationL2 :: Word16
collationL2 = Word16
0, collationL3 :: Word16
collationL3 = Word16
0,
           collationL4 :: Word16
collationL4 = -- Table 11
             case Bool
useL4 of
               Bool
True
                 | CollationElement -> Word16
collationL1 CollationElement
e Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== Word16
0
                 , CollationElement -> Word16
collationL2 CollationElement
e Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== Word16
0
                 , CollationElement -> Word16
collationL3 CollationElement
e Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== Word16
0   -> Word16
0
                 | CollationElement -> Word16
collationL1 CollationElement
e Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== Word16
0
                 , CollationElement -> Word16
collationL3 CollationElement
e Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word16
0
                 , Bool
afterVariable        -> Word16
0
                 | CollationElement -> Word16
collationL1 CollationElement
e Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word16
0   -> CollationElement -> Word16
collationL1 CollationElement
e
                 | CollationElement -> Word16
collationL1 CollationElement
e Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== Word16
0
                 , CollationElement -> Word16
collationL3 CollationElement
e Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word16
0
                 , Bool -> Bool
not Bool
afterVariable    -> Word16
0xFFFF
               Bool
_                        -> Word16
0
         } CollationElement -> [CollationElement] -> [CollationElement]
forall a. a -> [a] -> [a]
: Bool -> Bool -> [CollationElement] -> [CollationElement]
doVariable Bool
useL4 Bool
True [CollationElement]
es
  | CollationElement -> Word16
collationL1 CollationElement
e Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== Word16
0 -- "ignorable"
  , Bool
afterVariable
    = CollationElement
e{ collationL1 :: Word16
collationL1 = Word16
0, collationL2 :: Word16
collationL2 = Word16
0, collationL3 :: Word16
collationL3 = Word16
0, collationL4 :: Word16
collationL4 = Word16
0 }
       CollationElement -> [CollationElement] -> [CollationElement]
forall a. a -> [a] -> [a]
: Bool -> Bool -> [CollationElement] -> [CollationElement]
doVariable Bool
useL4 Bool
afterVariable [CollationElement]
es
  | CollationElement -> Word16
collationL1 CollationElement
e Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word16
0
  , Bool -> Bool
not (CollationElement -> Bool
collationVariable CollationElement
e)
  , Bool
useL4
  = CollationElement
e{ collationL4 :: Word16
collationL4 = Word16
0xFFFF } CollationElement -> [CollationElement] -> [CollationElement]
forall a. a -> [a] -> [a]
: Bool -> Bool -> [CollationElement] -> [CollationElement]
doVariable Bool
useL4 Bool
False [CollationElement]
es
  | Bool
otherwise
    = CollationElement
e CollationElement -> [CollationElement] -> [CollationElement]
forall a. a -> [a] -> [a]
: Bool -> Bool -> [CollationElement] -> [CollationElement]
doVariable Bool
useL4 Bool
False [CollationElement]
es

mkSortKey :: CollatorOptions -> [CollationElement] -> SortKey
mkSortKey :: CollatorOptions -> [CollationElement] -> SortKey
mkSortKey CollatorOptions
opts [CollationElement]
elts = [Word16] -> SortKey
SortKey ([Word16] -> SortKey) -> [Word16] -> SortKey
forall a b. (a -> b) -> a -> b
$
    [Word16]
l1s [Word16] -> [Word16] -> [Word16]
forall a. [a] -> [a] -> [a]
++ (Word16
0Word16 -> [Word16] -> [Word16]
forall a. a -> [a] -> [a]
:[Word16]
l2s) [Word16] -> [Word16] -> [Word16]
forall a. [a] -> [a] -> [a]
++ (Word16
0Word16 -> [Word16] -> [Word16]
forall a. a -> [a] -> [a]
:[Word16]
l3s) [Word16] -> [Word16] -> [Word16]
forall a. [a] -> [a] -> [a]
++ if [Word16] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Word16]
l4s
                                    then []
                                    else Word16
0Word16 -> [Word16] -> [Word16]
forall a. a -> [a] -> [a]
:[Word16]
l4s
  where
    l1s :: [Word16]
l1s = (Word16 -> Bool) -> [Word16] -> [Word16]
forall a. (a -> Bool) -> [a] -> [a]
filter (Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
/=Word16
0) ([Word16] -> [Word16]) -> [Word16] -> [Word16]
forall a b. (a -> b) -> a -> b
$ (CollationElement -> Word16) -> [CollationElement] -> [Word16]
forall a b. (a -> b) -> [a] -> [b]
map CollationElement -> Word16
collationL1 [CollationElement]
elts
    l2s :: [Word16]
l2s = (if CollatorOptions -> Bool
optFrenchAccents CollatorOptions
opts
              then [Word16] -> [Word16]
forall a. [a] -> [a]
reverse
              else [Word16] -> [Word16]
forall a. a -> a
id) ([Word16] -> [Word16]) -> [Word16] -> [Word16]
forall a b. (a -> b) -> a -> b
$ (Word16 -> Bool) -> [Word16] -> [Word16]
forall a. (a -> Bool) -> [a] -> [a]
filter (Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
/=Word16
0) ([Word16] -> [Word16]) -> [Word16] -> [Word16]
forall a b. (a -> b) -> a -> b
$ (CollationElement -> Word16) -> [CollationElement] -> [Word16]
forall a b. (a -> b) -> [a] -> [b]
map CollationElement -> Word16
collationL2 [CollationElement]
elts
    l3s :: [Word16]
l3s = (Word16 -> Bool) -> [Word16] -> [Word16]
forall a. (a -> Bool) -> [a] -> [a]
filter (Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
/=Word16
0) ([Word16] -> [Word16]) -> [Word16] -> [Word16]
forall a b. (a -> b) -> a -> b
$ (CollationElement -> Word16) -> [CollationElement] -> [Word16]
forall a b. (a -> b) -> [a] -> [b]
map ((if CollatorOptions -> Bool
optUpperBeforeLower CollatorOptions
opts
                                  then Word16 -> Word16
forall p. (Eq p, Num p) => p -> p
switchUpperAndLower
                                  else Word16 -> Word16
forall a. a -> a
id) (Word16 -> Word16)
-> (CollationElement -> Word16) -> CollationElement -> Word16
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CollationElement -> Word16
collationL3) [CollationElement]
elts
    l4s :: [Word16]
l4s = case CollatorOptions -> VariableWeighting
optVariableWeighting CollatorOptions
opts of
             VariableWeighting
NonIgnorable -> []
             VariableWeighting
Blanked      -> []
             VariableWeighting
ShiftTrimmed -> [Word16] -> [Word16]
trimTrailingFFFFs [Word16]
l4s'
             VariableWeighting
Shifted      -> [Word16]
l4s'
    l4s' :: [Word16]
l4s' = (Word16 -> Bool) -> [Word16] -> [Word16]
forall a. (a -> Bool) -> [a] -> [a]
filter (Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
/=Word16
0) ([Word16] -> [Word16]) -> [Word16] -> [Word16]
forall a b. (a -> b) -> a -> b
$ (CollationElement -> Word16) -> [CollationElement] -> [Word16]
forall a b. (a -> b) -> [a] -> [b]
map CollationElement -> Word16
collationL4 [CollationElement]
elts
    switchUpperAndLower :: p -> p
switchUpperAndLower p
0x0002 = p
0x0008
    switchUpperAndLower p
0x0008 = p
0x0002
    switchUpperAndLower p
x      = p
x

trimTrailingFFFFs :: [Word16] -> [Word16]
trimTrailingFFFFs :: [Word16] -> [Word16]
trimTrailingFFFFs = [Word16] -> [Word16]
forall a. [a] -> [a]
reverse ([Word16] -> [Word16])
-> ([Word16] -> [Word16]) -> [Word16] -> [Word16]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Word16 -> Bool) -> [Word16] -> [Word16]
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== Word16
0xFFFF) ([Word16] -> [Word16])
-> ([Word16] -> [Word16]) -> [Word16] -> [Word16]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Word16] -> [Word16]
forall a. [a] -> [a]
reverse