-- |
-- Module      : Data.Text.ICU.Spoof.Pure
-- Copyright   : (c) 2015 Ben Hamilton
--
-- License     : BSD-style
-- Maintainer  : bgertzfield@gmail.com
-- Stability   : experimental
-- Portability : GHC
--
-- Pure string spoof checking functions for Unicode, implemented as
-- bindings to the International Components for Unicode (ICU)
-- libraries.
--
-- For the impure spoof checking API (which is richer, but less easy to
-- use), see the "Data.Text.ICU.Spoof" module.

module Data.Text.ICU.Spoof.Pure
    (
    -- * Types
      Spoof
    , SpoofParams(..)
    , spoof
    , spoofWithParams
    , spoofFromSource
    , spoofFromSerialized
    -- * String spoof checks
    , areConfusable
    , getSkeleton
    , spoofCheck
    -- * Configuration
    , getAllowedLocales
    , getChecks
    , getRestrictionLevel
    -- * Persistence
    , serialize
    ) where

import Data.ByteString (ByteString)
import Data.Foldable (forM_)
import Data.Text (Text)
import Data.Text.ICU.Spoof.Internal (Spoof(..))
import System.IO.Unsafe (unsafePerformIO)
import qualified Data.Text.ICU.Spoof as S

data SpoofParams
  -- | Used to configure a 'Spoof' checker via 'spoofWithParams'.
  = SpoofParams {
    -- | Optional 'S.SpoofCheck's to perform on a string. By default, performs
    -- all checks except 'CharLimit'.
    SpoofParams -> Maybe [SpoofCheck]
spoofChecks :: Maybe [S.SpoofCheck]
    -- | Optional 'S.RestrictionLevel' to which characters in the string will
    -- be limited. By default, uses 'HighlyRestrictive'.
  , SpoofParams -> Maybe RestrictionLevel
level :: Maybe S.RestrictionLevel
    -- | Optional locale(s) whose scripts will be used to limit the
    -- set of allowed characters in a string. If set, automatically
    -- enables the 'CharLimit' spoof check.
  , SpoofParams -> Maybe [String]
locales :: Maybe [String]
} deriving (Int -> SpoofParams -> ShowS
[SpoofParams] -> ShowS
SpoofParams -> String
(Int -> SpoofParams -> ShowS)
-> (SpoofParams -> String)
-> ([SpoofParams] -> ShowS)
-> Show SpoofParams
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [SpoofParams] -> ShowS
$cshowList :: [SpoofParams] -> ShowS
show :: SpoofParams -> String
$cshow :: SpoofParams -> String
showsPrec :: Int -> SpoofParams -> ShowS
$cshowsPrec :: Int -> SpoofParams -> ShowS
Show, SpoofParams -> SpoofParams -> Bool
(SpoofParams -> SpoofParams -> Bool)
-> (SpoofParams -> SpoofParams -> Bool) -> Eq SpoofParams
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: SpoofParams -> SpoofParams -> Bool
$c/= :: SpoofParams -> SpoofParams -> Bool
== :: SpoofParams -> SpoofParams -> Bool
$c== :: SpoofParams -> SpoofParams -> Bool
Eq)

applySpoofParams :: SpoofParams -> S.MSpoof -> S.MSpoof
applySpoofParams :: SpoofParams -> MSpoof -> MSpoof
applySpoofParams (SpoofParams Maybe [SpoofCheck]
c Maybe RestrictionLevel
lev Maybe [String]
loc) MSpoof
s = IO MSpoof -> MSpoof
forall a. IO a -> a
unsafePerformIO (IO MSpoof -> MSpoof) -> IO MSpoof -> MSpoof
forall a b. (a -> b) -> a -> b
$ do
  Maybe [SpoofCheck] -> ([SpoofCheck] -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ Maybe [SpoofCheck]
c (MSpoof -> [SpoofCheck] -> IO ()
S.setChecks MSpoof
s)
  Maybe RestrictionLevel -> (RestrictionLevel -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ Maybe RestrictionLevel
lev (MSpoof -> RestrictionLevel -> IO ()
S.setRestrictionLevel MSpoof
s)
  Maybe [String] -> ([String] -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ Maybe [String]
loc (MSpoof -> [String] -> IO ()
S.setAllowedLocales MSpoof
s)
  MSpoof -> IO MSpoof
forall (m :: * -> *) a. Monad m => a -> m a
return MSpoof
s

-- | Open an immutable 'Spoof' checker with default options (all
-- 'S.SpoofCheck's except 'CharLimit').
spoof :: Spoof
spoof :: Spoof
spoof = IO Spoof -> Spoof
forall a. IO a -> a
unsafePerformIO (IO Spoof -> Spoof) -> IO Spoof -> Spoof
forall a b. (a -> b) -> a -> b
$ MSpoof -> Spoof
S (MSpoof -> Spoof) -> IO MSpoof -> IO Spoof
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` IO MSpoof
S.open
{-# NOINLINE spoof #-}

-- | Open an immutable 'Spoof' checker with specific 'SpoofParams'
-- to control its behavior.
spoofWithParams :: SpoofParams -> Spoof
spoofWithParams :: SpoofParams -> Spoof
spoofWithParams SpoofParams
p = IO Spoof -> Spoof
forall a. IO a -> a
unsafePerformIO (IO Spoof -> Spoof) -> IO Spoof -> Spoof
forall a b. (a -> b) -> a -> b
$ do
  MSpoof
s <- IO MSpoof
S.open
  Spoof -> IO Spoof
forall (m :: * -> *) a. Monad m => a -> m a
return (MSpoof -> Spoof
S (MSpoof -> Spoof) -> MSpoof -> Spoof
forall a b. (a -> b) -> a -> b
$ SpoofParams -> MSpoof -> MSpoof
applySpoofParams SpoofParams
p MSpoof
s)

-- | Open a immutable 'Spoof' checker with specific 'SpoofParams'
-- to control its behavior and custom rules given the UTF-8 encoded
-- contents of the @confusables.txt@ and @confusablesWholeScript.txt@
-- files as described in <http://unicode.org/reports/tr39/ Unicode UAX #39>.
spoofFromSource :: (ByteString, ByteString) -> SpoofParams -> Spoof
spoofFromSource :: (ByteString, ByteString) -> SpoofParams -> Spoof
spoofFromSource (ByteString
confusables, ByteString
confusablesWholeScript) SpoofParams
p = IO Spoof -> Spoof
forall a. IO a -> a
unsafePerformIO (IO Spoof -> Spoof) -> IO Spoof -> Spoof
forall a b. (a -> b) -> a -> b
$ do
  MSpoof
s <- (ByteString, ByteString) -> IO MSpoof
S.openFromSource (ByteString
confusables, ByteString
confusablesWholeScript)
  Spoof -> IO Spoof
forall (m :: * -> *) a. Monad m => a -> m a
return (MSpoof -> Spoof
S (MSpoof -> Spoof) -> MSpoof -> Spoof
forall a b. (a -> b) -> a -> b
$ SpoofParams -> MSpoof -> MSpoof
applySpoofParams SpoofParams
p MSpoof
s)

-- | Create an immutable spoof checker with specific 'SpoofParams'
-- to control its behavior and custom rules previously returned
-- by 'serialize'.
spoofFromSerialized :: ByteString -> SpoofParams -> Spoof
spoofFromSerialized :: ByteString -> SpoofParams -> Spoof
spoofFromSerialized ByteString
b SpoofParams
p = IO Spoof -> Spoof
forall a. IO a -> a
unsafePerformIO (IO Spoof -> Spoof) -> IO Spoof -> Spoof
forall a b. (a -> b) -> a -> b
$ do
  MSpoof
s <- ByteString -> IO MSpoof
S.openFromSerialized ByteString
b
  Spoof -> IO Spoof
forall (m :: * -> *) a. Monad m => a -> m a
return (MSpoof -> Spoof
S (MSpoof -> Spoof) -> MSpoof -> Spoof
forall a b. (a -> b) -> a -> b
$ SpoofParams -> MSpoof -> MSpoof
applySpoofParams SpoofParams
p MSpoof
s)

-- | Check two strings for confusability.
areConfusable :: Spoof -> Text -> Text -> S.SpoofCheckResult
areConfusable :: Spoof -> Text -> Text -> SpoofCheckResult
areConfusable (S MSpoof
s) Text
t1 Text
t2 = IO SpoofCheckResult -> SpoofCheckResult
forall a. IO a -> a
unsafePerformIO (IO SpoofCheckResult -> SpoofCheckResult)
-> IO SpoofCheckResult -> SpoofCheckResult
forall a b. (a -> b) -> a -> b
$ MSpoof -> Text -> Text -> IO SpoofCheckResult
S.areConfusable MSpoof
s Text
t1 Text
t2

-- | Check a string for spoofing issues.
spoofCheck :: Spoof -> Text -> S.SpoofCheckResult
spoofCheck :: Spoof -> Text -> SpoofCheckResult
spoofCheck (S MSpoof
s) Text
t = IO SpoofCheckResult -> SpoofCheckResult
forall a. IO a -> a
unsafePerformIO (IO SpoofCheckResult -> SpoofCheckResult)
-> IO SpoofCheckResult -> SpoofCheckResult
forall a b. (a -> b) -> a -> b
$ MSpoof -> Text -> IO SpoofCheckResult
S.spoofCheck MSpoof
s Text
t

-- | Generates re-usable \"skeleton\" strings which can be used (via
-- Unicode equality) to check if an identifier is confusable
-- with some large set of existing identifiers.
--
-- If you cache the returned strings in storage, you /must/ invalidate
-- your cache any time the underlying confusables database changes
-- (i.e., on ICU upgrade).
--
-- By default, assumes all input strings have been passed through
-- 'toCaseFold' and are lower-case. To change this, pass
-- 'SkeletonAnyCase'.
--
-- By default, builds skeletons which catch visually confusable
-- characters across multiple scripts.  Pass 'SkeletonSingleScript' to
-- override that behavior and build skeletons which catch visually
-- confusable characters across single scripts.
getSkeleton :: Spoof -> Maybe S.SkeletonTypeOverride -> Text -> Text
getSkeleton :: Spoof -> Maybe SkeletonTypeOverride -> Text -> Text
getSkeleton (S MSpoof
s) Maybe SkeletonTypeOverride
o Text
t = IO Text -> Text
forall a. IO a -> a
unsafePerformIO (IO Text -> Text) -> IO Text -> Text
forall a b. (a -> b) -> a -> b
$ MSpoof -> Maybe SkeletonTypeOverride -> Text -> IO Text
S.getSkeleton MSpoof
s Maybe SkeletonTypeOverride
o Text
t

-- | Gets the restriction level currently configured in the spoof
-- checker, if present.
getRestrictionLevel :: Spoof -> Maybe S.RestrictionLevel
getRestrictionLevel :: Spoof -> Maybe RestrictionLevel
getRestrictionLevel (S MSpoof
s) = IO (Maybe RestrictionLevel) -> Maybe RestrictionLevel
forall a. IO a -> a
unsafePerformIO (IO (Maybe RestrictionLevel) -> Maybe RestrictionLevel)
-> IO (Maybe RestrictionLevel) -> Maybe RestrictionLevel
forall a b. (a -> b) -> a -> b
$ MSpoof -> IO (Maybe RestrictionLevel)
S.getRestrictionLevel MSpoof
s

-- | Gets the checks currently configured in the spoof checker.
getChecks :: Spoof -> [S.SpoofCheck]
getChecks :: Spoof -> [SpoofCheck]
getChecks (S MSpoof
s) = IO [SpoofCheck] -> [SpoofCheck]
forall a. IO a -> a
unsafePerformIO (IO [SpoofCheck] -> [SpoofCheck])
-> IO [SpoofCheck] -> [SpoofCheck]
forall a b. (a -> b) -> a -> b
$ MSpoof -> IO [SpoofCheck]
S.getChecks MSpoof
s

-- | Gets the locales whose scripts are currently allowed by the spoof
-- checker.  (We don't use 'LocaleName' since the root and default
-- locales have no meaning here.)
getAllowedLocales :: Spoof -> [String]
getAllowedLocales :: Spoof -> [String]
getAllowedLocales (S MSpoof
s) = IO [String] -> [String]
forall a. IO a -> a
unsafePerformIO (IO [String] -> [String]) -> IO [String] -> [String]
forall a b. (a -> b) -> a -> b
$ MSpoof -> IO [String]
S.getAllowedLocales MSpoof
s

-- | Serializes the rules in this spoof checker to a byte array,
-- suitable for re-use by 'spoofFromSerialized'.
--
-- Only includes any data provided to 'openFromSource'. Does not
-- include any other state or configuration.
serialize :: Spoof -> ByteString
serialize :: Spoof -> ByteString
serialize (S MSpoof
s) = IO ByteString -> ByteString
forall a. IO a -> a
unsafePerformIO (IO ByteString -> ByteString) -> IO ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ MSpoof -> IO ByteString
S.serialize MSpoof
s

{-# INLINE spoofCheck #-}