-- Module      :  Strings
-- | a module to convert between character string encodings

-- would require systematic checks of which characters are permitted
-- (especially for input to urlEncoding)

-- the latin encoding is produced by show ...
-- t2u is nearly invertible...

-- strings remain here, to be used when constructing the wrappers for
-- functions used from other packages (with String interfaces)
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings     #-}
{-# LANGUAGE ScopedTypeVariables   #-}
{-# LANGUAGE DeriveAnyClass        #-}
{-# LANGUAGE DeriveGeneric         #-}
{-# LANGUAGE StandaloneDeriving    #-}
{-# LANGUAGE TypeSynonymInstances  #-}
{-# OPTIONS_GHC -fno-warn-missing-methods #-}
{-# OPTIONS_GHC -fno-warn-missing-signatures #-}
{-# OPTIONS_GHC -w #-}

module Uniform.Strings.Conversion (
    ByteString, LazyByteString
    , s2b, b2s, b2t,   t2b, t2u,  s2u
    , s2t, t2s
    , t2tl, tl2t
    -- uses UTF8 as encoding in ByteString
    -- urlencode is always represented the same as the input
    , Text (..), BSUTF (..), URL (..)
    , URLform , b2uf, b2urlf, urlf2b

    , b2bu, bu2b, bu2s, bu2t, t2bu, s2bu
    , u2b, u2t,  u2s, b2u
    , b2bl, bl2b -- lazy bytestring
    , bl2t, t2bl
    , bb2t, bb2s  -- conversion with error if not UTF8
    , s2latin, t2latin, latin2t, latin2s -- conversion to the latin1 encoding
    , BSlat (..), s2lat, lat2s, t2lat, lat2t
    , s3lat, t3lat, s3latin, t3latin
    , s2url, url2s,  unURL, t22latin
    , convertLatin, findNonLatinChars, findNonLatinCharsT
    , filterLatin
    , module Safe
    )   where
import           Safe (fromJustNote)
import GHC.Generics (Generic)
import Uniform.Zero (Zeros(zero) )

import Control.Monad (join)

import           Data.Text            (Text)
import qualified Data.Text            as T
import Data.Char (ord)
import Data.List (nub)
import           Data.ByteString      (ByteString)
import qualified Data.ByteString      as ByteString
import qualified Data.ByteString.Lazy as Lazy 
import Data.ByteString.Char8 (pack, unpack)

import           Data.Text.Encoding   (decodeUtf8, decodeUtf8', encodeUtf8)

import qualified Network.URI          as URI
import qualified Snap.Core            as SN

import qualified Data.Text.Lazy as LText  -- (toStrict, fromStrict)

-- | Decode a lazy 'ByteString' to 'Text', treating the bytes as UTF-8.
-- The bytes are wrapped in 'BSUTF' without validation, so the caller must
-- guarantee that the input really is UTF-8.
bl2t :: LazyByteString -> Text
bl2t = bu2t . BSUTF . bl2b

-- | Encode 'Text' as a lazy 'ByteString' (composition of 't2b' and 'b2bl').
t2bl :: Text -> LazyByteString
t2bl = b2bl . t2b

-- | Convert a 'String' to 'Text'; counterpart of 't2s'.
s2t :: String -> Text
s2t = T.pack

-- | Convert 'Text' to a 'String'; counterpart of 's2t'.
t2s :: Text -> String
t2s = T.unpack

-- | Convert lazy 'LText.Text' to strict 'Text'.
tl2t :: LText.Text -> Text
tl2t = LText.toStrict

-- | Convert strict 'Text' to lazy 'LText.Text'.
t2tl :: Text -> LText.Text
t2tl = LText.fromStrict

-- | Shorthand for the lazy 'Lazy.ByteString' type.
type LazyByteString = Lazy.ByteString

-- | The zero of a strict 'ByteString' is the empty byte string
-- (the literal is a 'Text' through OverloadedStrings).
instance Zeros ByteString where
    zero = t2b ""
-- | The zero of a lazy byte string is the lazy form of the strict zero.
instance Zeros LazyByteString where
    zero = b2bl zero

-- | A strict 'ByteString' tagged as holding UTF-8 encoded text.
-- Use 'b2bu' to construct it with validation.
newtype BSUTF = BSUTF ByteString
    deriving (Show, Read, Eq, Ord, Generic, Zeros)

-- 'Semigroup' and 'Monoid' are written by hand: deriving them with
-- DeriveAnyClass produces instances without method bodies, so '<>' and
-- 'mempty' would fail at run time.
instance Semigroup BSUTF where
    BSUTF a <> BSUTF b = BSUTF (a <> b)

instance Monoid BSUTF where
    mempty = BSUTF mempty

-- | Unwrap the bytes of a 'BSUTF'.
unBSUTF :: BSUTF -> ByteString
unBSUTF (BSUTF a) = a

-- | Encode 'Text' as UTF-8 bytes, tagged as 'BSUTF'; inverse of 'bu2t'.
t2bu :: Text -> BSUTF
t2bu = BSUTF . encodeUtf8

-- | Decode the UTF-8 bytes of a 'BSUTF' to 'Text'.
-- Uses 'decodeUtf8', which is only safe when the wrapped bytes are valid
-- UTF-8 (as guaranteed by 'b2bu' and 't2bu').
bu2t :: BSUTF -> Text
bu2t = decodeUtf8 . unBSUTF

-- conversion ByteString BSUTF
-- | Tag a 'ByteString' as UTF-8; 'Nothing' when it fails
-- 'testByteStringUtf8'.
b2bu :: ByteString -> Maybe BSUTF
b2bu a
    | testByteStringUtf8 a = Just (BSUTF a)
    | otherwise = Nothing

-- | Drop the UTF-8 tag and return the plain bytes.
bu2b :: BSUTF -> ByteString
bu2b = unBSUTF

-- | Decode a 'BSUTF' to a 'String' (via 'bu2t' and 't2s').
bu2s :: BSUTF -> String
bu2s = t2s . bu2t

-- | Test whether a byte string is valid UTF-8: 'True' exactly when
-- 'decodeUtf8'' succeeds.
-- Used for avoiding problems with the quickcheck conversions.
testByteStringUtf8 :: ByteString -> Bool
testByteStringUtf8 = either (const False) (const True) . decodeUtf8'

-- | Encode 'Text' as a UTF-8 'ByteString' (via 't2bu').
t2b :: Text -> ByteString
t2b = bu2b . t2bu

-- | Decode a 'ByteString' to 'Text'; 'Nothing' when 'b2bu' rejects the
-- bytes as not being valid UTF-8.
b2t :: ByteString -> Maybe Text
b2t = fmap bu2t . b2bu

-- | Convert a 'ByteString' to a 'String'; stops with an error (via
-- 'fromJustNote') when 'b2s' yields 'Nothing', i.e. the bytes are not UTF-8.
bb2s :: ByteString -> String
bb2s s = fromJustNote note (b2s s)
  where
    note = "bb2s - bytestring to string conversion: " ++ show s
        ++ " was not a utf8"

-- | Convert a 'ByteString' to 'Text'; stops with an error (via
-- 'fromJustNote') when 'b2t' yields 'Nothing', i.e. the bytes are not UTF-8.
bb2t :: ByteString -> Text
bb2t s = fromJustNote note (b2t s)
  where
    -- names this function, so a failure is not attributed to 'bb2s'
    note = "bb2t - bytestring to text conversion: " ++ show s
        ++ " was not a utf8"
-- bytestring -- string (just a composition of t2s . b2t and reverse)
-- | Encode a 'String' as UTF-8 bytes, tagged as 'BSUTF'.
s2bu :: String -> BSUTF
s2bu = BSUTF . encodeUtf8 . s2t

--bu2s ::  BSUTF -> String
---- ^ ByteString to String -- not inverse (not any arbitrary input)
--bu2s = t2s . decodeUtf8 . unBSUTF

-- | Encode a 'String' as a UTF-8 'ByteString' (via 's2t' and 't2b').
s2b :: String -> ByteString
s2b = t2b . s2t

-- | Convert a strict 'ByteString' to a lazy one.
b2bl :: ByteString -> Lazy.ByteString
b2bl = Lazy.fromStrict

-- | Convert a lazy 'Lazy.ByteString' to a strict one.
bl2b :: Lazy.ByteString -> ByteString
bl2b = Lazy.toStrict

-- | Decode a 'ByteString' to a 'String'; 'Nothing' when 'b2t' fails.
b2s :: ByteString -> Maybe String
b2s = fmap t2s . b2t

-- | A 'String' tagged as being in URL (percent-escaped) form.
newtype URL = URL String deriving (Show, Eq)
-- | The zero 'URL' wraps the zero 'String'.
instance Zeros URL where
    zero = URL zero

-- | Unwrap the 'String' of a 'URL' without unescaping it.
unURL :: URL -> String
unURL (URL t) = t

-- | Convert a 'String' to a 'URL' by percent-escaping it with
-- 'URI.escapeURIString' from Network.URI (a space becomes @%20@).
-- NOTE(review): the predicate selecting the characters left unescaped is
-- assumed to be 'URI.isUnescapedInURIComponent' - confirm.
s2url :: String -> URL
s2url = URL . URI.escapeURIString URI.isUnescapedInURIComponent
--s2url =   URL . HTTP.urlEncode

-- | Convert a 'URL' back to a plain 'String' by undoing the percent
-- escapes with 'URI.unEscapeString' from Network.URI.
url2s :: URL -> String
url2s = URI.unEscapeString . unURL

-- | 'True' when the string is already in the escaped form 's2url' would
-- produce: unescaping it and escaping it again gives back the same string.
testUrlEncodingURI :: String -> Bool
testUrlEncodingURI a = a == roundTrip
  where
    roundTrip = unURL (s2url (url2s (URL a)))

-- | The escaped text of a 'URL' as a plain 'String' (same as 'unURL').
url2u :: URL -> String
url2u = unURL
-- | Tag an already escaped 'String' as 'URL'; 'Nothing' when it fails
-- 'testUrlEncodingURI'.
u2url :: String -> Maybe URL
u2url a
    | testUrlEncodingURI a = Just (URL a)
    | otherwise = Nothing

-- | Percent-escape a 'String' and return the escaped text as a 'String'
-- (via 's2url'; a space becomes @%20@).
s2u :: String -> String
s2u = url2u . s2url

-- | Unescape a percent-escaped 'String'; 'Nothing' when the input is
-- rejected by 'u2url'.  Not a total inverse of 's2u'.
u2s :: String -> Maybe String
u2s = fmap url2s . u2url

-- case for encoding of form content (with + for space)
-- to remove for 9.2.1 

-- | A 'ByteString' tagged as being in form-content URL encoding.
newtype URLform = URLform ByteString deriving (Show, Eq)
-- | Unwrap the bytes of a 'URLform' without decoding them.
unURLform :: URLform -> ByteString
unURLform (URLform t) = t

-- | Encode a 'ByteString' as 'URLform' with 'SN.urlEncode' from Snap
-- (form encoding, where a space becomes @+@).
b2urlf :: ByteString -> URLform
b2urlf = URLform . SN.urlEncode

-- | Decode a 'URLform' with 'SN.urlDecode' from Snap.
-- Partial: stops with the note @"urlf2b nothing"@ when decoding fails.
urlf2b :: URLform -> ByteString
urlf2b = fromJustNote "urlf2b nothing" . SN.urlDecode . unURLform

-- | 'True' when the bytes can be decoded by 'SN.urlDecode' and encoding
-- the result again with 'SN.urlEncode' gives back the same bytes;
-- 'False' when decoding fails.
testUrlEncodingSNAP :: ByteString -> Bool
testUrlEncodingSNAP a = maybe False reencodesToInput (SN.urlDecode a)
  where
    reencodesToInput decoded = a == SN.urlEncode decoded
-- | The encoded bytes of a 'URLform' (same as 'unURLform').
urlf2u :: URLform -> ByteString
urlf2u = unURLform
-- | Tag already encoded bytes as 'URLform'; 'Nothing' when they fail
-- 'testUrlEncodingSNAP'.
u2urlf :: ByteString -> Maybe URLform
u2urlf a
    | testUrlEncodingSNAP a = Just (URLform a)
    | otherwise = Nothing
-- this test allows control in url encoded strings ...

-- | Form-encode a 'ByteString' and return the encoded bytes
-- (via 'b2urlf'; a space becomes @+@).
b2uf :: ByteString -> ByteString
b2uf = urlf2u . b2urlf

-- | Decode form-encoded bytes; 'Nothing' when the input is rejected by
-- 'u2urlf'.  Not a total inverse of 'b2uf'.
uf2b :: ByteString -> Maybe ByteString
uf2b = fmap urlf2b . u2urlf

-- | Percent-escape a 'Text' (via 's2u' on its 'String' form).
t2u :: Text -> Text
t2u = s2t . s2u . t2s
-- | Unescape a percent-escaped 'Text'; 'Nothing' when 'u2s' rejects it.
u2t :: Text -> Maybe Text
u2t = fmap s2t . u2s . t2s

-- | Percent-escape the text held in a UTF-8 'ByteString'; 'Nothing' when
-- 'b2s' cannot decode the bytes.
b2u :: ByteString -> Maybe ByteString
b2u = fmap (s2b . s2u) . b2s
-- | Unescape percent-escaped text held in a UTF-8 'ByteString'.
-- 'Nothing' when the bytes cannot be decoded by 'b2s' or the decoded
-- string is rejected by 'u2s'.
u2b :: ByteString -> Maybe ByteString
u2b bytes = s2b <$> (b2s bytes >>= u2s)

-- | A 'ByteString' tagged as holding Latin-1 encoded characters.
newtype BSlat = BSlat ByteString deriving (Show, Eq)
-- | Unwrap the bytes of a 'BSlat'.
unBSlat :: BSlat -> ByteString
unBSlat (BSlat a) = a

-- | Decode a Latin-1 tagged byte string to a 'String' (via 'latin2s').
lat2s :: BSlat -> String
lat2s = latin2s . unBSlat

-- | Encode a 'String' as Latin-1 bytes; 'Nothing' when 's22latin' finds a
-- character that does not fit.
s2lat :: String -> Maybe BSlat
s2lat = fmap BSlat . s22latin

-- | Encode a 'String' as Latin-1 bytes after converting it with
-- 's3latin' to something representable.  Lossy!
s3lat :: String -> BSlat
s3lat = BSlat . s3latin

-- | Decode a Latin-1 tagged byte string to 'Text' (via 'latin2t').
lat2t :: BSlat -> Text
lat2t = latin2t . unBSlat

-- | Encode 'Text' as Latin-1 bytes; 'Nothing' when 't22latin' finds a
-- character that does not fit.
t2lat :: Text -> Maybe BSlat
t2lat = fmap BSlat . t22latin

-- | Encode 'Text' as Latin-1 bytes after converting it with 't3latin' to
-- something representable.  Lossy!
t3lat :: Text -> BSlat
t3lat = BSlat . t3latin

-- | Read every byte as one character (Latin-1).  Works always, but
-- produces unexpected results if the byte string is not Latin-1 encoded.
latin2s :: ByteString -> String
latin2s = unpack
-- | Write every character as one byte (Latin-1).  Works always, but
-- characters above code point 255 are truncated to 8 bits by 'pack';
-- use 's22latin' to reject them instead.
s2latin :: String -> ByteString
s2latin = pack

-- | Encode a 'String' as Latin-1 bytes; 'Nothing' unless every character
-- has a code point below 256.
s22latin :: String -> Maybe ByteString
s22latin s
    | all ((< 256) . ord) s = Just (s2latin s)
    | otherwise = Nothing

-- | Encode a 'String' as Latin-1 bytes after replacing known non-Latin-1
-- characters with 'convertLatin'.  Lossy: characters without a
-- replacement are handed unchanged to 's2latin'.
s3latin :: String -> ByteString
s3latin = s2latin . convertLatin

-- | Keep only the characters with a code point below 256 (Latin-1).
filterLatin :: String -> String
filterLatin = filter ((< 256) . ord)

-- | Apply 'conv2latinChar' to every character, replacing the known
-- non-Latin-1 characters by Latin-1 look-alikes.
convertLatin :: String -> String
convertLatin = map conv2latinChar

-- | Replace a character outside Latin-1 by a Latin-1 look-alike,
-- treating typographic quotes and the em dash; other cases may be added.
-- Characters below code point 256 and characters without a known
-- replacement are returned unchanged (an alternative would be @'\SUB'@).
conv2latinChar :: Char -> Char
conv2latinChar c
    | ord c < 256 = c
    | otherwise = case c of
        '\x2014' -> '-'     -- em dash (8212)
        '\x2018' -> '\''    -- left single quote (8216)
        '\x2019' -> '\''    -- right single quote (8217)
        '\x201A' -> '\''    -- low single quote (8218)
        '\x201C' -> '"'     -- left double quote
        '\x201D' -> '"'     -- right double quote (8221)
        '\x201E' -> '"'     -- low double quote (8222)
        _        -> c

-- | The distinct characters of the input that are not in Latin-1, in
-- order of first occurrence.  Possibly apply 'conv2latinChar' first.
findNonLatinChars :: String -> String
findNonLatinChars = nub . filter ((> 255) . ord)  -- Latin-1 is 0..255
--            (\c -> conv2latinChar c == '\SUB')

-- | 'findNonLatinChars' for 'Text': the characters of the input that are
-- not in the Latin-1 encoding.
findNonLatinCharsT :: Text -> Text
findNonLatinCharsT = s2t . findNonLatinChars . t2s

-- | Read a Latin-1 encoded 'ByteString' as 'Text' (via 'latin2s').
latin2t :: ByteString -> Text
latin2t = s2t . latin2s  -- T.pack . Data.ByteString.Char8.unpack

-- | Write 'Text' as Latin-1 bytes (via 's2latin').  Works always, but
-- produces unexpected results for characters outside Latin-1.
t2latin :: Text -> ByteString
t2latin = s2latin . t2s  -- Data.ByteString.Char8.pack . T.unpack

-- | Convert 'Text' to Latin-1 bytes if meaningful; 'Nothing' when
-- 's22latin' rejects a character.
t22latin :: Text -> Maybe ByteString
t22latin = s22latin . t2s
--t22latin t = if all  ((<256) . ord) (t2s t) then  Just .  s2latin . t2s $ t else Nothing   -- Data.ByteString.Char8.pack . T.unpack

-- | Convert 'Text' to Latin-1 bytes after the replacements made by
-- 's3latin'.  Lossy!
t3latin :: Text -> ByteString
t3latin = s3latin . t2s

-- | Print the given texts to standard output, one per line.
putIOwords :: [Text] -> IO ()
putIOwords = putStrLn . unlines . map t2s

-- chars :: [GHC.Word.Word8]
-- Byte values packed into 'difficultBString'; that use fixes the element
-- type to Word8.  The last three entries were written as 419, 420 and
-- 1937, which do not fit in a byte; they are given here as the values
-- they wrap to (modulo 256), so the resulting bytes are unchanged.
chars = [198, 216, 197, 206, 202, 163, 164, 145]
-- | A byte string built from 'chars', with bytes outside the ASCII range.
difficultBString :: ByteString
difficultBString = ByteString.pack chars
difficultTString :: String
difficultTString = String