{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE DeriveFoldable #-} {-# LANGUAGE DeriveTraversable #-} {-# LANGUAGE DeriveGeneric #-} {-| Module : Data.Svfactor.Text.Escape Copyright : (C) CSIRO 2017-2018 License : BSD3 Maintainer : George Wilson Stability : experimental Portability : non-portable Quote characters can be escaped in CSV documents by using two quote characters instead of one. sv's parser will unescape these sequences as it parses them, so it wraps them in the newtype 'Unescaped' Encoding requires you to provide an 'Escaper', which is a function to escape strings on the way out. -} module Data.Svfactor.Text.Escape ( Unescaped (Unescaped, getRawUnescaped) , Escaper , Escaper' , escapeString , escapeText , escapeUtf8 , escapeUtf8Lazy , escapeChar ) where import Control.DeepSeq (NFData) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as LBS import qualified Data.ByteString.UTF8 as UTF8 import qualified Data.ByteString.Lazy.UTF8 as UTF8L import Data.Foldable (Foldable) import Data.Functor (Functor) import Data.Monoid (Monoid) import Data.Semigroup (Semigroup ((<>))) import Data.Text (Text) import qualified Data.Text as Text import Data.Traversable (Traversable) import GHC.Generics (Generic) -- | Wrapper for text that is known to be in an unescaped form newtype Unescaped a = Unescaped { getRawUnescaped :: a } deriving (Eq, Ord, Show, Semigroup, Monoid, Functor, Foldable, Traversable, Generic) instance NFData a => NFData (Unescaped a) -- | A function that, given a char, escapes all occurrences of that char. -- -- This version allows the escaping to be type-changing. For example, escaping -- a single char can result in a string with two characters. type Escaper s t = Char -> Unescaped s -> t -- | A function that, given a char, escapes all occurrences of that char. type Escaper' a = Char -> Unescaped a -> a -- | Replaces all occurrences of the given character with two occurrences of that -- character, non-recursively, in the given 'String'. -- -- >>> escapeString ''' "hello 'string'" -- "hello ''string''" -- escapeString :: Escaper' String escapeString c = concatMap (doubleChar c) . getRawUnescaped -- | Replaces all occurrences of the given character with two occurrences of that -- character in the given 'Text' -- -- @ -- {- LANGUAGE OverloadedStrings -} -- -- >>> escapeText ''' "hello 'text'" -- "hello ''text''" -- @ escapeText :: Escaper' Text escapeText c = let ct = Text.singleton c in Text.replace ct (ct <> ct) . getRawUnescaped -- | Replaces all occurrences of the given character with two occurrences of that -- character in the given ByteString, which is assumed to be UTF-8 compatible. -- -- @ -- {- LANGUAGE OverloadedStrings -} -- >>> escapeUtf8 ''' "hello 'bytestring'" -- "hello ''bytestring''" -- @ escapeUtf8 :: Escaper' BS.ByteString escapeUtf8 c = UTF8.fromString . concatMap (doubleChar c) . UTF8.toString . getRawUnescaped -- | Replaces all occurrences of the given character with two occurrences of that -- character in the given lazy ByteString, which is assumed to be UTF-8 compatible. -- -- @ -- {- LANGUAGE OverloadedStrings -} -- -- >>> escapeUtf8Lazy ''' "hello 'lazy bytestring'" -- "hello ''lazy bytestring''" -- @ escapeUtf8Lazy :: Escaper' LBS.ByteString escapeUtf8Lazy c = UTF8L.fromString . concatMap (doubleChar c) . UTF8L.toString . getRawUnescaped -- | Escape a character, which must return a string. -- -- >>> escapeChar ''' ''' -- "''" -- -- >>> escapeChar ''' 'z' -- "z" -- escapeChar :: Escaper Char String escapeChar c = doubleChar c . getRawUnescaped doubleChar :: Char -> Char -> String doubleChar q z = if z == q then [q,q] else [z]