-- | Substitutions.
module Substitutions
( authorSubst
, titleSubst
, replace
, replaceHTMLEntities
, replaceHTMLSymbols
, unicodeSubst
, weirdSubst
) where
import Data.Text ( Text )
import qualified Data.Text as T
------------------------------------------------------------------------------
-- Replacements
type HTMLEntityName = Text
type HTMLEntityDec = Text
type HTMLEntityHex = Text
type HTMLSymbolDec = Text
type HTMLSymbolHex = Text
replaceHTMLEntities ∷ Text → Text
replaceHTMLEntities = replace nameSubst . replace decSubst . replace hexSubst
where
nameSubst ∷ [(HTMLEntityName, Text)]
nameSubst = map (\ (a, _, _, d) → (a, d)) htmlEntitySubst
decSubst ∷ [(HTMLEntityDec, Text)]
decSubst = map (\ (_, b, _, d) → (b, d)) htmlEntitySubst
hexSubst ∷ [(HTMLEntityHex, Text)]
hexSubst = map (\ (_, _, c, d) → (c, d)) htmlEntitySubst
replaceHTMLSymbols ∷ Text → Text
replaceHTMLSymbols = replace decSubst . replace hexSubst
where
decSubst ∷ [(HTMLSymbolDec, Text)]
decSubst = map (\ (a, _, c) → (a, c)) htmlSymbolSubst
hexSubst ∷ [(HTMLSymbolHex, Text)]
hexSubst = map (\ (_, b, c) → (b, c)) htmlSymbolSubst
replace ∷ [(Text,Text)] → Text → Text
replace xs ys = foldl (flip (uncurry T.replace)) ys xs
------------------------------------------------------------------------------
-- HTML entities and symbols substitutions
-- Used by example for the Journal of Functional Programmning.
-- | Substitutions of HTML entities.
htmlEntitySubst ∷ [(HTMLEntityName, HTMLEntityDec, HTMLEntityHex, Text)]
htmlEntitySubst =
[ ("Â", "Â", "Â", "A") -- LATIN CAPITAL LETTER A WITH CIRCUMFLEX
, ("Ä", "Ä", "Ä", "A") -- LATIN CAPITAL LETTER A WITH DIAERESIS
, ("Ç", "Ç", "Ç", "C") -- LATIN CAPITAL LETTER C WITH CEDILLA
, ("Ë", "Ë", "Ë", "E") -- LATIN CAPITAL LETTER E WITH DIAERESIS
, ("Ï", "Ï", "Ï", "I") -- LATIN CAPITAL LETTER I WITH DIAERESIS
, ("Ö", "Ö", "Ö", "O") -- LATIN CAPITAL LETTER U WITH DIAERESIS
, ("Ø", "Ø", "Ø", "O") -- LATIN CAPITAL LETTER O WITH STROKE
, ("Ü", "Ü", "Ü", "U") -- LATIN CAPITAL LETTER U WITH DIAERESIS
, ("á", "á", "á", "a") -- LATIN SMALL LETTER A WITH ACUTE
, ("â", "â", "â", "a") -- LATIN SMALL LETTER A WITH CIRCUMFLEX
, ("ä", "ä", "ä", "a") -- LATIN SMALL LETTER A WITH DIAERESIS
, ("ç", "ç", "ç", "c") -- LATIN SMALL LETTER C WITH CEDILLA
, ("é", "é", "é", "e") -- LATIN SMALL LETTER E WITH ACUTE
, ("ë", "ë", "ë", "e") -- LATIN SMALL LETTER E WITH DIAERESIS
, ("í", "í", "í", "i") -- LATIN SMALL LETTER I WITH ACUTE
, ("ï", "ï", "ï", "i") -- LATIN SMALL LETTER I WITH DIAERESIS
, ("ó", "ó", "ó", "o") -- LATIN SMALL LETTER O WITH ACUTE
, ("ö", "ö", "ö", "o") -- LATIN SMALL LETTER O WITH DIAERESIS
, ("ø", "ø", "ø", "o") -- LATIN SMALL LETTER O WITH STROKE
, ("ú", "ú", "ú", "u") -- LATIN SMALL LETTER U WITH ACUTE
, ("ü", "ü", "ü", "u") -- LATIN SMALL LETTER U WITH DIAERESIS
, ("Ī", "Ī", "Ī", "I") -- LATIN CAPITAL LETTER I WITH MACRON
, ("ī", "ī", "ī", "I") -- LATIN SMALL LETTER I WITH MACRON
, ("Ś", "Ś", "Ś", "S") -- LATIN CAPITAL LETTER S WITH ACUTE
, ("ś", "ś", "ś", "s") -- LATIN SMALL LETTER S WITH ACUTE
, ("Ş", "Ş", "Ş", "s") -- LATIN CAPITAL LETTER S WITH CEDILLA
, ("ş", "ş", "ş", "s") -- LATIN SMALL LETTER S WITH CEDILLA
, ("Š", "Š", "Š", "S") -- LATIN CAPITAL LETTER S WITH CARON
, ("š", "š", "š", "s") -- LATIN SMALL LETTER S WITH CARON
, ("α", "α", "α", "alpha") -- GREEK SMALL LETTER ALPHA
, ("β", "β", "β", "beta") -- GREEK SMALL LETTER BETA
, ("γ", "γ", "γ", "gamma") -- GREEK SMALL LETTER GAMMA
, ("δ", "δ", "δ", "delta") -- GREEK SMALL LETTER DELTA
, ("ε", "ε", "ε", "epsilon") -- GREEK SMALL LETTER EPSILON
, ("ζ", "ζ", "ζ", "zeta") -- GREEK SMALL LETTER ZETA
, ("η", "η", "η", "eta") -- GREEK SMALL LETTER ETA
, ("θ", "θ", "θ", "theta") -- GREEK SMALL LETTER THETA
, ("ι", "ι", "ι", "iota") -- GREEK SMALL LETTER IOTA
, ("κ", "κ", "κ", "kappa") -- GREEK SMALL LETTER KAPPA
, ("λ", "λ", "λ", "lambda") -- GREEK SMALL LETTER LAMDA
, ("μ", "μ", "μ", "mu") -- GREEK SMALL LETTER MU
, ("ν", "ν", "ν", "nu") -- GREEK SMALL LETTER NU
, ("ξ", "ξ", "ξ", "xi") -- GREEK SMALL LETTER ZI
, ("ο", "ο", "ο", "omicron") -- GREEK SMALL LETTER OMICRON
, ("π", "π", "π", "pi") -- GREEK SMALL LETTER PI
, ("ρ", "ρ", "ρ", "rho") -- GREEK SMALL LETTER RHO
, ("ς", "ς", "ς", "sigma") -- GREEK SMALL LETTER FINAL SIGMA
, ("σ", "σ", "σ", "sigma") -- GREEK SMALL LETTER SIGMA
, ("τ", "τ", "τ", "tau") -- GREEK SMALL LETTER TAU
, ("υ", "υ", "υ", "upsilon") -- GREEK SMALL LETTER UPSILON
, ("φ", "φ", "φ", "phi") -- GREEK SMALL LETTER PHI
, ("χ", "χ", "χ", "chi") -- GREEK SMALL LETTER CHI
, ("ψ", "ψ", "ψ", "psi") -- GREEK SMALL LETTER PSI
, ("ω", "ω", "ω", "omega") -- GREEK SMALL LETTER OMEGA
, ("–", "–", "–", "-") -- EN DASH
, ("—", "—", "—", ".") -- EM DAS
, ("‘", "‘", "‘", "") -- LEFT SINGLE QUOTATION MARK
, ("’", "’", "’", "") -- RIGHT SINGLE QUOTATION MARK
, ("‚", "‚", "‚", "") -- SINGLE LOW-9 QUOTATION MAR
, ("“", "“", "“", "") -- LEFT DOUBLE QUOTATION MARK
, ("”", "”", "”", "") -- RIGHT DOUBLE QUOTATION MARK
, ("„", "„", "„", "") -- DOUBLE LOW-9 QUOTATION MARK
, ("†", "†", "†", "dagger") -- DAGGER
, ("‡", "‡", "‡", "dagger-dagger") -- DOUBLE DAGGER
, ("•", "•", "•", "") -- BULLET
, ("…", "…", "…", "") -- HORIZONTAL ELLIPSIS
, ("⊃", "₃", "⊃", "") -- SUPERSET OF
]
-- | Substitutions of HTML symbols.
htmlSymbolSubst ∷ [(HTMLSymbolDec, HTMLSymbolHex, Text)]
htmlSymbolSubst =
[ ("‐", "‐", "-") -- HYPHEN
, ("‛", "‛", "") -- SINGLE HIGH-REVERSED-9 QUOTATION MARK
, ("‟", "‟", "") -- DOUBLE HIGH-REVERSED-9 QUOTATION MARK
, ("⌝", "⌝", "") -- TOP RIGHT CORNER
]
------------------------------------------------------------------------------
-- Unicode substitutions
-- | Unicode substitutions.
unicodeSubst ∷ [(Text,Text)]
unicodeSubst =
[ ("\t", "") -- U+0009 CHARACTER TABULATION
, ("\n", "") -- U+000A LINE FEED (LF)
, ("\f", "") -- U+000C FORM FEED (FF)
, ("\r", "") -- U+000D CARRIAGE RETURN (CR)
, (" ", "-") -- U+0020 SPACE
, ("!", "") -- U+0021 EXCLAMATION MARK
, ("\"", "") -- U+0022 QUOTATION MARK
, ("#", "") -- U+0023 NUMBER SIGN
, ("$", "") -- U+0024 DOLLAR SIGN
, ("%", "") -- U+0025 PERCENT SIGN
, ("&", "") -- U+0026 AMPERSAND
, ("'", "") -- U+0027 APOSTROPHE
, ("(", "") -- U+0028 LEFT PARENTHESIS
, (")", "") -- U+0029 RIGHT PARENTHESIS
, ("*", "") -- U+002A ASTERISK
, ("+", "") -- U+002B PLUS SIGN
, (",", "") -- U+002C COMMA
, (".", "") -- U+002D FULL STOP
, ("/", "") -- U+002F SOLIDUS
, (":", "") -- U+003A COLON
, (";", "") -- U+003B SEMICOLON
, ("<", "") -- U+003C LESS-THAN SIGN
, ("=", "") -- U+003D EQUALS SIGN
, (">", "") -- U+003E GREATER-THAN SIGN
, ("?", "") -- U+003F QUESTION MARK
, ("@", "") -- U+0040 COMMERCIAL AT
, ("[", "") -- U+005B LEFT SQUARE BRACKET
, ("\\", "") -- U+005C REVERSE SOLIDUS
, ("]", "") -- U+005D RIGHT SQUARE BRACKET
, ("^", "") -- U+005E CIRCUMFLEX ACCENT
, ("_", "-") -- U+005F LOW LINE
, ("`", "") -- U+0060 GRAVE ACCENT
, ("{", "") -- U+007B LEFT CURLY BRACKET
, ("|", "") -- U+007C VERTICAL LINE
, ("}", "") -- U+007B RIGHT CURLY BRACKET
, ("~", "") -- U+007E TILDE
, ("¡", "") -- U+00A1 INVERTED EXCLAMATION MARK
, ("¬", "") -- U+00AC NOT SIGN
, ("²", "2") -- U+00B2 SUPERSCRIPT TWO
, ("³", "3") -- U+00B3 SUPERSCRIPT THREE
, ("¹", "1") -- U+00B9 SUPERSCRIPT ONE
, ("À", "A") -- U+00C0 LATIN CAPITAL LETTER A WITH GRAVE
, ("Á", "A") -- U+00C1 LATIN CAPITAL LETTER A WITH ACUTE
, ("Â", "A") -- U+00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX
, ("Ã", "A") -- U+00C3 LATIN CAPITAL LETTER A WITH TILDE
, ("Ä", "A") -- U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
, ("Å", "A") -- U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
, ("Æ", "AE") -- U+00C6 LATIN CAPITAL LETTER AE
, ("Ç", "C") -- U+00C7 LATIN CAPITAL LETTER C WITH CEDILLA
, ("È", "E") -- U+00C8 LATIN CAPITAL LETTER E WITH GRAVE
, ("É", "E") -- U+00C9 LATIN CAPITAL LETTER E WITH ACUTE
, ("Ê", "E") -- U+00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX
, ("Ë", "E") -- U+00CB LATIN CAPITAL LETTER E WITH DIAERESIS
, ("Í", "I") -- U+00CD LATIN CAPITAL LETTER I WITH ACUTE
, ("Î", "I") -- U+00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX
, ("Ï", "I") -- U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS
, ("Ñ", "N") -- U+00D1 LATIN CAPITAL LETTER N WITH TILDE
, ("Ó", "O") -- U+00D3 LATIN CAPITAL LETTER O WITH ACUTE
, ("Ô", "O") -- U+00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX
, ("Õ", "O") -- U+00D5 LATIN CAPITAL LETTER O WITH TILDE
, ("Ö", "O") -- U+00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
, ("×", "") -- U+00D7 MULTIPLICATION SIGN
, ("Ø", "O") -- U+00D8 LATIN CAPITAL LETTER O WITH STROKE
, ("Ù", "U") -- U+00D9 LATIN CAPITAL LETTER U WITH GRAVE
, ("Ú", "U") -- U+00DA LATIN CAPITAL LETTER U WITH ACUTE
, ("Û", "U") -- U+00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX
, ("Ü", "U") -- U+00DC LATIN CAPITAL LETTER U WITH DIAERESIS
, ("Ý", "Y") -- U+00DD LATIN CAPITAL LETTER Y WITH ACUTE
, ("ß", "ss") -- U+00DF LATIN SMALL LETTER SHARP S
, ("à", "a") -- U+00E0 LATIN SMALL LETTER A WITH GRAVE
, ("á", "a") -- U+00E1 LATIN SMALL LETTER A WITH ACUTE
, ("â", "a") -- U+00E2 LATIN SMALL LETTER A CIRCUMFLEX
, ("ã", "a") -- U+00E3 LATIN SMALL LETTER A WITH TILDE
, ("ä", "a") -- U+00E4 LATIN SMALL LETTER A WITH DIAERESIS
, ("å", "a") -- U+00E5 LATIN SMALL LETTER A WITH RING ABOVE
, ("æ", "ae") -- U+00E6 LATIN SMALL LETTER AE
, ("ç", "c") -- U+00E7 LATIN SMALL LETTER C WITH CEDILLA
, ("è", "e") -- U+00E8 LATIN SMALL LETTER E WITH GRAVE
, ("é", "e") -- U+00E9 LATIN SMALL LETTER E WITH ACUTE
, ("ê", "e") -- U+00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
, ("ë", "e") -- U+00EB LATIN SMALL LETTER E WITH DIAERESIS
, ("í", "i") -- U+00ED LATIN SMALL LETTER I WITH ACUTE
, ("î", "i") -- U+00EE LATIN SMALL LETTER I WITH CIRCUMFLEX
, ("ï", "i") -- U+00EF LATIN SMALL LETTER I WITH DIAERESIS
, ("ñ", "n") -- U+00F1 LATIN SMALL LETTER N WITH TILDE
, ("ò", "o") -- U+00F2 LATIN SMALL LETTER O WITH GRAVE
, ("ó", "o") -- U+00F3 LATIN SMALL LETTER O WITH ACUTE
, ("ô", "o") -- U+00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
, ("õ", "o") -- U+00F5 LATIN SMALL LETTER O WITH TILDE
, ("ö", "o") -- U+00F6 LATIN SMALL LETTER O WITH DIAERESIS
, ("ø", "o") -- U+00F8 LATIN SMALL LETTER O WITH STROKE
, ("ù", "u") -- U+00F9 LATIN SMALL LETTER U WITH GRAVE
, ("ú", "u") -- U+00FA LATIN SMALL LETTER U WITH ACUTE
, ("û", "u") -- U+00FB LATIN SMALL LETTER U WITH CIRCUMFLEX
, ("ü", "u") -- U+00FC LATIN SMALL LETTER U WITH DIAERESIS
, ("ý", "y") -- U+00FD LATIN SMALL LETTER Y WITH ACUTE
, ("þ", "t") -- U+00FE LATIN SMALL LETTER THORN
, ("ÿ", "y") -- U+00FF LATIN SMALL LETTER Y WITH DIAERESIS
, ("Ā", "A") -- U+0100 LATIN CAPITAL LETTER A WITH MACRON
, ("ā", "a") -- U+0101 LATIN SMALL LETTER A WITH MACRON
, ("Ă", "A") -- U+0102 LATIN CAPITAL LETTER A WITH BREVE
, ("ă", "a") -- U+0103 LATIN SMALL LETTER A WITH
, ("Ą", "A") -- U+0104 LATIN CAPITAL LETTER A WITH OGONEK
, ("ą", "a") -- U+0105 LATIN SMALL LETTER A WITH OGONEK
, ("Ć", "c") -- U+0106 LATIN CAPITAL LETTER C WITH ACUTE
, ("ć", "c") -- U+0107 LATIN SMALL LETTER C WITH ACUTE
, ("Č", "C") -- U+010C LATIN CAPITAL LETTER C WITH CARON
, ("č", "c") -- U+010D LATIN SMALL LETTER C WITH CARON
, ("Ď", "D") -- U+010E LATIN CAPITAL LETTER D WITH CARON
, ("ď", "d") -- U+010F LATIN SMALL LETTER D WITH CARON
, ("Ē", "E") -- U+0112 LATIN CAPITAL LETTER E WITH MACRON
, ("ē", "e") -- U+0113 LATIN SMALL LETTER E WITH MACRON
, ("Ę", "e") -- U+0118 LATIN CAPITAL LETTER E WITH OGONEK
, ("ę", "e") -- U+0119 LATIN SMALL LETTER E WITH OGONEK
, ("Ě", "E") -- U+011A LATIN CAPITAL LETTER E WITH CARON
, ("ě", "e") -- U+011B LATIN SMALL LETTER E WITH CARON
, ("Ğ", "G") -- U+011E LATIN CAPITAL LETTER G WITH BREVE
, ("ğ", "g") -- U+011F LATIN SMALL LETTER G WITH BREVE
, ("Ģ", "G") -- U+0122 LATIN CAPITAL LETTER G WITH CEDILLA
, ("ģ", "g") -- U+0123 LATIN SMALL LETTER G WITH CEDILLA
, ("Ī", "I") -- U+012A LATIN CAPITAL LETTER I WITH MACRON
, ("ī", "I") -- U+012B LATIN SMALL LETTER I WITH MACRON
, ("İ", "I") -- U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
, ("Ķ", "K") -- U+0136 LATIN CAPITAL LETTER K WITH CEDILLA
, ("ķ", "k") -- U+0137 LATIN SMALL LETTER K WITH CEDILLA
, ("Ļ", "L") -- U+013B LATIN CAPITAL LETTER L WITH CEDILLA
, ("ļ", "l") -- U+013C LATIN SMALL LETTER L WITH CEDILLA
, ("Ł", "L") -- U+0141 LATIN CAPITAL LETTER L WITH STROKE
, ("ł", "l") -- U+0142 LATIN SMALL LETTER L WITH STROKE
, ("Ń", "N") -- U+0143 LATIN CAPITAL LETTER N WITH ACUTE
, ("ń", "n") -- U+0144 LATIN SMALL LETTER N WITH ACUTE
, ("Ņ", "n") -- U+0145 LATIN CAPITAL LETTER N WITH CEDILLA
, ("ņ", "n") -- U+0146 LATIN SMALL LETTER N WITH CEDILLA
, ("Ň", "N") -- U+0147 LATIN CAPITAL LETTER N WITH CARON
, ("ň", "n") -- U+0148 LATIN SMALL LETTER N WITH CARON
, ("Ő", "O") -- U+0150 LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
, ("ő", "o") -- U+0151 LATIN SMALL LETTER O WITH DOUBLE ACUTE
, ("Œ", "OE") -- U+0152 LATIN CAPITAL LIGATURE OE
, ("œ", "oe") -- U+0153 LATIN SMALL LIGATURE OE
, ("Ř", "R") -- U+0158 LATIN CAPITAL LETTER R WITH CARON
, ("ř", "r") -- U+0159 LATIN SMALL LETTER R WITH CARON
, ("Ś", "S") -- U+015A LATIN CAPITAL LETTER S WITH ACUTE
, ("ś", "s") -- U+015B LATIN SMALL LETTER S WITH ACUTE
, ("Ş", "S") -- U+015E LATIN CAPITAL LETTER S WITH CEDILLA
, ("ş", "s") -- U+015F LATIN SMALL LETTER S WITH CEDILLA
, ("Š", "S") -- U+0160 LATIN CAPITAL LETTER S WITH CARON
, ("š", "s") -- U+0161 LATIN SMALL LETTER S WITH CARON
, ("Ť", "T") -- U+0164 LATIN CAPITAL LETTER T WITH CARON
, ("ť", "t") -- U+0165 LATIN SMALL LETTER T WITH CARON
, ("Ū", "U") -- U+016A LATIN CAPITAL LETTER U WITH MACRON
, ("ū", "u") -- U+016B LATIN SMALL LETTER U WITH MACRON
, ("Ů", "U") -- U+016E LATIN CAPITAL LETTER U WITH RING ABOVE
, ("ů", "u") -- U+016F LATIN SMALL LETTER U WITH RING ABOVE
, ("Ű", "U") -- U+0170 LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
, ("ű", "u") -- U+0171 LATIN SMALL LETTER U WITH DOUBLE ACUTE
, ("Ÿ", "Y") -- U+0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
, ("Ź", "Z") -- U+0179 LATIN CAPITAL LETTER Z WITH ACUTE
, ("ź", "z") -- U+017A LATIN SMALL LETTER Z WITH ACUTE
, ("Ż", "Z") -- U+017B LATIN CAPITAL LETTER Z WITH DOT ABOVE
, ("ż", "z") -- U+017C LATIN SMALL LETTER Z WITH DOT ABOVE
, ("Ž", "z") -- U+017D LATIN CAPITAL LETTER Z WITH CARON
, ("ž", "z") -- U+017E LATIN SMALL LETTER Z WITH CARON
, ("Ə", "A") -- U+018F LATIN CAPITAL LETTER SCHWA
, ("Ș", "S") -- U+0218 LATIN CAPITAL LETTER S WITH COMMA BELOW
, ("ș", "s") -- U+0219 LATIN SMALL LETTER S WITH COMMA BELOW
, ("Ț", "T") -- U+021A LATIN CAPITAL LETTER T WITH COMMA BELOW
, ("ț", "t") -- U+021B LATIN SMALL LETTER T WITH COMMA BELOW
, ("ə", "a") -- U+0259 LATIN SMALL LETTER SCHWA
, ("Ω", "Omega") -- U+03A9 GREEK CAPITAL LETTER OMEGA
, ("α", "alpha") -- U+03B1 GREEK SMALL LETTER ALPHA
, ("β", "beta") -- U+03B2 GREEK SMALL LETTER BETA
, ("γ", "gamma") -- U+03B3 GREEK SMALL LETTER GAMMA
, ("δ", "delta") -- U+03B4 GREEK SMALL LETTER DELTA
, ("ε", "epsilon") -- U+03B5 GREEK SMALL LETTER EPSILON
, ("ζ", "zeta") -- U+03B6 GREEK SMALL LETTER ZETA
, ("η", "eta") -- U+03B7 GREEK SMALL LETTER ETA
, ("θ", "theta") -- U+03B8 GREEK SMALL LETTER THETA
, ("ι", "iota") -- U+03B9 GREEK SMALL LETTER IOTA
, ("κ", "kappa") -- U+03BA GREEK SMALL LETTER KAPPA
, ("λ", "lambda") -- U+03BB GREEK SMALL LETTER LAMDA
, ("μ", "mu") -- U+03BC GREEK SMALL LETTER MU
, ("ν", "nu") -- U+03BD GREEK SMALL LETTER NU
, ("ξ", "xi") -- U+03BE GREEK SMALL LETTER ZI
, ("ο", "omicron") -- U+03BF GREEK SMALL LETTER OMICRON
, ("π", "pi") -- U+03C0 GREEK SMALL LETTER PI
, ("ρ", "rho") -- U+03C1 GREEK SMALL LETTER RHO
, ("ς", "sigma") -- U+03C2 GREEK SMALL LETTER FINAL SIGMA
, ("σ", "sigma") -- U+03C3 GREEK SMALL LETTER SIGMA
, ("τ", "tau") -- U+03C4 GREEK SMALL LETTER TAU
, ("υ", "upsilon") -- U+03C5 GREEK SMALL LETTER UPSILON
, ("φ", "phi") -- U+03C6 GREEK SMALL LETTER PHI
, ("χ", "chi") -- U+03C7 GREEK SMALL LETTER CHI
, ("ψ", "psi") -- U+03C8 GREEK SMALL LETTER PSI
, ("ω", "omega") -- U+03C9 GREEK SMALL LETTER OMEGA
, ("ẞ", "SS") -- U+1E9E LATIN CAPITAL LETTER SHARP S
, ("‐", "-") -- U+2210 HYPHEN
, ("–", "-") -- U+2013 EN DASH
, ("—", "-") -- U+2014 EM DASH
, ("‘", "") -- U+2018 LEFT SINGLE QUOTATION MARK
, ("’", "") -- U+2019 RIGHT SINGLE QUOTATION MARK
, ("‚", "") -- U+201A SINGLE LOW-9 QUOTATION MARK
, ("‛", "") -- U+201B SINGLE HIGH-REVERSED-9 QUOTATION MARK
, ("“", "") -- U+201C LEFT DOUBLE QUOTATION MARK
, ("”", "") -- U+201D RIGHT DOUBLE QUOTATION MARK
, ("„", "") -- U+201E DOUBLE LOW-9 QUOTATION MARK
, ("‟", "") -- U+201F DOUBLE HIGH-REVERSED-9 QUOTATION MARK
, ("†", "") -- U+2020 DAGGER
, ("‡", "") -- U+2021 DOUBLE DAGGER
, ("•", "") -- U+2022 BULLET
, ("…", "") -- U+2026 HORIZONTAL ELLIPSIS
, ("™", "") -- U+2122 TRADE MARK SIGN
, ("ℵ", "aleph") -- U+2135 ALEF SYMBOL
, ("ℶ", "beth") -- U+2136 BET SYMBOL
, ("⊃", "") -- U+2283 SUPERSET OF
, ("⌝", "") -- U+231D TOP RIGHT CORNER
, ("�", "") -- U+FFFD REPLACEMENT CHARACTER
]
------------------------------------------------------------------------------
-- Author and title substitutions
-- | Author substitutions.
authorSubst ∷ [(Text, Text)]
authorSubst =
[ (", ", ",")
, (" and", ",")
]
-- | Title substitutions.
-- These substitutions should be done before the substitutions of the
-- HTML entities and converting to lower case.
titleSubst ∷ [(Text,Text)]
titleSubst =
[ ("P ", "P")
, ("0 ", "0")
, ("C", "C")
, ("CC", "CC")
, ("I ", "I")
, ("J", "J")
, ("Modus ponens", "Modus ponens")
, ("P ", "P")
, ("really ", "really")
, ("S-P", "S-P")
, ("3", "3")
-- The whitespace around `+` is not the standard one.
-- TODO (2017-07-04): Added test case.
, (" + ", "plus")
, ("ω", "omega")
, ("$\\alpha$", "alpha")
, ("$\\beta$", "beta")
, ("$\\gamma$", "gamma")
, ("$\\epsilon$", "epsilon")
, ("$\\eta$", "eta")
, ("$\\lambda$", "lambda")
, ("$\\pi$", "pi")
, ("$\\omega$", "omega")
, ("{\\sc Coq}", "Coq")
, ("{\\sf Haskell}:", "Haskell")
, ("{\\sc QuickSpec}:", "QuickSpec")
, ("{\\sc QuodLibet}!", "QuodLibet")
, ("{\\sc Vampire}", "Vampire")
]
------------------------------------------------------------------------------
-- Weird substitutions.
-- These substitutions should be done before the substitutions of the
-- HTML entities and symbols and converting to lower case.
-- | Weird author and title substitutions.
weirdSubst ∷ [(Text, Text)]
weirdSubst =
[ ("á", "a") -- U+00C3 and U+00A1 (LATIN SMALL LETTER A WITH GRAVE)
, ("é", "e") -- U+00C3 and U+00A9 (LATIN SMALL LETTER E WITH ACUTE)
, ("Ã\x00AD", "i") -- U+00C3 and U+00AD (LATIN SMALL LETTER I GRAVE)
-- We erase `ö` because it follows an `o` in the examples we know.
, ("ö", "") -- U+00C3 and U+00B6 (LATIN SMALL LETTER O WITH DIAERESIS)
, ("Å›", "s") -- U+00C5 and U+203A (LATIN CAPITAL LETTER S WITH ACUTE)
-- TODO (2017-07-20): Five hex numbers
, ("‐", "-") -- U+2010 HYPHEN
-- TODO (2017-07-17): Missing `;`.
, ("’", "") -- U+2019 RIGHT SINGLE QUOTATION MARK
]