-- | Substitutions.
module Substituions
( authorSubst
, chSubst
, titleSubst
) where
import Data.Text ( Text )
------------------------------------------------------------------------------
-- Character substitutions
-- If a new entry is added here, please also add it to the
-- characters-decimal substitutions.
-- | Character substitutions, non-numeric-notation table.
--
-- Each pair is a character and the ASCII text that replaces it.
chNNSubst ∷ [(Text, Text)]
chNNSubst =
  [ ("ö", "o")  -- U+00F6 LATIN SMALL LETTER O WITH DIAERESIS
  , ("Ä", "A")  -- U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
  , ("Ü", "U")  -- U+00DC LATIN CAPITAL LETTER U WITH DIAERESIS
  ]
-- If a new entry is added here, please also add it to the
-- characters-hexadecimal substitutions.
-- | Character substitutions, decimal-notation table.
--
-- Each pair is a character and the ASCII text that replaces it; an empty
-- replacement drops the character.
chDecSubst ∷ [(Text, Text)]
chDecSubst =
  [ ("á", "a")        -- LATIN SMALL LETTER A WITH ACUTE
  , ("é", "e")        -- LATIN SMALL LETTER E WITH ACUTE
  , ("í", "i")        -- LATIN SMALL LETTER I WITH ACUTE
  , ("ó", "o")        -- LATIN SMALL LETTER O WITH ACUTE
  , ("ö", "o")        -- LATIN SMALL LETTER O WITH DIAERESIS
  , ("ú", "u")        -- LATIN SMALL LETTER U WITH ACUTE
  -- The next two entries mirror chNNSubst: that table's maintenance note
  -- asks for each of its entries to be present in this table as well.
  , ("Ä", "A")        -- LATIN CAPITAL LETTER A WITH DIAERESIS
  , ("Ü", "U")        -- LATIN CAPITAL LETTER U WITH DIAERESIS
  , ("Š", "S")        -- LATIN CAPITAL LETTER S WITH CARON
  , ("š", "s")        -- LATIN SMALL LETTER S WITH CARON
  , ("λ", "lambda")   -- GREEK SMALL LETTER LAMDA
  , ("‘", "")         -- LEFT SINGLE QUOTATION MARK
  , ("’", "")         -- RIGHT SINGLE QUOTATION MARK
  ]
-- | Character substitutions, hexadecimal-notation table.
--
-- Each pair is a character and the ASCII text that replaces it; an empty
-- replacement drops the character. Letters keep their case (a capital maps
-- to a capital, a small letter to a small letter) and every key is listed
-- exactly once.
chHexSubst ∷ [(Text, Text)]
chHexSubst =
  [ ("Ä", "A")               -- U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
  , ("Ü", "U")               -- U+00DC LATIN CAPITAL LETTER U WITH DIAERESIS
  , ("á", "a")               -- U+00E1 LATIN SMALL LETTER A WITH ACUTE
  , ("é", "e")               -- U+00E9 LATIN SMALL LETTER E WITH ACUTE
  , ("í", "i")               -- U+00ED LATIN SMALL LETTER I WITH ACUTE
  , ("ó", "o")               -- U+00F3 LATIN SMALL LETTER O WITH ACUTE
  , ("ú", "u")               -- U+00FA LATIN SMALL LETTER U WITH ACUTE
  , ("ö", "o")               -- U+00F6 LATIN SMALL LETTER O WITH DIAERESIS
  , ("ü", "u")               -- U+00FC LATIN SMALL LETTER U WITH DIAERESIS
  , ("Š", "S")               -- U+0160 LATIN CAPITAL LETTER S WITH CARON
  , ("š", "s")               -- U+0161 LATIN SMALL LETTER S WITH CARON
  , ("Ī", "I")               -- U+012A LATIN CAPITAL LETTER I WITH MACRON
  , ("ī", "i")               -- U+012B LATIN SMALL LETTER I WITH MACRON
  , ("Ś", "S")               -- U+015A LATIN CAPITAL LETTER S WITH ACUTE
  , ("ś", "s")               -- U+015B LATIN SMALL LETTER S WITH ACUTE
  , ("Ş", "S")               -- U+015E LATIN CAPITAL LETTER S WITH CEDILLA
  , ("ş", "s")               -- U+015F LATIN SMALL LETTER S WITH CEDILLA
  , ("α", "alpha")           -- U+03B1 GREEK SMALL LETTER ALPHA
  , ("β", "beta")            -- U+03B2 GREEK SMALL LETTER BETA
  , ("γ", "gamma")           -- U+03B3 GREEK SMALL LETTER GAMMA
  , ("δ", "delta")           -- U+03B4 GREEK SMALL LETTER DELTA
  , ("ε", "epsilon")         -- U+03B5 GREEK SMALL LETTER EPSILON
  , ("ζ", "zeta")            -- U+03B6 GREEK SMALL LETTER ZETA
  , ("η", "eta")             -- U+03B7 GREEK SMALL LETTER ETA
  , ("θ", "theta")           -- U+03B8 GREEK SMALL LETTER THETA
  , ("ι", "iota")            -- U+03B9 GREEK SMALL LETTER IOTA
  , ("κ", "kappa")           -- U+03BA GREEK SMALL LETTER KAPPA
  , ("λ", "lambda")          -- U+03BB GREEK SMALL LETTER LAMDA
  , ("μ", "mu")              -- U+03BC GREEK SMALL LETTER MU
  , ("ν", "nu")              -- U+03BD GREEK SMALL LETTER NU
  , ("ξ", "xi")              -- U+03BE GREEK SMALL LETTER XI
  , ("ο", "omicron")         -- U+03BF GREEK SMALL LETTER OMICRON
  , ("π", "pi")              -- U+03C0 GREEK SMALL LETTER PI
  , ("ρ", "rho")             -- U+03C1 GREEK SMALL LETTER RHO
  , ("ς", "sigma")           -- U+03C2 GREEK SMALL LETTER FINAL SIGMA
  , ("σ", "sigma")           -- U+03C3 GREEK SMALL LETTER SIGMA
  , ("τ", "tau")             -- U+03C4 GREEK SMALL LETTER TAU
  , ("υ", "upsilon")         -- U+03C5 GREEK SMALL LETTER UPSILON
  , ("φ", "phi")             -- U+03C6 GREEK SMALL LETTER PHI
  , ("χ", "chi")             -- U+03C7 GREEK SMALL LETTER CHI
  , ("ψ", "psi")             -- U+03C8 GREEK SMALL LETTER PSI
  , ("ω", "omega")           -- U+03C9 GREEK SMALL LETTER OMEGA
  , ("‐", "-")               -- U+2010 HYPHEN
  , ("–", "-")               -- U+2013 EN DASH
  -- NOTE(review): EM DASH becomes "." here but "-" in chUnicodeSubst; this
  -- table comes first in chSubst, so confirm which replacement is intended.
  , ("—", ".")               -- U+2014 EM DASH
  , ("‘", "")                -- U+2018 LEFT SINGLE QUOTATION MARK
  , ("’", "")                -- U+2019 RIGHT SINGLE QUOTATION MARK
  , ("‚", "")                -- U+201A SINGLE LOW-9 QUOTATION MARK
  , ("“", "")                -- U+201C LEFT DOUBLE QUOTATION MARK
  , ("”", "")                -- U+201D RIGHT DOUBLE QUOTATION MARK
  , ("„", "")                -- U+201E DOUBLE LOW-9 QUOTATION MARK
  , ("†", "dagger")          -- U+2020 DAGGER
  -- NOTE(review): chUnicodeSubst maps DOUBLE DAGGER to "" instead; confirm.
  , ("‡", "dagger-dagger")   -- U+2021 DOUBLE DAGGER
  , ("•", "")                -- U+2022 BULLET
  , ("…", "")                -- U+2026 HORIZONTAL ELLIPSIS
  , ("⊃", "")                -- U+2283 SUPERSET OF
  , ("⌝", "")                -- U+231D TOP RIGHT CORNER
  ]
-- | Character substitutions, Unicode-notation table.
--
-- Each pair is a character and the ASCII text that replaces it; an empty
-- replacement drops the character. The entries are kept in their
-- established order.
chUnicodeSubst ∷ [(Text, Text)]
chUnicodeSubst =
  -- Control characters, ASCII punctuation and superscript digits.
  [ ("\r", "")            -- U+000D CARRIAGE RETURN (CR)
  , (" ", "-")            -- U+0020 SPACE
  , ("!", "")             -- U+0021 EXCLAMATION MARK
  , ("\"", "")            -- U+0022 QUOTATION MARK
  , ("#", "")             -- U+0023 NUMBER SIGN
  , ("$", "")             -- U+0024 DOLLAR SIGN
  , ("&", "")             -- U+0026 AMPERSAND
  , ("'", "")             -- U+0027 APOSTROPHE
  , ("(", "")             -- U+0028 LEFT PARENTHESIS
  , (")", "")             -- U+0029 RIGHT PARENTHESIS
  , ("*", "")             -- U+002A ASTERISK
  , ("+", "")             -- U+002B PLUS SIGN
  , (",", "")             -- U+002C COMMA
  , ("/", "-")            -- U+002F SOLIDUS
  , ("²", "2")            -- U+00B2 SUPERSCRIPT TWO
  , ("³", "3")            -- U+00B3 SUPERSCRIPT THREE
  , ("¹", "1")            -- U+00B9 SUPERSCRIPT ONE
  , (":", ".")            -- U+003A COLON
  , (";", ".")            -- U+003B SEMICOLON
  , ("<", "")             -- U+003C LESS-THAN SIGN
  , ("=", "")             -- U+003D EQUALS SIGN
  , (">", "")             -- U+003E GREATER-THAN SIGN
  , ("?", "")             -- U+003F QUESTION MARK
  , ("@", "")             -- U+0040 COMMERCIAL AT
  , ("[", "")             -- U+005B LEFT SQUARE BRACKET
  , ("\\", "")            -- U+005C REVERSE SOLIDUS
  , ("]", "")             -- U+005D RIGHT SQUARE BRACKET
  , ("_", "-")            -- U+005F LOW LINE
  , ("`", "")             -- U+0060 GRAVE ACCENT
  , ("|", "")             -- U+007C VERTICAL LINE
  , ("¡", "")             -- U+00A1 INVERTED EXCLAMATION MARK
  , ("¬", "")             -- U+00AC NOT SIGN
  -- Latin capital letters.
  , ("À", "A")            -- U+00C0 LATIN CAPITAL LETTER A WITH GRAVE
  , ("Á", "A")            -- U+00C1 LATIN CAPITAL LETTER A WITH ACUTE
  , ("Ã", "A")            -- U+00C3 LATIN CAPITAL LETTER A WITH TILDE
  , ("Æ", "E")            -- U+00C6 LATIN CAPITAL LETTER AE
  , ("É", "E")            -- U+00C9 LATIN CAPITAL LETTER E WITH ACUTE
  , ("Í", "I")            -- U+00CD LATIN CAPITAL LETTER I WITH ACUTE
  , ("Ñ", "N")            -- U+00D1 LATIN CAPITAL LETTER N WITH TILDE
  , ("Ó", "O")            -- U+00D3 LATIN CAPITAL LETTER O WITH ACUTE
  , ("Ú", "U")            -- U+00DA LATIN CAPITAL LETTER U WITH ACUTE
  , ("Ö", "O")            -- U+00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
  , ("×", "")             -- U+00D7 MULTIPLICATION SIGN
  -- Latin small letters.
  , ("à", "a")            -- U+00E0 LATIN SMALL LETTER A WITH GRAVE
  , ("á", "a")            -- U+00E1 LATIN SMALL LETTER A WITH ACUTE
  , ("â", "a")            -- U+00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX
  , ("ã", "a")            -- U+00E3 LATIN SMALL LETTER A WITH TILDE
  , ("ä", "a")            -- U+00E4 LATIN SMALL LETTER A WITH DIAERESIS
  , ("æ", "e")            -- U+00E6 LATIN SMALL LETTER AE
  , ("ç", "c")            -- U+00E7 LATIN SMALL LETTER C WITH CEDILLA
  , ("é", "e")            -- U+00E9 LATIN SMALL LETTER E WITH ACUTE
  , ("í", "i")            -- U+00ED LATIN SMALL LETTER I WITH ACUTE
  , ("ñ", "n")            -- U+00F1 LATIN SMALL LETTER N WITH TILDE
  , ("ò", "o")            -- U+00F2 LATIN SMALL LETTER O WITH GRAVE
  , ("ó", "o")            -- U+00F3 LATIN SMALL LETTER O WITH ACUTE
  , ("ö", "o")            -- U+00F6 LATIN SMALL LETTER O WITH DIAERESIS
  , ("ø", "o")            -- U+00F8 LATIN SMALL LETTER O WITH STROKE
  , ("ú", "u")            -- U+00FA LATIN SMALL LETTER U WITH ACUTE
  , ("ü", "u")            -- U+00FC LATIN SMALL LETTER U WITH DIAERESIS
  , ("þ", "t")            -- U+00FE LATIN SMALL LETTER THORN
  , ("ÿ", "y")            -- U+00FF LATIN SMALL LETTER Y WITH DIAERESIS
  , ("ć", "c")            -- U+0107 LATIN SMALL LETTER C WITH ACUTE
  , ("č", "c")            -- U+010D LATIN SMALL LETTER C WITH CARON
  , ("Ł", "L")            -- U+0141 LATIN CAPITAL LETTER L WITH STROKE
  , ("ņ", "n")            -- U+0146 LATIN SMALL LETTER N WITH CEDILLA
  , ("ř", "r")            -- U+0159 LATIN SMALL LETTER R WITH CARON
  , ("š", "s")            -- U+0161 LATIN SMALL LETTER S WITH CARON
  , ("ū", "u")            -- U+016B LATIN SMALL LETTER U WITH MACRON
  , ("Ÿ", "Y")            -- U+0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
  -- Greek letters, spelled out by name.
  , ("Ω", "Omega")        -- U+03A9 GREEK CAPITAL LETTER OMEGA
  , ("α", "alpha")        -- U+03B1 GREEK SMALL LETTER ALPHA
  , ("β", "beta")         -- U+03B2 GREEK SMALL LETTER BETA
  , ("γ", "gamma")        -- U+03B3 GREEK SMALL LETTER GAMMA
  , ("δ", "delta")        -- U+03B4 GREEK SMALL LETTER DELTA
  , ("ε", "epsilon")      -- U+03B5 GREEK SMALL LETTER EPSILON
  , ("ζ", "zeta")         -- U+03B6 GREEK SMALL LETTER ZETA
  , ("η", "eta")          -- U+03B7 GREEK SMALL LETTER ETA
  , ("θ", "theta")        -- U+03B8 GREEK SMALL LETTER THETA
  , ("ι", "iota")         -- U+03B9 GREEK SMALL LETTER IOTA
  , ("κ", "kappa")        -- U+03BA GREEK SMALL LETTER KAPPA
  , ("λ", "lambda")       -- U+03BB GREEK SMALL LETTER LAMDA
  , ("μ", "mu")           -- U+03BC GREEK SMALL LETTER MU
  , ("ν", "nu")           -- U+03BD GREEK SMALL LETTER NU
  , ("ξ", "xi")           -- U+03BE GREEK SMALL LETTER XI
  , ("ο", "omicron")      -- U+03BF GREEK SMALL LETTER OMICRON
  , ("π", "pi")           -- U+03C0 GREEK SMALL LETTER PI
  , ("ρ", "rho")          -- U+03C1 GREEK SMALL LETTER RHO
  , ("σ", "sigma")        -- U+03C3 GREEK SMALL LETTER SIGMA
  , ("ς", "sigma")        -- U+03C2 GREEK SMALL LETTER FINAL SIGMA
  , ("τ", "tau")          -- U+03C4 GREEK SMALL LETTER TAU
  , ("υ", "upsilon")      -- U+03C5 GREEK SMALL LETTER UPSILON
  , ("φ", "phi")          -- U+03C6 GREEK SMALL LETTER PHI
  , ("χ", "chi")          -- U+03C7 GREEK SMALL LETTER CHI
  , ("ψ", "psi")          -- U+03C8 GREEK SMALL LETTER PSI
  , ("ω", "omega")        -- U+03C9 GREEK SMALL LETTER OMEGA
  -- Dashes, quotation marks and other symbols.
  , ("–", "-")            -- U+2013 EN DASH
  , ("—", "-")            -- U+2014 EM DASH
  , ("‘", "")             -- U+2018 LEFT SINGLE QUOTATION MARK
  , ("’", "")             -- U+2019 RIGHT SINGLE QUOTATION MARK
  , ("‡", "")             -- U+2021 DOUBLE DAGGER
  , ("™", "")             -- U+2122 TRADE MARK SIGN
  , ("�", "")             -- U+FFFD REPLACEMENT CHARACTER
  ]
-- | The complete character substitution table: the hexadecimal,
-- non-numeric, decimal and Unicode tables, concatenated in that order.
-- NB: the substitutions are not commutative.
chSubst ∷ [(Text, Text)]
chSubst = concat [chHexSubst, chNNSubst, chDecSubst, chUnicodeSubst]
------------------------------------------------------------------------------
-- Author substitutions
-- | Substitutions for author strings.
--
-- Each pair is a piece of text and its replacement. Besides tidying the
-- separators between names, the table covers the two-code-point sequences
-- reported in Issue #1, replacing each with a plain ASCII letter.
authorSubst ∷ [(Text, Text)]
authorSubst =
  [ (", ", ",")               -- no space after a comma
  , (" and", ",")             -- " and" becomes a comma
  -- See Issue #1.
  , ("Ã\x00AD", "i")          -- U+00C3 and U+00AD
  -- See Issue #1.
  , ("Ã\x00A1", "a")          -- U+00C3 and U+00A1
  , ("á", "a")                -- U+00E1 LATIN SMALL LETTER A WITH ACUTE
  , ("Mcbride", "McBride")    -- restore the capital B
  ]
------------------------------------------------------------------------------
-- Title substitutions
-- These substitutions should be done before converting to lower case.
-- | Substitutions for title strings.
--
-- Each pair is a piece of text and its replacement. Several keys contain
-- capital letters, so the table is meant to be applied before the title is
-- converted to lower case.
--
-- NOTE(review): the pairs marked "identity" map a string to itself, and
-- ("P ", "P") is listed twice; confirm that these keys are as intended.
titleSubst ∷ [(Text, Text)]
titleSubst =
  [ ("P ", "P")
  , ("0 ", "0")
  , ("C", "C")                                -- identity
  , ("CC", "CC")                              -- identity
  , ("I ", "I")
  , ("J", "J")                                -- identity
  , ("Modus ponens", "Modus ponens")          -- identity
  , ("P ", "P")
  , ("really ", "really")
  , ("S-P", "S-P")                            -- identity
  , ("3", "3")                                -- identity
  -- The whitespace around `+` is not the standard one.
  -- TODO (2017-07-04): Add test case.
  , (" + ", "plus")
  , ("ω", "omega")
  -- TeX math-mode Greek letters.
  , ("$\\alpha$", "alpha")
  , ("$\\beta$", "beta")
  , ("$\\gamma$", "gamma")
  , ("$\\epsilon$", "epsilon")
  , ("$\\eta$", "eta")
  -- See Issue #2.
  , ("$\\lambda$", "lambda")
  , ("$\\pi$", "pi")
  , ("$\\omega$", "omega")
  -- TeX font-switch groups around tool and language names.
  , ("{\\sc Coq}", "Coq")
  , ("{\\sf Haskell}:", "Haskell")
  , ("{\\sc QuickSpec}:", "QuickSpec")
  , ("{\\sc QuodLibet}!", "QuodLibet")
  , ("{\\sc Vampire}", "Vampire")
  , ("Å›", "s")                               -- U+00C5 and U+203A
  , ("Ã\x00B6", "o")                          -- U+00C3 and U+00B6
  -- The letter is transliterated to "o", as in the character tables,
  -- rather than being dropped.
  , ("ö", "o")                                -- U+00F6
  ]