{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE UnicodeSyntax     #-}

-- | Substitutions.
--
-- Tables of @(pattern, replacement)@ pairs. The order of the pairs inside
-- each table is significant (see 'chSubst').
--
-- The module name keeps its historical spelling so that existing imports
-- continue to work.

module Substituions
  ( authorSubst
  , chSubst
  , titleSubst
  ) where

import Data.Text ( Text )

------------------------------------------------------------------------------
-- Characters substitutions

-- NOTE(review): the three tables below are described as holding the
-- "non-numeric", "decimal" and "hexadecimal" notations of the same
-- characters, yet every key is a literal character. Presumably the keys were
-- meant to be the corresponding textual spellings -- confirm that the file
-- has not been entity-decoded by a tool.

-- If a new entry is added here, please also add it to the
-- characters-decimal substitutions.

-- | Characters in non-numeric notation.
chNNSubst ∷ [(Text,Text)]
chNNSubst =
  [ ("ö", "o")  -- LATIN SMALL LETTER O WITH DIAERESIS (ö)
  , ("Ä", "A")  -- LATIN CAPITAL LETTER A WITH DIAERESIS (Ä)
  , ("Ü", "U")  -- LATIN CAPITAL LETTER U WITH DIAERESIS (Ü)
  ]

-- If a new entry is added here, please also add it to the
-- characters-hexadecimal substitutions.

-- | Characters substitutions in decimal notation.
chDecSubst ∷ [(Text,Text)]
chDecSubst =
  [ ("á", "a")       -- LATIN SMALL LETTER A WITH ACUTE
  , ("é", "e")       -- LATIN SMALL LETTER E WITH ACUTE
  , ("í", "i")       -- LATIN SMALL LETTER I WITH ACUTE
  , ("ó", "o")       -- LATIN SMALL LETTER O WITH ACUTE
  , ("ö", "o")       -- LATIN SMALL LETTER O WITH DIAERESIS (ö)
  , ("ú", "u")       -- LATIN SMALL LETTER U WITH ACUTE
  , ("Š", "S")       -- LATIN CAPITAL LETTER S WITH CARON
  , ("š", "s")       -- LATIN SMALL LETTER S WITH CARON
  , ("λ", "lambda")  -- GREEK SMALL LETTER LAMDA
  , ("‘", "")        -- LEFT SINGLE QUOTATION MARK
  , ("’", "")        -- RIGHT SINGLE QUOTATION MARK
  ]

-- | Characters substitutions in hexadecimal notation.
--
-- Every key occurs exactly once. Earlier revisions listed the macron I's
-- twice (the first small-letter pair wrongly produced a capital @I@) and
-- repeated the HYPHEN pair at the end of the list.
chHexSubst ∷ [(Text,Text)]
chHexSubst =
  [ ("Ä", "A")              -- LATIN CAPITAL LETTER A WITH DIAERESIS (Ä)
  , ("Ü", "U")              -- LATIN CAPITAL LETTER U WITH DIAERESIS (Ü)
  , ("á", "a")              -- LATIN SMALL LETTER A WITH ACUTE
  , ("é", "e")              -- LATIN SMALL LETTER E WITH ACUTE
  , ("í", "i")              -- LATIN SMALL LETTER I WITH ACUTE
  , ("ó", "o")              -- LATIN SMALL LETTER O WITH ACUTE
  , ("ú", "u")              -- LATIN SMALL LETTER U WITH ACUTE
  , ("ö", "o")              -- LATIN SMALL LETTER O WITH DIAERESIS (ö)
  , ("ü", "u")              -- LATIN SMALL LETTER U WITH DIAERESIS (ü)
  , ("Š", "S")              -- LATIN CAPITAL LETTER S WITH CARON
  , ("š", "s")              -- LATIN SMALL LETTER S WITH CARON
  , ("Ī", "I")              -- LATIN CAPITAL LETTER I WITH MACRON
  , ("ī", "i")              -- LATIN SMALL LETTER I WITH MACRON
  , ("Ś", "S")              -- LATIN CAPITAL LETTER S WITH ACUTE
  , ("ś", "s")              -- LATIN SMALL LETTER S WITH ACUTE
  , ("Ş", "S")              -- LATIN CAPITAL LETTER S WITH CEDILLA
  , ("ş", "s")              -- LATIN SMALL LETTER S WITH CEDILLA
  , ("α", "alpha")          -- GREEK SMALL LETTER ALPHA
  , ("β", "beta")           -- GREEK SMALL LETTER BETA
  , ("γ", "gamma")          -- GREEK SMALL LETTER GAMMA
  , ("δ", "delta")          -- GREEK SMALL LETTER DELTA
  , ("ε", "epsilon")        -- GREEK SMALL LETTER EPSILON
  , ("ζ", "zeta")           -- GREEK SMALL LETTER ZETA
  , ("η", "eta")            -- GREEK SMALL LETTER ETA
  , ("θ", "theta")          -- GREEK SMALL LETTER THETA
  , ("ι", "iota")           -- GREEK SMALL LETTER IOTA
  , ("κ", "kappa")          -- GREEK SMALL LETTER KAPPA
  , ("λ", "lambda")         -- GREEK SMALL LETTER LAMDA
  , ("μ", "mu")             -- GREEK SMALL LETTER MU
  , ("ν", "nu")             -- GREEK SMALL LETTER NU
  , ("ξ", "xi")             -- GREEK SMALL LETTER XI
  , ("ο", "omicron")        -- GREEK SMALL LETTER OMICRON
  , ("π", "pi")             -- GREEK SMALL LETTER PI
  , ("ρ", "rho")            -- GREEK SMALL LETTER RHO
  , ("ς", "sigma")          -- GREEK SMALL LETTER FINAL SIGMA
  , ("σ", "sigma")          -- GREEK SMALL LETTER SIGMA
  , ("τ", "tau")            -- GREEK SMALL LETTER TAU
  , ("υ", "upsilon")        -- GREEK SMALL LETTER UPSILON
  , ("φ", "phi")            -- GREEK SMALL LETTER PHI
  , ("χ", "chi")            -- GREEK SMALL LETTER CHI
  , ("ψ", "psi")            -- GREEK SMALL LETTER PSI
  , ("ω", "omega")          -- GREEK SMALL LETTER OMEGA
  , ("‐", "-")              -- HYPHEN
  , ("–", "-")              -- EN DASH
    -- NOTE(review): EM DASH becomes "." here but "-" in 'chUnicodeSubst';
    -- confirm which replacement is intended.
  , ("—", ".")              -- EM DASH
  , ("‘", "")               -- LEFT SINGLE QUOTATION MARK
  , ("’", "")               -- RIGHT SINGLE QUOTATION MARK
  , ("‚", "")               -- SINGLE LOW-9 QUOTATION MARK
  , ("“", "")               -- LEFT DOUBLE QUOTATION MARK
  , ("”", "")               -- RIGHT DOUBLE QUOTATION MARK
  , ("„", "")               -- DOUBLE LOW-9 QUOTATION MARK
  , ("†", "dagger")         -- DAGGER
    -- NOTE(review): DOUBLE DAGGER is removed (mapped to "") in
    -- 'chUnicodeSubst'; confirm which replacement is intended.
  , ("‡", "dagger-dagger")  -- DOUBLE DAGGER
  , ("•", "")               -- BULLET
  , ("…", "")               -- HORIZONTAL ELLIPSIS
  , ("⊃", "")               -- SUPERSET OF
  , ("⌝", "")               -- TOP RIGHT CORNER
  ]

-- | Characters substitutions in Unicode notation.
chUnicodeSubst ∷ [(Text,Text)]
chUnicodeSubst =
  [ ("\r", "")       -- U+000D CARRIAGE RETURN (CR)
  , (" ", "-")       -- U+0020 SPACE
  , ("!", "")        -- U+0021 EXCLAMATION MARK
  , ("\"", "")       -- U+0022 QUOTATION MARK
  , ("#", "")        -- U+0023 NUMBER SIGN
  , ("$", "")        -- U+0024 DOLLAR SIGN
  , ("&", "")        -- U+0026 AMPERSAND
  , ("'", "")        -- U+0027 APOSTROPHE
  , ("(", "")        -- U+0028 LEFT PARENTHESIS
  , (")", "")        -- U+0029 RIGHT PARENTHESIS
  , ("*", "")        -- U+002A ASTERISK
  , ("+", "")        -- U+002B PLUS SIGN
  , (",", "")        -- U+002C COMMA
  , ("/", "-")       -- U+002F SOLIDUS
  , ("²", "2")       -- U+00B2 SUPERSCRIPT TWO
  , ("³", "3")       -- U+00B3 SUPERSCRIPT THREE
  , ("¹", "1")       -- U+00B9 SUPERSCRIPT ONE
  , (":", ".")       -- U+003A COLON
  , (";", ".")       -- U+003B SEMICOLON
  , ("<", "")        -- U+003C LESS-THAN SIGN
  , ("=", "")        -- U+003D EQUALS SIGN
  , (">", "")        -- U+003E GREATER-THAN SIGN
  , ("?", "")        -- U+003F QUESTION MARK
  , ("@", "")        -- U+0040 COMMERCIAL AT
  , ("[", "")        -- U+005B LEFT SQUARE BRACKET
  , ("\\", "")       -- U+005C REVERSE SOLIDUS
  , ("]", "")        -- U+005D RIGHT SQUARE BRACKET
  , ("_", "-")       -- U+005F LOW LINE
  , ("`", "")        -- U+0060 GRAVE ACCENT
  , ("|", "")        -- U+007C VERTICAL LINE
  , ("¡", "")        -- U+00A1 INVERTED EXCLAMATION MARK
  , ("¬", "")        -- U+00AC NOT SIGN
  , ("À", "A")       -- U+00C0 LATIN CAPITAL LETTER A WITH GRAVE
  , ("Á", "A")       -- U+00C1 LATIN CAPITAL LETTER A WITH ACUTE
  , ("Ã", "A")       -- U+00C3 LATIN CAPITAL LETTER A WITH TILDE
  , ("Æ", "E")       -- U+00C6 LATIN CAPITAL LETTER AE
  , ("É", "E")       -- U+00C9 LATIN CAPITAL LETTER E WITH ACUTE
  , ("Í", "I")       -- U+00CD LATIN CAPITAL LETTER I WITH ACUTE
  , ("Ñ", "N")       -- U+00D1 LATIN CAPITAL LETTER N WITH TILDE
  , ("Ó", "O")       -- U+00D3 LATIN CAPITAL LETTER O WITH ACUTE
  , ("Ú", "U")       -- U+00DA LATIN CAPITAL LETTER U WITH ACUTE
  , ("Ö", "O")       -- U+00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
  , ("×", "")        -- U+00D7 MULTIPLICATION SIGN
  , ("à", "a")       -- U+00E0 LATIN SMALL LETTER A WITH GRAVE
  , ("á", "a")       -- U+00E1 LATIN SMALL LETTER A WITH ACUTE
  , ("â", "a")       -- U+00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX
  , ("ã", "a")       -- U+00E3 LATIN SMALL LETTER A WITH TILDE
  , ("ä", "a")       -- U+00E4 LATIN SMALL LETTER A WITH DIAERESIS
  , ("æ", "e")       -- U+00E6 LATIN SMALL LETTER AE
  , ("ç", "c")       -- U+00E7 LATIN SMALL LETTER C WITH CEDILLA
  , ("é", "e")       -- U+00E9 LATIN SMALL LETTER E WITH ACUTE
  , ("í", "i")       -- U+00ED LATIN SMALL LETTER I WITH ACUTE
  , ("ñ", "n")       -- U+00F1 LATIN SMALL LETTER N WITH TILDE
  , ("ò", "o")       -- U+00F2 LATIN SMALL LETTER O WITH GRAVE
  , ("ó", "o")       -- U+00F3 LATIN SMALL LETTER O WITH ACUTE
  , ("ö", "o")       -- U+00F6 LATIN SMALL LETTER O WITH DIAERESIS
  , ("ø", "o")       -- U+00F8 LATIN SMALL LETTER O WITH STROKE
  , ("ú", "u")       -- U+00FA LATIN SMALL LETTER U WITH ACUTE
  , ("ü", "u")       -- U+00FC LATIN SMALL LETTER U WITH DIAERESIS
  , ("þ", "t")       -- U+00FE LATIN SMALL LETTER THORN
  , ("ÿ", "y")       -- U+00FF LATIN SMALL LETTER Y WITH DIAERESIS
  , ("ć", "c")       -- U+0107 LATIN SMALL LETTER C WITH ACUTE
  , ("č", "c")       -- U+010D LATIN SMALL LETTER C WITH CARON
  , ("Ł", "L")       -- U+0141 LATIN CAPITAL LETTER L WITH STROKE
  , ("ņ", "n")       -- U+0146 LATIN SMALL LETTER N WITH CEDILLA
  , ("ř", "r")       -- U+0159 LATIN SMALL LETTER R WITH CARON
  , ("š", "s")       -- U+0161 LATIN SMALL LETTER S WITH CARON
  , ("ū", "u")       -- U+016B LATIN SMALL LETTER U WITH MACRON
  , ("Ÿ", "Y")       -- U+0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
  , ("Ω", "Omega")   -- U+03A9 GREEK CAPITAL LETTER OMEGA
  , ("α", "alpha")   -- U+03B1 GREEK SMALL LETTER ALPHA
  , ("β", "beta")    -- U+03B2 GREEK SMALL LETTER BETA
  , ("γ", "gamma")   -- U+03B3 GREEK SMALL LETTER GAMMA
  , ("δ", "delta")   -- U+03B4 GREEK SMALL LETTER DELTA
  , ("ε", "epsilon") -- U+03B5 GREEK SMALL LETTER EPSILON
  , ("ζ", "zeta")    -- U+03B6 GREEK SMALL LETTER ZETA
  , ("η", "eta")     -- U+03B7 GREEK SMALL LETTER ETA
  , ("θ", "theta")   -- U+03B8 GREEK SMALL LETTER THETA
  , ("ι", "iota")    -- U+03B9 GREEK SMALL LETTER IOTA
  , ("κ", "kappa")   -- U+03BA GREEK SMALL LETTER KAPPA
  , ("λ", "lambda")  -- U+03BB GREEK SMALL LETTER LAMDA
  , ("μ", "mu")      -- U+03BC GREEK SMALL LETTER MU
  , ("ν", "nu")      -- U+03BD GREEK SMALL LETTER NU
  , ("ξ", "xi")      -- U+03BE GREEK SMALL LETTER XI
  , ("ο", "omicron") -- U+03BF GREEK SMALL LETTER OMICRON
  , ("π", "pi")      -- U+03C0 GREEK SMALL LETTER PI
  , ("ρ", "rho")     -- U+03C1 GREEK SMALL LETTER RHO
  , ("σ", "sigma")   -- U+03C3 GREEK SMALL LETTER SIGMA
  , ("ς", "sigma")   -- U+03C2 GREEK SMALL LETTER FINAL SIGMA
  , ("τ", "tau")     -- U+03C4 GREEK SMALL LETTER TAU
  , ("υ", "upsilon") -- U+03C5 GREEK SMALL LETTER UPSILON
  , ("φ", "phi")     -- U+03C6 GREEK SMALL LETTER PHI
  , ("χ", "chi")     -- U+03C7 GREEK SMALL LETTER CHI
  , ("ψ", "psi")     -- U+03C8 GREEK SMALL LETTER PSI
  , ("ω", "omega")   -- U+03C9 GREEK SMALL LETTER OMEGA
  , ("–", "-")       -- U+2013 EN DASH
  , ("—", "-")       -- U+2014 EM DASH
  , ("‘", "")        -- U+2018 LEFT SINGLE QUOTATION MARK
  , ("’", "")        -- U+2019 RIGHT SINGLE QUOTATION MARK
  , ("‡", "")        -- U+2021 DOUBLE DAGGER
  , ("™", "")        -- U+2122 TRADE MARK SIGN
  , ("\xFFFD", "")   -- U+FFFD REPLACEMENT CHARACTER
  ]

-- | All the characters substitutions, as the concatenation of the
-- hexadecimal, non-numeric, decimal and Unicode tables (in that order).
-- NB that the substitutions are not commutative.
chSubst ∷ [(Text, Text)]
chSubst = chHexSubst ++ chNNSubst ++ chDecSubst ++ chUnicodeSubst

------------------------------------------------------------------------------
-- Author substitutions

-- | Substitutions for author names.
--
-- The keys made of two code points are written with explicit escapes so that
-- they cannot be silently altered by an editor or an encoding conversion.
authorSubst ∷ [(Text, Text)]
authorSubst =
  [ (", ", ",")
  , (" and", ",")
    -- See Issue #1.
  , ("\x00C3\x00AD", "i")  -- U+00C3 and U+00AD
    -- See Issue #1.
  , ("\x00C3\x00A1", "a")  -- U+00C3 and U+00A1
    -- Kept for backward compatibility: the literal previously stored for the
    -- entry above was the single code point below.
  , ("á", "a")             -- U+00E1 LATIN SMALL LETTER A WITH ACUTE
  , ("Mcbride", "McBride")
  ]

------------------------------------------------------------------------------
-- Title substitutions

-- These substitutions should be done before converting to lower case.

-- | Substitutions for titles.
--
-- NOTE(review): several pairs below have a key equal to its replacement
-- (e.g. @("C", "C")@) and @("P ", "P")@ occurs twice. Presumably the keys
-- originally carried markup that has been lost -- verify against the project
-- history. They are kept unchanged here.
titleSubst ∷ [(Text,Text)]
titleSubst =
  [ ("P ", "P")
  , ("0 ", "0")
  , ("C", "C")
  , ("CC", "CC")
  , ("I ", "I")
  , ("J", "J")
  , ("Modus ponens", "Modus ponens")
  , ("P ", "P")
  , ("really ", "really")
  , ("S-P", "S-P")
  , ("3", "3")
    -- The whitespace around `+` is not the standard one.
    -- NOTE(review): verify that the literal still contains that whitespace.
    -- TODO (2017-07-04): Added test case.
  , (" + ", "plus")
  , ("ω", "omega")
  , ("$\\alpha$", "alpha")
  , ("$\\beta$", "beta")
  , ("$\\gamma$", "gamma")
  , ("$\\epsilon$", "epsilon")
  , ("$\\eta$", "eta")
    -- See Issue #2.
  , ("$\\lambda$", "lambda")
  , ("$\\pi$", "pi")
  , ("$\\omega$", "omega")
  , ("{\\sc Coq}", "Coq")
  , ("{\\sf Haskell}:", "Haskell")
  , ("{\\sc QuickSpec}:", "QuickSpec")
  , ("{\\sc QuodLibet}!", "QuodLibet")
  , ("{\\sc Vampire}", "Vampire")
  , ("\x00C5\x203A", "s")  -- U+00C5 and U+203A
    -- The key is the two code points named in the comment. The previously
    -- stored single letter U+00F6 made this pair delete that letter from
    -- titles, whereas 'chSubst' maps it to "o".
    -- NOTE(review): the empty replacement is kept as found; confirm that "o"
    -- was not intended.
  , ("\x00C3\x00B6", "")   -- U+00C3 and U+00B6
  ]