{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.RoffChar
   Copyright   : Copyright (C) 2007-2020 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Roff character escaping/unescaping.
-}

module Text.Pandoc.RoffChar (
    standardEscapes
  , characterCodes
  , combiningAccents
  ) where
import qualified Data.Text as T

-- | These are the escapes specifically mentioned in groff_man(7),
-- plus @ and ellipsis.
--
-- Each pair maps a character to the complete roff text that replaces it
-- (the leading backslash is part of the replacement).
standardEscapes :: [(Char, T.Text)]
standardEscapes =
  [ ('\160', "\\ ")
  , ('\'', "\\[aq]")
  , ('‘', "\\[oq]")
  , ('’', "\\[cq]")
  , ('"', "\\[dq]")
  , ('“', "\\[lq]")
  , ('”', "\\[rq]")
  , ('—', "\\[em]")
  , ('–', "\\[en]")
  , ('`', "\\[ga]")
  , ('^', "\\[ha]")
  , ('~', "\\[ti]")
  , ('\\', "\\[rs]")
  , ('@', "\\[at]") -- because we use @ as a table and math delimiter
  , ('\x2026', "\\&...")  -- because u2026 doesn't render on tty
  ]

-- | Table pairing Unicode characters with roff special-character names.
--
-- The names are bare glyph names, without the surrounding @\\[@ … @]@
-- (compare 'standardEscapes', where the same names appear wrapped in
-- that syntax).
--
-- The list is not a one-to-one mapping: some characters occur several
-- times with different names (for example @'^'@ with both @a^@ and @ha@),
-- and the block of spacing accents appears a second time at the end of
-- the list.  Entry order is kept exactly as it was, so a first-match
-- search such as 'lookup' keeps returning the same name.
characterCodes :: [(Char, T.Text)]
characterCodes =
  [ ('Ð', "-D")
  , ('ð', "Sd")
  , ('Þ', "TP")
  , ('þ', "Tp")
  , ('ß', "ss")
  -- Ligatures are written as numeric escapes: each is a single code
  -- point, and spelling it out in ASCII ('ff') is not a valid Char literal.
  , ('\xFB00', "ff")  -- U+FB00 LATIN SMALL LIGATURE FF
  , ('\xFB01', "fi")  -- U+FB01 LATIN SMALL LIGATURE FI
  , ('\xFB02', "fl")  -- U+FB02 LATIN SMALL LIGATURE FL
  , ('\xFB03', "Fi")  -- U+FB03 LATIN SMALL LIGATURE FFI
  , ('\xFB04', "Fl")  -- U+FB04 LATIN SMALL LIGATURE FFL
  , ('Ł', "/L")
  , ('ł', "/l")
  , ('Ø', "/O")
  , ('ø', "/o")
  , ('Æ', "AE")
  , ('æ', "ae")
  , ('Œ', "OE")
  , ('œ', "oe")
  , ('\x0132', "IJ")  -- U+0132 LATIN CAPITAL LIGATURE IJ
  , ('\x0133', "ij")  -- U+0133 LATIN SMALL LIGATURE IJ
  , ('ı', ".i")
  , ('ȷ', ".j")
  -- Accented letters
  , ('Á', "'A")
  , ('Ć', "'C")
  , ('É', "'E")
  , ('Í', "'I")
  , ('Ó', "'O")
  , ('Ú', "'U")
  , ('Ý', "'Y")
  , ('á', "'a")
  , ('ć', "'c")
  , ('é', "'e")
  , ('í', "'i")
  , ('ó', "'o")
  , ('ú', "'u")
  , ('ý', "'y")
  , ('Ä', ":A")
  , ('Ë', ":E")
  , ('Ï', ":I")
  , ('Ö', ":O")
  , ('Ü', ":U")
  , ('Ÿ', ":Y")
  , ('ä', ":a")
  , ('ë', ":e")
  , ('ï', ":i")
  , ('ö', ":o")
  , ('ü', ":u")
  , ('ÿ', ":y")
  , ('Â', "^A")
  , ('Ê', "^E")
  , ('Î', "^I")
  , ('Ô', "^O")
  , ('Û', "^U")
  , ('â', "^a")
  , ('ê', "^e")
  , ('î', "^i")
  , ('ô', "^o")
  , ('û', "^u")
  , ('À', "`A")
  , ('È', "`E")
  , ('Ì', "`I")
  , ('Ò', "`O")
  , ('Ù', "`U")
  , ('à', "`a")
  , ('è', "`e")
  , ('ì', "`i")
  , ('ò', "`o")
  , ('ù', "`u")
  , ('Ã', "~A")
  , ('Ñ', "~N")
  , ('Õ', "~O")
  , ('ã', "~a")
  , ('ñ', "~n")
  , ('õ', "~o")
  , ('Š', "vS")
  , ('š', "vs")
  , ('Ž', "vZ")
  , ('ž', "vz")
  , ('Ç', ",C")
  , ('ç', ",c")
  , ('Å', "oA")
  , ('å', "oa")
  -- Spacing accents (this group is repeated at the end of the list)
  , ('˝', "a\"")
  , ('¯', "a-")
  , ('˙', "a.")
  , ('^', "a^")
  , ('´', "aa")
  , ('`', "ga")
  , ('˘', "ab")
  , ('¸', "ac")
  , ('¨', "ad")
  , ('ˇ', "ah")
  , ('˚', "ao")
  , ('~', "a~")
  , ('˛', "ho")
  , ('^', "ha")
  , ('~', "ti")
  -- Quotation marks
  , ('„', "Bq")
  , ('‚', "bq")
  , ('“', "lq")
  , ('”', "rq")
  , ('‘', "oq")
  , ('’', "cq")
  , ('\'', "aq")
  , ('"', "dq")
  , ('«', "Fo")
  , ('»', "Fc")
  , ('‹', "fo")
  , ('›', "fc")
  -- Punctuation
  , ('¡', "r!")
  , ('¿', "r?")
  , ('—', "em")
  , ('–', "en")
  , ('‐', "hy")
  -- Brackets and bracket pieces
  , ('[', "lB")
  , (']', "rB")
  , ('{', "lC")
  , ('}', "rC")
  , ('⟨', "la")
  , ('⟩', "ra")
  , ('⎪', "bv")
  , ('⎪', "braceex")
  , ('⎡', "bracketlefttp")
  , ('⎣', "bracketleftbt")
  , ('⎢', "bracketleftex")
  , ('⎤', "bracketrighttp")
  , ('⎦', "bracketrightbt")
  , ('⎥', "bracketrightex")
  , ('╭', "lt")
  , ('⎧', "bracelefttp")
  , ('┥', "lk")
  , ('⎨', "braceleftmid")
  , ('╰', "lb")
  , ('⎩', "braceleftbt")
  , ('⎪', "braceleftex")
  , ('╮', "rt")
  , ('⎫', "bracerighttp")
  , ('┝', "rk")
  , ('⎬', "bracerightmid")
  , ('╯', "rb")
  , ('⎭', "bracerightbt")
  , ('⎪', "bracerightex")
  , ('⎛', "parenlefttp")
  , ('⎝', "parenleftbt")
  , ('⎜', "parenleftex")
  , ('⎞', "parenrighttp")
  , ('⎠', "parenrightbt")
  , ('⎟', "parenrightex")
  -- Arrows
  , ('←', "<-")
  , ('→', "->")
  , ('↔', "<>")
  , ('↓', "da")
  , ('↑', "ua")
  , ('↕', "va")
  , ('⇐', "lA")
  , ('⇒', "rA")
  , ('⇔', "hA")
  , ('⇓', "dA")
  , ('⇑', "uA")
  , ('⇕', "vA")
  , ('⎯', "an")
  -- Lines
  , ('|', "ba")
  , ('│', "br")
  , ('_', "ul")
  , ('‾', "rn")
  , ('_', "ru")
  , ('¦', "bb")
  , ('/', "sl")
  , ('\\', "rs")
  -- Text markers
  , ('○', "ci")
  , ('·', "bu")
  , ('‡', "dd")
  , ('†', "dg")
  , ('◊', "lz")
  , ('□', "sq")
  , ('¶', "ps")
  , ('§', "sc")
  , ('☜', "lh")
  , ('☞', "rh")
  , ('@', "at")
  , ('#', "sh")
  , ('↵', "CR")
  , ('✓', "OK")
  -- Legal symbols
  , ('©', "co")
  , ('®', "rg")
  , ('™', "tm")
  -- Currency symbols
  , ('$', "Do")
  , ('¢', "ct")
  , ('€', "eu")
  , ('€', "Eu")
  , ('¥', "Ye")
  , ('£', "Po")
  , ('¤', "Cs")
  , ('ƒ', "Fn")
  -- Units
  , ('°', "de")
  , ('‰', "%0")
  , ('′', "fm")
  , ('″', "sd")
  , ('µ', "mc")
  , ('ª', "Of")
  , ('º', "Om")
  -- Logical symbols
  , ('∧', "AN")
  , ('∨', "OR")
  , ('¬', "no")
  , ('¬', "tno")
  , ('∃', "te")
  , ('∀', "fa")
  , ('∋', "st")
  , ('∴', "3d")
  , ('∴', "tf")
  , ('|', "or")
  -- Fractions and superscripts
  , ('½', "12")
  , ('¼', "14")
  , ('¾', "34")
  , ('⅛', "18")
  , ('⅜', "38")
  , ('⅝', "58")
  , ('⅞', "78")
  , ('¹', "S1")
  , ('²', "S2")
  , ('³', "S3")
  -- Mathematical symbols
  , ('+', "pl")
  , ('−', "mi")
  , ('∓', "-+")
  , ('±', "+-")
  , ('±', "t+-")
  , ('·', "pc")
  , ('⋅', "md")
  , ('×', "mu")
  , ('×', "tmu")
  , ('⊗', "c*")
  , ('⊕', "c+")
  , ('÷', "di")
  , ('÷', "tdi")
  , ('⁄', "f/")
  , ('∗', "**")
  , ('≤', "<=")
  , ('≥', ">=")
  , ('≪', "<<")
  , ('≫', ">>")
  , ('=', "eq")
  , ('≠', "!=")
  , ('≡', "==")
  , ('≢', "ne")
  , ('≅', "=~")
  , ('≃', "|=")
  , ('∼', "ap")
  , ('≈', "~~")
  , ('≈', "~=")
  , ('∝', "pt")
  , ('∅', "es")
  , ('∈', "mo")
  , ('∉', "nm")
  , ('⊂', "sb")
  , ('⊄', "nb")
  , ('⊃', "sp")
  , ('⊅', "nc")
  , ('⊆', "ib")
  , ('⊇', "ip")
  , ('∩', "ca")
  , ('∪', "cu")
  , ('∠', "/_")
  , ('⊥', "pp")
  , ('∫', "is")
  , ('∫', "integral")
  , ('∑', "sum")
  , ('∏', "product")
  , ('∐', "coproduct")
  , ('∇', "gr")
  , ('√', "sr")
  , ('√', "sqrt")
  -- , "radicalex"
  -- "sqrtex"
  , ('⌈', "lc")
  , ('⌉', "rc")
  , ('⌊', "lf")
  , ('⌋', "rf")
  , ('∞', "if")
  , ('ℵ', "Ah")
  , ('ℑ', "Im")
  , ('ℜ', "Re")
  , ('℘', "wp")
  , ('∂', "pd")
  , ('ℏ', "-h")
  , ('ℏ', "hbar")
  -- Greek letters
  , ('Α', "*A")
  , ('Β', "*B")
  , ('Γ', "*G")
  , ('Δ', "*D")
  , ('Ε', "*E")
  , ('Ζ', "*Z")
  , ('Η', "*Y")
  , ('Θ', "*H")
  , ('Ι', "*I")
  , ('Κ', "*K")
  , ('Λ', "*L")
  , ('Μ', "*M")
  , ('Ν', "*N")
  , ('Ξ', "*C")
  , ('Ο', "*O")
  , ('Π', "*P")
  , ('Ρ', "*R")
  , ('Σ', "*S")
  , ('Τ', "*T")
  , ('Υ', "*U")
  , ('Φ', "*F")
  , ('Χ', "*X")
  , ('Ψ', "*Q")
  , ('Ω', "*W")
  , ('α', "*a")
  , ('β', "*b")
  , ('γ', "*g")
  , ('δ', "*d")
  , ('ε', "*e")
  , ('ζ', "*z")
  , ('η', "*y")
  , ('θ', "*h")
  , ('ι', "*i")
  , ('κ', "*k")
  , ('λ', "*l")
  , ('μ', "*m")
  , ('ν', "*n")
  , ('ξ', "*c")
  , ('ο', "*o")
  , ('π', "*p")
  , ('ρ', "*r")
  , ('ς', "ts")
  , ('σ', "*s")
  , ('τ', "*t")
  , ('υ', "*u")
  , ('ϕ', "*f")
  , ('χ', "*x")
  , ('ψ', "*q")
  , ('ω', "*w")
  , ('ϑ', "+h")
  , ('φ', "+f")
  , ('ϖ', "+p")
  , ('ϵ', "+e")
  -- Card suits
  , ('♣', "CL")
  , ('♠', "SP")
  , ('♥', "HE")
  , ('♦', "DI")
  -- Spacing accents again (same entries as the earlier group)
  , ('˝', "a\"")
  , ('¯', "a-")
  , ('˙', "a.")
  , ('^', "a^")
  , ('´', "aa")
  , ('`', "ga")
  , ('˘', "ab")
  , ('¸', "ac")
  , ('¨', "ad")
  , ('ˇ', "ah")
  , ('˚', "ao")
  , ('~', "a~")
  , ('˛', "ho")
  , ('^', "ha")
  , ('~', "ti")
  ]

-- | Table pairing combining accent characters with roff accent names.
--
-- use like: \\[E a^ aa]
--
-- Two code points are listed twice with different names (@'\\770'@ as
-- @a^@ and @ha@, @'\\771'@ as @a~@ and @ti@); the order is kept so that a
-- first-match search such as 'lookup' keeps returning @a^@ and @a~@.
combiningAccents :: [(Char, T.Text)]
combiningAccents =
  [ ('\779', "a\"")
  , ('\772', "a-")
  , ('\775', "a.")
  , ('\770', "a^")
  , ('\769', "aa")
  , ('\768', "ga")
  , ('\774', "ab")
  , ('\807', "ac")
  , ('\776', "ad")
  , ('\780', "ah")
  , ('\778', "ao")
  , ('\771', "a~")
  , ('\808', "ho")
  , ('\770', "ha")
  , ('\771', "ti")
  ]