{-# LANGUAGE OverloadedStrings #-}
module Language.Gemini (
-- * Gemini documents
  GeminiDocument
, GeminiLine(..)
-- * Decoding
, decodeGemini
-- * Encoding
, encodeGemini
) where

import Data.Text.Lazy (Text)
import qualified Data.Text.Lazy as T

import Data.Char (isSpace)
import Data.Int (Int64)

-- Gemini documents
----------------------------

type GeminiDocument = [GeminiLine]

data GeminiLine = LText Text -- ^ Normal text
                | LLink Text (Maybe Text) -- ^ A link with an optional description
                | LPre [Text] -- ^ A preformatted block containing multiple lines
                | LH1 Text -- ^ A first level heading
                | LH2 Text -- ^ A second level heading
                | LH3 Text -- ^ A third level heading
                | LItem Text -- ^ A list item
                | LQuote Text -- ^ A quotation
  deriving (Show, Read, Eq)

-- Decoding
----------------------------

decodeGemini :: Bool -- ^ Whether to allow unix-style line endings (\n)
             -> Text -- ^ Text to parse
             -> GeminiDocument
-- gemini is really simple, so we do not even use a parsing library
decodeGemini allowUnixStyle = go . (if allowUnixStyle then concatMap T.lines else id)
                                 . T.splitOn "\CR\LF"
  where
    go [] = []
    go (l:ls) | isPreToggle l = let (pres, rest) = break isPreToggle ls
                                 in LPre pres : go (drop 1 rest)
              | "=>" `T.isPrefixOf` l = parseLink (dropPrefix 2 l) : go ls
              | "###" `T.isPrefixOf` l = LH3 (dropPrefix 3 l) : go ls
              | "##" `T.isPrefixOf` l = LH2 (dropPrefix 2 l) : go ls
              | "#" `T.isPrefixOf` l = LH1 (dropPrefix 1 l) : go ls
              | "* " `T.isPrefixOf` l = LItem (dropPrefix 2 l) : go ls
              | ">" `T.isPrefixOf` l = LQuote (dropPrefix 1 l) : go ls
              | otherwise = LText l : go ls

isPreToggle :: Text -> Bool
isPreToggle = T.isPrefixOf "```"

dropPrefix :: Int64 -> Text -> Text
dropPrefix n = T.stripStart . T.drop n

parseLink :: Text -> GeminiLine
parseLink txt = LLink link $ if T.null desc' then Nothing else Just desc'
  where
    (link, desc) = T.break isSpace txt
    desc' = T.stripStart desc

-- Encoding
----------------------------

encodeGemini :: GeminiDocument -> Text
encodeGemini = T.intercalate "\CR\LF" . fmap encodeLine

encodeLine :: GeminiLine -> Text
encodeLine (LText t) = escapeLText t
encodeLine (LLink l desc) = "=> " <> escapeLink l <> " " <> desc'
  where desc' = maybe T.empty escapeNewlines desc
encodeLine (LPre ls) = T.intercalate "\CR\LF" $
  "```" : fmap escapeLPre ls <> ["```"]
encodeLine (LH1 t) = "# " <> escapeNewlines t
encodeLine (LH2 t) = "## " <> escapeNewlines t
encodeLine (LH3 t) = "### " <> escapeNewlines t
encodeLine (LItem t) = "* " <> escapeNewlines t
encodeLine (LQuote t) = "> " <> escapeNewlines t

--- TODO ask about actual escaping rules instead of just using "\\" and stripping newlines
escapeCharacter :: Char
escapeCharacter = ' '

escapeLPre :: Text -> Text
escapeLPre = escapePrePrefix . escapeNewlines

escapeLText :: Text -> Text
escapeLText = escapeAnyPrefix . escapeNewlines

escapeLink :: Text -> Text
-- Ideally spaces should be urlencoded but nonmalicious agents wouldn't put
-- whitespace in a link anyway.
escapeLink = T.map $ \c -> if isSpace c then '+' else c

escapeNewlines :: Text -> Text
escapeNewlines = T.map crlfToSpace
  where
    crlfToSpace '\CR' = ' '
    crlfToSpace '\LF' = ' '
    crlfToSpace c     = c

escapePrePrefix :: Text -> Text
escapePrePrefix t | "```" `T.isPrefixOf` t = T.cons escapeCharacter t
                  | otherwise                     = t

escapeAnyPrefix :: Text -> Text
escapeAnyPrefix t | reservedPrefix t = T.cons escapeCharacter t
                  | otherwise        = t

reservedPrefix :: Text -> Bool
reservedPrefix t = any (`T.isPrefixOf` t)
  [ "=>"
  , "```"
  , "#"
  , "* "
  , ">"
  ]