{-# LANGUAGE OverloadedStrings #-}

-- |
-- Module      :  Text.Gemini
-- Copyright   :  (c) Sena, 2023
-- License     :  AGPL-3.0-or-later
--
-- Maintainer  :  Sena <jn-sena@proton.me>
-- Stability   :  stable
-- Portability :  portable
--
-- A tiny Gemtext (@text/gemini@) parser.
--
-- Parses Gemtext documents from and back to 'Text'. See the Section 5 of the
-- [Gemini Protocol specification](https://geminiprotocol.net/docs/specification.gmi).

module Text.Gemini
  ( -- * Gemtext Types
    GemDocument
  , GemItem (..)
    -- * Decoding from Text
  , decode
  , decodeLine
    -- * Encoding as Text
  , encode
  , encodeItem
  ) where

import Data.Text (Text)
import qualified Data.Text as T
import Data.Maybe (fromMaybe)
import Data.Char (isSpace)
import Data.Bool (bool)


-- | A Gemtext document, in the form of an ordered list of 'GemItem's.
type GemDocument = [GemItem]

-- | A Gemtext item.
data GemItem = GemText !Text -- ^ A regular Gemtext line. -- @'GemText' \<Text>@
             | GemLink !Text !(Maybe Text) -- ^ A Gemtext link. -- @'GemLink' \<Link> \[Optional Description]@
             | GemHeading !Int !Text -- ^ A Gemtext heading of 3 levels max. -- @'GemHeading' \<Level> \<Text>@
             | GemList ![Text] -- ^ A Gemtext unordered list. -- @'GemList' \<Lines>@
             | GemQuote !Text -- ^ A Gemtext quote. -- @'GemQuote' \<Text>@
             | GemPre ![Text] !(Maybe Text) -- ^ A Gemtext preformat. -- @'GemPre' \<Lines> [Optional Alt Text]@
             deriving (Int -> GemItem -> ShowS
GemDocument -> ShowS
GemItem -> String
(Int -> GemItem -> ShowS)
-> (GemItem -> String) -> (GemDocument -> ShowS) -> Show GemItem
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> GemItem -> ShowS
showsPrec :: Int -> GemItem -> ShowS
$cshow :: GemItem -> String
show :: GemItem -> String
$cshowList :: GemDocument -> ShowS
showList :: GemDocument -> ShowS
Show, GemItem -> GemItem -> Bool
(GemItem -> GemItem -> Bool)
-> (GemItem -> GemItem -> Bool) -> Eq GemItem
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: GemItem -> GemItem -> Bool
== :: GemItem -> GemItem -> Bool
$c/= :: GemItem -> GemItem -> Bool
/= :: GemItem -> GemItem -> Bool
Eq)


-- | Parse a Gemtext file as 'GemDocument'.
-- The text should be supplied as an LF-ending 'Text'.
decode :: Text -> GemDocument
decode :: Text -> GemDocument
decode = GemDocument -> [Text] -> GemDocument
parse [] ([Text] -> GemDocument) -> (Text -> [Text]) -> Text -> GemDocument
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text]
T.lines
    where -- Default line-by-line recursive parser with 'GemDocument' carried.
          -- If a list or preformatting is detected, will continue with
          -- @parseList@ and @parsePre@ respectively. Otherwise, will use
          -- 'decodeLine' on the current line only.
          parse :: GemDocument -> [Text] -> GemDocument
          parse :: GemDocument -> [Text] -> GemDocument
parse GemDocument
doc (Text
l:[Text]
ls)
              | Text
l Text -> Text -> Bool
`hasValueOf` Text
"* " = GemDocument -> [Text] -> [Text] -> GemDocument
parseList GemDocument
doc [Int -> Text -> Text
value Int
1 Text
l] [Text]
ls
              | Text
"```" Text -> Text -> Bool
`T.isPrefixOf` Text
l = GemDocument -> [Text] -> Maybe Text -> [Text] -> GemDocument
parsePre GemDocument
doc [] (Text -> Maybe Text
optional (Text -> Maybe Text) -> Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
value Int
3 Text
l) [Text]
ls
              | Bool
otherwise = GemDocument -> [Text] -> GemDocument
parse (GemDocument
doc GemDocument -> GemDocument -> GemDocument
forall a. Semigroup a => a -> a -> a
<> [Text -> GemItem
decodeLine Text
l]) [Text]
ls
          parse GemDocument
doc [] = GemDocument
doc

          -- List recursive parser. Continues with @parse@ when the line
          -- is no longer a list item, and adds the 'GemList' to the 'GemDocument'.
          parseList :: GemDocument -> [Text] -> [Text] -> GemDocument
          parseList :: GemDocument -> [Text] -> [Text] -> GemDocument
parseList GemDocument
doc [Text]
glist (Text
l:[Text]
ls)
              | Text
l Text -> Text -> Bool
`hasValueOf` Text
"* " = GemDocument -> [Text] -> [Text] -> GemDocument
parseList GemDocument
doc ([Text]
glist [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [Int -> Text -> Text
value Int
1 Text
l]) [Text]
ls
              | Bool
otherwise = GemDocument -> [Text] -> GemDocument
parse (GemDocument
doc GemDocument -> GemDocument -> GemDocument
forall a. Semigroup a => a -> a -> a
<> [[Text] -> GemItem
GemList [Text]
glist]) (Text
lText -> [Text] -> [Text]
forall a. a -> [a] -> [a]
:[Text]
ls)
          parseList GemDocument
doc [Text]
glist [] = GemDocument
doc GemDocument -> GemDocument -> GemDocument
forall a. Semigroup a => a -> a -> a
<> [[Text] -> GemItem
GemList [Text]
glist]

          -- Preformatted recursive parser. Continues with @parse@ when the
          -- closing delimiter is reached, and adds the 'GemPre' to the 'GemDocument'.
          parsePre :: GemDocument -> [Text] -> Maybe Text -> [Text] -> GemDocument
          parsePre :: GemDocument -> [Text] -> Maybe Text -> [Text] -> GemDocument
parsePre GemDocument
doc [Text]
glines Maybe Text
alt (Text
l:[Text]
ls)
              | Text
"```" Text -> Text -> Bool
`T.isPrefixOf` Text
l = GemDocument -> [Text] -> GemDocument
parse (GemDocument
doc GemDocument -> GemDocument -> GemDocument
forall a. Semigroup a => a -> a -> a
<> [[Text] -> Maybe Text -> GemItem
GemPre [Text]
glines Maybe Text
alt]) [Text]
ls
              | Bool
otherwise = GemDocument -> [Text] -> Maybe Text -> [Text] -> GemDocument
parsePre GemDocument
doc ([Text]
glines [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [Text
l]) Maybe Text
alt [Text]
ls
          parsePre GemDocument
doc [Text]
glines Maybe Text
alt [] = GemDocument
doc GemDocument -> GemDocument -> GemDocument
forall a. Semigroup a => a -> a -> a
<> [[Text] -> Maybe Text -> GemItem
GemPre [Text]
glines Maybe Text
alt]

-- | Parse a /single/ Gemtext line as 'GemItem'.
--
-- The preformatted texts are regarded as 'GemText's as they are strictly multiline.
decodeLine :: Text -> GemItem
decodeLine :: Text -> GemItem
decodeLine Text
line
    | Text
line Text -> Text -> Bool
`hasValueOf` Text
"=>" = let (Text
link, Text
desc) = (Char -> Bool) -> Text -> (Text, Text)
T.break Char -> Bool
isSpace (Text -> (Text, Text)) -> Text -> (Text, Text)
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
value Int
2 Text
line
                                in Text -> Maybe Text -> GemItem
GemLink Text
link (Maybe Text -> GemItem) -> Maybe Text -> GemItem
forall a b. (a -> b) -> a -> b
$ Text -> Maybe Text
optional (Text -> Maybe Text) -> Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.strip Text
desc
    | Text
line Text -> Text -> Bool
`hasValueOf` Text
"#" = Text -> GemItem
parseHeading Text
line
    | Text
line Text -> Text -> Bool
`hasValueOf` Text
"* " = [Text] -> GemItem
GemList [Int -> Text -> Text
value Int
1 Text
line]
    | Text
line Text -> Text -> Bool
`hasValueOf` Text
">" = Text -> GemItem
GemQuote (Text -> GemItem) -> Text -> GemItem
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
value Int
1 Text
line
    | Bool
otherwise = Text -> GemItem
GemText (Text -> GemItem) -> Text -> GemItem
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.stripEnd Text
line
    where -- Parse a Gemtext heading.
          -- The max level of a heading is 3.
          parseHeading :: Text -> GemItem
          parseHeading :: Text -> GemItem
parseHeading Text
l
              | Text -> Bool
T.null (Text -> Bool) -> Text -> Bool
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.strip Text
text = Text -> GemItem
GemText (Text -> GemItem) -> Text -> GemItem
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.strip Text
l
              | Bool
otherwise = Int -> Text -> GemItem
GemHeading (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Text -> Int
T.length Text
pre) Int
3) (Text -> GemItem) -> Text -> GemItem
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.strip Text
text
              where (Text
pre, Text
text) = (Char -> Bool) -> Text -> (Text, Text)
T.span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'#') Text
l


-- | Encode parsed 'GemDocument' as a Gemtext file.
-- The output 'Text' uses LF-endings. Uses the 'encodeItem' function below.
--
-- Valid prefixes are escaped before encoding.
--
-- Empty lists are ignored if given.
encode :: GemDocument -> Text
encode :: GemDocument -> Text
encode = [Text] -> Text
T.unlines ([Text] -> Text) -> (GemDocument -> [Text]) -> GemDocument -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (GemItem -> Text) -> GemDocument -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map GemItem -> Text
encodeItem (GemDocument -> [Text])
-> (GemDocument -> GemDocument) -> GemDocument -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (GemItem -> Bool) -> GemDocument -> GemDocument
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (GemItem -> Bool) -> GemItem -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. GemItem -> Bool
empty)
    where empty :: GemItem -> Bool
          empty :: GemItem -> Bool
empty (GemList [Text]
list) = [Text] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Text]
list
          empty GemItem
_ = Bool
False

-- | Encode a /single/ parsed 'GemItem' as Gemtext text.
-- The output 'Text' uses LF-endings and might be multiple lines.
--
-- Valid prefixes are escaped before encoding.
--
-- /Beware/ that the output text doesn't end with a newline.
--
-- The spaces in 'GemLink's are replaced with @%20@ to normalize them.
encodeItem :: GemItem -> Text
encodeItem :: GemItem -> Text
encodeItem (GemText Text
line) = Text -> Text
escapePrefixes Text
line
encodeItem (GemLink Text
link Maybe Text
desc) = Text
"=> " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> HasCallStack => Text -> Text -> Text -> Text
Text -> Text -> Text -> Text
T.replace Text
" " Text
"%20" Text
link Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text -> (Text -> Text) -> Maybe Text -> Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Text
"" (Text
" " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<>) Maybe Text
desc
encodeItem (GemHeading Int
level Text
text) = Int -> Text -> Text
T.replicate (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
level Int
3) Text
"#" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
text
encodeItem (GemList [Text]
list) = Text -> [Text] -> Text
T.intercalate Text
"\n" ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ (Text -> Text) -> [Text] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text
"* " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<>) [Text]
list
encodeItem (GemQuote Text
text) = Text
"> " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
text
encodeItem (GemPre [Text]
text Maybe Text
alt) = Text -> [Text] -> Text
T.intercalate Text
"\n" ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ [Text
"```" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
"" Maybe Text
alt] [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [Text]
text [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [Text
"```"]


-- Escape the line prefixes by adding a whitespace at the beginning for 'GemText'.
escapePrefixes :: Text -> Text
escapePrefixes :: Text -> Text
escapePrefixes Text
line = Text -> Text -> Bool -> Text
forall a. a -> a -> Bool -> a
bool Text
line (Char -> Text -> Text
T.cons Char
' ' Text
line) Bool
escape
    where escape :: Bool
escape = Text
"```" Text -> Text -> Bool
`T.isPrefixOf` Text
line Bool -> Bool -> Bool
||
                   (Text -> Bool) -> [Text] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (Text -> Text -> Bool
hasValueOf Text
line) [Text
"=>", Text
"* ", Text
">"] Bool -> Bool -> Bool
||
                   (Text
line Text -> Text -> Bool
`hasValueOf` Text
"#" Bool -> Bool -> Bool
&& let (Text
_, Text
text) = (Char -> Bool) -> Text -> (Text, Text)
T.span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'#') Text
line
                                              in Bool -> Bool
not (Text -> Bool
T.null (Text -> Bool) -> Text -> Bool
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.strip Text
text))


-- @True@ if the the line has prefix /and/ a following value.
hasValueOf :: Text -> Text -> Bool
hasValueOf :: Text -> Text -> Bool
hasValueOf Text
line Text
prefix = Text
prefix Text -> Text -> Bool
`T.isPrefixOf` Text
line Bool -> Bool -> Bool
&&
                         Bool -> Bool
not (Text -> Bool
T.null (Text -> Bool) -> Text -> Bool
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
value (Text -> Int
T.length Text
prefix) Text
line)

-- Get the value of a line.
-- Removes the prefix of given length and strips.
value :: Int -> Text -> Text
value :: Int -> Text -> Text
value Int
prefix = Text -> Text
T.strip (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
T.drop Int
prefix

-- @Nothing@ if the text is empty.
optional :: Text -> Maybe Text
optional :: Text -> Maybe Text
optional Text
text = Maybe Text -> Maybe Text -> Bool -> Maybe Text
forall a. a -> a -> Bool -> a
bool (Text -> Maybe Text
forall a. a -> Maybe a
Just Text
text) Maybe Text
forall a. Maybe a
Nothing (Bool -> Maybe Text) -> Bool -> Maybe Text
forall a b. (a -> b) -> a -> b
$ Text -> Bool
T.null Text
text