{- |
Copyright: (c) 2018-2020 Kowainik
SPDX-License-Identifier: MPL-2.0
Maintainer: Kowainik <xrom.xkov@gmail.com>

Parsers for strings in TOML format, including basic and literal strings
both singleline and multiline.
-}

module Toml.Parser.String
       ( textP
       , basicStringP
       , literalStringP
       ) where

import Control.Applicative (Alternative (..))
import Control.Applicative.Combinators (count, manyTill, optional)
import Data.Char (chr, isControl)
import Data.Text (Text)

import Toml.Parser.Core (Parser, anySingle, char, eol, hexDigitChar, lexeme, satisfy, space, string,
                         tab, try, (<?>))

import qualified Data.Text as Text


{- | Parser for TOML text. Includes:

1. Basic single-line string.
2. Literal single-line string.
3. Basic multiline string.
4. Literal multiline string.
-}
textP :: Parser Text
textP :: Parser Text
textP = (Parser Text
multilineBasicStringP   Parser Text -> String -> Parser Text
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> "multiline basic string")
    Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Text
multilineLiteralStringP Parser Text -> String -> Parser Text
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> "multiline literal string")
    Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Text
literalStringP          Parser Text -> String -> Parser Text
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> "literal string")
    Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Text
basicStringP            Parser Text -> String -> Parser Text
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> "basic string")
    Parser Text -> String -> Parser Text
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> "text"

{- | Parse a non-control character (control character is a non-printing
character of the Latin-1 subset of Unicode).
-}
nonControlCharP :: Parser Text
nonControlCharP :: Parser Text
nonControlCharP = Char -> Text
Text.singleton (Char -> Text) -> ParsecT Void Text Identity Char -> Parser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Token Text -> Bool) -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isControl) Parser Text -> String -> Parser Text
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> "non-control char"

-- | Parse escape sequences inside basic strings.
escapeSequenceP :: Parser Text
escapeSequenceP :: Parser Text
escapeSequenceP = Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Token Text
'\\' ParsecT Void Text Identity Char
-> ParsecT Void Text Identity Char
-> ParsecT Void Text Identity Char
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ParsecT Void Text Identity Char
forall e s (m :: * -> *). MonadParsec e s m => m (Token s)
anySingle ParsecT Void Text Identity Char
-> (Char -> Parser Text) -> Parser Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
    'b'  -> Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure "\b"
    't'  -> Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure "\t"
    'n'  -> Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure "\n"
    'f'  -> Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure "\f"
    'r'  -> Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure "\r"
    '"'  -> Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure "\""
    '\\' -> Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure "\\"
    'u'  -> Int -> Parser Text
hexUnicodeP 4
    'U'  -> Int -> Parser Text
hexUnicodeP 8
    c :: Char
c    -> String -> Parser Text
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Parser Text) -> String -> Parser Text
forall a b. (a -> b) -> a -> b
$ "Invalid escape sequence: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> "\\" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> [Char
c]
  where
    hexUnicodeP :: Int -> Parser Text
    hexUnicodeP :: Int -> Parser Text
hexUnicodeP n :: Int
n = Int
-> ParsecT Void Text Identity Char
-> ParsecT Void Text Identity String
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
count Int
n ParsecT Void Text Identity Char
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
hexDigitChar ParsecT Void Text Identity String
-> (String -> Parser Text) -> Parser Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \x :: String
x -> case Int -> Maybe Char
toUnicode (Int -> Maybe Char) -> Int -> Maybe Char
forall a b. (a -> b) -> a -> b
$ String -> Int
hexToInt String
x of
        Just c :: Char
c  -> Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char -> Text
Text.singleton Char
c)
        Nothing -> String -> Parser Text
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Parser Text) -> String -> Parser Text
forall a b. (a -> b) -> a -> b
$ "Invalid unicode character: \\"
            String -> String -> String
forall a. Semigroup a => a -> a -> a
<> (if Int
n Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 4 then "u" else "U")
            String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
x
      where
        hexToInt :: String -> Int
        hexToInt :: String -> Int
hexToInt xs :: String
xs = String -> Int
forall a. Read a => String -> a
read (String -> Int) -> String -> Int
forall a b. (a -> b) -> a -> b
$ "0x" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
xs

        toUnicode :: Int -> Maybe Char
        toUnicode :: Int -> Maybe Char
toUnicode x :: Int
x
            -- Ranges from "The Unicode Standard".
            -- See definition D76 in Section 3.9, Unicode Encoding Forms.
            | Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 0      Bool -> Bool -> Bool
&& Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= 0xD7FF   = Char -> Maybe Char
forall a. a -> Maybe a
Just (Int -> Char
chr Int
x)
            | Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 0xE000 Bool -> Bool -> Bool
&& Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= 0x10FFFF = Char -> Maybe Char
forall a. a -> Maybe a
Just (Int -> Char
chr Int
x)
            | Bool
otherwise                    = Maybe Char
forall a. Maybe a
Nothing

-- | Parser for basic string in double quotes.
basicStringP :: Parser Text
basicStringP :: Parser Text
basicStringP = Parser Text -> Parser Text
forall a. Parser a -> Parser a
lexeme (Parser Text -> Parser Text) -> Parser Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ [Text] -> Text
forall a. Monoid a => [a] -> a
mconcat ([Text] -> Text)
-> ParsecT Void Text Identity [Text] -> Parser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Token Text
'"' ParsecT Void Text Identity Char
-> ParsecT Void Text Identity [Text]
-> ParsecT Void Text Identity [Text]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
charP Parser Text
-> ParsecT Void Text Identity Char
-> ParsecT Void Text Identity [Text]
forall (m :: * -> *) a end. Alternative m => m a -> m end -> m [a]
`manyTill` Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Token Text
'"')
  where
    charP :: Parser Text
    charP :: Parser Text
charP = Parser Text
escapeSequenceP Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
nonControlCharP

-- | Parser for literal string in single quotes.
literalStringP :: Parser Text
literalStringP :: Parser Text
literalStringP = Parser Text -> Parser Text
forall a. Parser a -> Parser a
lexeme (Parser Text -> Parser Text) -> Parser Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ String -> Text
Text.pack (String -> Text)
-> ParsecT Void Text Identity String -> Parser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Token Text
'\'' ParsecT Void Text Identity Char
-> ParsecT Void Text Identity String
-> ParsecT Void Text Identity String
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ParsecT Void Text Identity Char
nonEolCharP ParsecT Void Text Identity Char
-> ParsecT Void Text Identity Char
-> ParsecT Void Text Identity String
forall (m :: * -> *) a end. Alternative m => m a -> m end -> m [a]
`manyTill` Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Token Text
'\'')
  where
    nonEolCharP :: Parser Char
    nonEolCharP :: ParsecT Void Text Identity Char
nonEolCharP = (Token Text -> Bool) -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy (\c :: Token Text
c -> Char
Token Text
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= '\n' Bool -> Bool -> Bool
&& Char
Token Text
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= '\r')

-- | Generic parser for multiline string. Used in 'multilineBasicStringP' and
-- 'multilineLiteralStringP'.
multilineP :: Parser Text -> Parser Text -> Parser Text
multilineP :: Parser Text -> Parser Text -> Parser Text
multilineP quotesP :: Parser Text
quotesP allowedCharP :: Parser Text
allowedCharP = Parser Text -> Parser Text
forall a. Parser a -> Parser a
lexeme (Parser Text -> Parser Text) -> Parser Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ [Text] -> Text
forall a. Monoid a => [a] -> a
mconcat ([Text] -> Text)
-> ParsecT Void Text Identity [Text] -> Parser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
    (Parser Text
quotesP Parser Text
-> ParsecT Void Text Identity (Maybe Text)
-> ParsecT Void Text Identity (Maybe Text)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text -> ParsecT Void Text Identity (Maybe Text)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Text
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Tokens s)
eol ParsecT Void Text Identity (Maybe Text)
-> ParsecT Void Text Identity [Text]
-> ParsecT Void Text Identity [Text]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
allowedCharP Parser Text -> Parser Text -> ParsecT Void Text Identity [Text]
forall (m :: * -> *) a end. Alternative m => m a -> m end -> m [a]
`manyTill` Parser Text
quotesP)

-- Parser for basic multiline string in """ quotes.
multilineBasicStringP :: Parser Text
multilineBasicStringP :: Parser Text
multilineBasicStringP = Parser Text -> Parser Text -> Parser Text
multilineP Parser Text
quotesP Parser Text
allowedCharP
  where
    quotesP :: Parser Text
    quotesP :: Parser Text
quotesP = Tokens Text -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string "\"\"\""

    allowedCharP :: Parser Text
    allowedCharP :: Parser Text
allowedCharP = Parser Text
lineEndingBackslashP Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
escapeSequenceP Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
nonControlCharP Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Tokens s)
eol

    lineEndingBackslashP :: Parser Text
    lineEndingBackslashP :: Parser Text
lineEndingBackslashP = Text
Text.empty Text -> ParsecT Void Text Identity () -> Parser Text
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ParsecT Void Text Identity () -> ParsecT Void Text Identity ()
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try (Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Token Text
'\\' ParsecT Void Text Identity Char -> Parser Text -> Parser Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Tokens s)
eol Parser Text
-> ParsecT Void Text Identity () -> ParsecT Void Text Identity ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space)

-- Parser for literal multiline string in ''' quotes.
multilineLiteralStringP :: Parser Text
multilineLiteralStringP :: Parser Text
multilineLiteralStringP = Parser Text -> Parser Text -> Parser Text
multilineP Parser Text
quotesP Parser Text
allowedCharP
  where
    quotesP :: Parser Text
    quotesP :: Parser Text
quotesP = Tokens Text -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string "'''"

    allowedCharP :: Parser Text
    allowedCharP :: Parser Text
allowedCharP = Parser Text
nonControlCharP Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Tokens s)
eol Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Text
Text.singleton (Char -> Text) -> ParsecT Void Text Identity Char -> Parser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Void Text Identity Char
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
tab