{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE OverloadedStrings #-}
module Dhall.Parser.Token (
validCodepoint,
whitespace,
nonemptyWhitespace,
bashEnvironmentVariable,
posixEnvironmentVariable,
ComponentType(..),
text,
char,
file_,
label,
anyLabel,
labels,
httpRaw,
hexdig,
identifier,
hexNumber,
doubleLiteral,
doubleInfinity,
naturalLiteral,
integerLiteral,
_Optional,
_if,
_then,
_else,
_let,
_in,
_as,
_using,
_merge,
_toMap,
_assert,
_Some,
_None,
_NaturalFold,
_NaturalBuild,
_NaturalIsZero,
_NaturalEven,
_NaturalOdd,
_NaturalToInteger,
_NaturalShow,
_NaturalSubtract,
_IntegerClamp,
_IntegerNegate,
_IntegerShow,
_IntegerToDouble,
_DoubleShow,
_ListBuild,
_ListFold,
_ListLength,
_ListHead,
_ListLast,
_ListIndexed,
_ListReverse,
_OptionalFold,
_OptionalBuild,
_Bool,
_Natural,
_Integer,
_Double,
_Text,
_TextShow,
_List,
_True,
_False,
_NaN,
_Type,
_Kind,
_Sort,
_Location,
_equal,
_or,
_plus,
_textAppend,
_listAppend,
_and,
_times,
_doubleEqual,
_notEqual,
_dot,
_openBrace,
_closeBrace,
_openBracket,
_closeBracket,
_openAngle,
_closeAngle,
_bar,
_comma,
_openParens,
_closeParens,
_colon,
_at,
_equivalent,
_missing,
_importAlt,
_combine,
_combineTypes,
_prefer,
_lambda,
_forall,
_arrow,
_doubleColon,
) where
import Dhall.Parser.Combinators
import Control.Applicative (Alternative(..), optional)
import Data.Bits ((.&.))
import Data.Functor (void, ($>))
import Data.Semigroup (Semigroup(..))
import Data.Text (Text)
import Dhall.Syntax
import Dhall.Set (Set)
import Prelude hiding (const, pi)
import Text.Parser.Combinators (choice, try, (<?>))
import qualified Control.Monad
import qualified Data.Char as Char
import qualified Data.Foldable
import qualified Data.HashSet
import qualified Data.List.NonEmpty
import qualified Data.Text
import qualified Dhall.Set
import qualified Network.URI.Encode as URI.Encode
import qualified Text.Megaparsec
import qualified Text.Megaparsec.Char.Lexer
import qualified Text.Parser.Char
import qualified Text.Parser.Combinators
import Numeric.Natural (Natural)
import Prelude hiding (const, pi)
validCodepoint :: Int -> Bool
validCodepoint c =
not (category == Char.Surrogate
|| c .&. 0xFFFE == 0xFFFE
|| c .&. 0xFFFF == 0xFFFF)
where
category = Char.generalCategory (Char.chr c)
whitespace :: Parser ()
whitespace = Text.Parser.Combinators.skipMany whitespaceChunk
nonemptyWhitespace :: Parser ()
nonemptyWhitespace = Text.Parser.Combinators.skipSome whitespaceChunk
alpha :: Char -> Bool
alpha c = ('\x41' <= c && c <= '\x5A') || ('\x61' <= c && c <= '\x7A')
digit :: Char -> Bool
digit c = '\x30' <= c && c <= '\x39'
alphaNum :: Char -> Bool
alphaNum c = alpha c || digit c
hexdig :: Char -> Bool
hexdig c =
('0' <= c && c <= '9')
|| ('A' <= c && c <= 'F')
|| ('a' <= c && c <= 'f')
signPrefix :: Num a => Parser (a -> a)
signPrefix = (do
let positive = fmap (\_ -> id ) (char '+')
let negative = fmap (\_ -> negate) (char '-')
positive <|> negative ) <?> "sign"
doubleLiteral :: Parser Double
doubleLiteral = (do
sign <- signPrefix <|> pure id
a <- Text.Megaparsec.Char.Lexer.float
return (sign a) ) <?> "literal"
doubleInfinity :: Parser Double
doubleInfinity = (do
let negative = fmap (\_ -> negate) (char '-')
sign <- negative <|> pure id
a <- text "Infinity" >> return (1.0/0.0)
return (sign a) ) <?> "literal"
integerLiteral :: Parser Integer
integerLiteral = (do
sign <- signPrefix
a <- naturalLiteral
return (sign (fromIntegral a)) ) <?> "literal"
naturalLiteral :: Parser Natural
naturalLiteral = (do
a <- try (char '0' >> char 'x' >> Text.Megaparsec.Char.Lexer.hexadecimal)
<|> decimal
<|> (char '0' $> 0)
return a ) <?> "literal"
where
decimal = do
n <- headDigit
ns <- many tailDigit
return (mkNum (n:ns))
where
headDigit = decimalDigit nonZeroDigit <?> "non-zero digit"
where
nonZeroDigit c = '1' <= c && c <= '9'
tailDigit = decimalDigit digit <?> "digit"
decimalDigit predicate = do
c <- Text.Parser.Char.satisfy predicate
return (fromIntegral (Char.ord c - Char.ord '0'))
mkNum = Data.Foldable.foldl' step 0
where
step acc x = acc * 10 + x
identifier :: Parser Var
identifier = do
x <- label
let indexed = try $ do
whitespace
_at
whitespace
n <- naturalLiteral
return (fromIntegral n)
n <- indexed <|> pure 0
return (V x n)
whitespaceChunk :: Parser ()
whitespaceChunk =
choice
[ void (Dhall.Parser.Combinators.takeWhile1 predicate)
, void (Text.Parser.Char.text "\r\n" <?> "newline")
, lineComment
, blockComment
] <?> "whitespace"
where
predicate c = c == ' ' || c == '\t' || c == '\n'
hexNumber :: Parser Int
hexNumber = choice [ hexDigit, hexUpper, hexLower ]
where
hexDigit = do
c <- Text.Parser.Char.satisfy predicate
return (Char.ord c - Char.ord '0')
where
predicate c = '0' <= c && c <= '9'
hexUpper = do
c <- Text.Parser.Char.satisfy predicate
return (10 + Char.ord c - Char.ord 'A')
where
predicate c = 'A' <= c && c <= 'F'
hexLower = do
c <- Text.Parser.Char.satisfy predicate
return (10 + Char.ord c - Char.ord 'a')
where
predicate c = 'a' <= c && c <= 'f'
lineComment :: Parser ()
lineComment = do
_ <- text "--"
let predicate c = ('\x20' <= c && c <= '\x10FFFF') || c == '\t'
_ <- Dhall.Parser.Combinators.takeWhile predicate
endOfLine
return ()
where
endOfLine =
( void (Text.Parser.Char.char '\n' )
<|> void (Text.Parser.Char.text "\r\n")
) <?> "newline"
blockComment :: Parser ()
blockComment = do
_ <- text "{-"
blockCommentContinue
blockCommentChunk :: Parser ()
blockCommentChunk =
choice
[ blockComment
, characters
, character
, endOfLine
]
where
characters = void (Dhall.Parser.Combinators.takeWhile1 predicate)
where
predicate c =
'\x20' <= c && c <= '\x10FFFF' && c /= '-' && c /= '{'
|| c == '\n'
|| c == '\t'
character = void (Text.Parser.Char.satisfy predicate)
where
predicate c = '\x20' <= c && c <= '\x10FFFF' || c == '\n' || c == '\t'
endOfLine = void (Text.Parser.Char.text "\r\n" <?> "newline")
blockCommentContinue :: Parser ()
blockCommentContinue = endOfComment <|> continue
where
endOfComment = void (text "-}")
continue = do
blockCommentChunk
blockCommentContinue
simpleLabel :: Bool -> Parser Text
simpleLabel allowReserved = try (do
c <- Text.Parser.Char.satisfy headCharacter
rest <- Dhall.Parser.Combinators.takeWhile tailCharacter
let t = Data.Text.cons c rest
Control.Monad.guard (allowReserved || not (Data.HashSet.member t reservedIdentifiers))
return t )
where
headCharacter :: Char -> Bool
headCharacter c = alpha c || c == '_'
tailCharacter :: Char -> Bool
tailCharacter c = alphaNum c || c == '_' || c == '-' || c == '/'
backtickLabel :: Parser Text
backtickLabel = do
_ <- char '`'
t <- takeWhile1 predicate
_ <- char '`'
return t
where
predicate c =
'\x20' <= c && c <= '\x5F'
|| '\x61' <= c && c <= '\x7E'
labels :: Parser (Set Text)
labels = do
_openBrace
whitespace
xs <- nonEmptyLabels <|> emptyLabels
_closeBrace
return xs
where
emptyLabels = pure Dhall.Set.empty
nonEmptyLabels = do
x <- anyLabel
whitespace
xs <- many (do _comma; whitespace; l <- anyLabel; whitespace; return l)
noDuplicates (x : xs)
label :: Parser Text
label = backtickLabel <|> simpleLabel False <?> "label"
anyLabel :: Parser Text
anyLabel = (do
t <- backtickLabel <|> simpleLabel True
return t ) <?> "any label"
bashEnvironmentVariable :: Parser Text
bashEnvironmentVariable = satisfy predicate0 <> star (satisfy predicate1)
where
predicate0 c = alpha c || c == '_'
predicate1 c = alphaNum c || c == '_'
posixEnvironmentVariable :: Parser Text
posixEnvironmentVariable = plus posixEnvironmentVariableCharacter
posixEnvironmentVariableCharacter :: Parser Text
posixEnvironmentVariableCharacter =
escapeCharacter <|> satisfy predicate1
where
escapeCharacter = do
_ <- char '\\'
c <- Text.Parser.Char.satisfy (`elem` ("\"\\abfnrtv" :: String))
case c of
'"' -> return "\""
'\\' -> return "\\"
'a' -> return "\a"
'b' -> return "\b"
'f' -> return "\f"
'n' -> return "\n"
'r' -> return "\r"
't' -> return "\t"
'v' -> return "\v"
_ -> empty
predicate1 c =
('\x20' <= c && c <= '\x21')
|| ('\x23' <= c && c <= '\x3C')
|| ('\x3E' <= c && c <= '\x5B')
|| ('\x5D' <= c && c <= '\x7E')
quotedPathCharacter :: Char -> Bool
quotedPathCharacter c =
('\x20' <= c && c <= '\x21')
|| ('\x23' <= c && c <= '\x2E')
|| ('\x30' <= c && c <= '\x10FFFF')
data ComponentType = URLComponent | FileComponent
pathComponent :: ComponentType -> Parser Text
pathComponent componentType = do
_ <- "/" :: Parser Text
let pathData =
case componentType of
FileComponent -> do
Text.Megaparsec.takeWhile1P Nothing Dhall.Syntax.pathCharacter
URLComponent -> do
star pchar
let quotedPathData = do
_ <- char '"'
t <- Text.Megaparsec.takeWhile1P Nothing quotedPathCharacter
_ <- char '"'
case componentType of
FileComponent -> do
return t
URLComponent -> do
return (URI.Encode.encodeText t)
quotedPathData <|> pathData
file_ :: ComponentType -> Parser File
file_ componentType = do
let emptyPath =
case componentType of
URLComponent -> pure (pure "")
FileComponent -> empty
path <- Data.List.NonEmpty.some1 (pathComponent componentType) <|> emptyPath
let directory = Directory (reverse (Data.List.NonEmpty.init path))
let file = Data.List.NonEmpty.last path
return (File {..})
scheme_ :: Parser Scheme
scheme_ =
("http" :: Parser Text)
*> ((("s" :: Parser Text) *> pure HTTPS) <|> pure HTTP)
<* ("://" :: Parser Text)
httpRaw :: Parser URL
httpRaw = do
scheme <- scheme_
authority <- authority_
path <- file_ URLComponent
query <- optional (("?" :: Parser Text) *> query_)
let headers = Nothing
return (URL {..})
authority_ :: Parser Text
authority_ = option (try (userinfo <> "@")) <> host <> option (":" <> port)
userinfo :: Parser Text
userinfo = star (satisfy predicate <|> pctEncoded)
where
predicate c = unreserved c || subDelims c || c == ':'
host :: Parser Text
host = choice [ ipLiteral, try ipV4Address, domain ]
port :: Parser Text
port = star (satisfy digit)
ipLiteral :: Parser Text
ipLiteral = "[" <> (ipV6Address <|> ipVFuture) <> "]"
ipVFuture :: Parser Text
ipVFuture = "v" <> plus (satisfy hexdig) <> "." <> plus (satisfy predicate)
where
predicate c = unreserved c || subDelims c || c == ':'
ipV6Address :: Parser Text
ipV6Address =
choice
[ try alternative0
, try alternative1
, try alternative2
, try alternative3
, try alternative4
, try alternative5
, try alternative6
, try alternative7
, alternative8
]
where
alternative0 = count 6 (h16 <> ":") <> ls32
alternative1 = "::" <> count 5 (h16 <> ":") <> ls32
alternative2 = option h16 <> "::" <> count 4 (h16 <> ":") <> ls32
alternative3 =
option (h16 <> range 0 1 (try (":" <> h16)))
<> "::"
<> count 3 (h16 <> ":")
<> ls32
alternative4 =
option (h16 <> range 0 2 (try (":" <> h16)))
<> "::"
<> count 2 (h16 <> ":")
<> ls32
alternative5 =
option (h16 <> range 0 3 (try (":" <> h16)))
<> "::"
<> h16
<> ":"
<> ls32
alternative6 =
option (h16 <> range 0 4 (try (":" <> h16))) <> "::" <> ls32
alternative7 =
option (h16 <> range 0 5 (try (":" <> h16))) <> "::" <> h16
alternative8 =
option (h16 <> range 0 6 (try (":" <> h16))) <> "::"
h16 :: Parser Text
h16 = range 1 3 (satisfy hexdig)
ls32 :: Parser Text
ls32 = try (h16 <> ":" <> h16) <|> ipV4Address
ipV4Address :: Parser Text
ipV4Address = decOctet <> "." <> decOctet <> "." <> decOctet <> "." <> decOctet
decOctet :: Parser Text
decOctet =
choice
[ try alternative4
, try alternative3
, try alternative2
, try alternative1
, alternative0
]
where
alternative0 = satisfy digit
alternative1 = satisfy predicate <> satisfy digit
where
predicate c = '\x31' <= c && c <= '\x39'
alternative2 = "1" <> count 2 (satisfy digit)
alternative3 = "2" <> satisfy predicate <> satisfy digit
where
predicate c = '\x30' <= c && c <= '\x34'
alternative4 = "25" <> satisfy predicate
where
predicate c = '\x30' <= c && c <= '\x35'
domain :: Parser Text
domain = domainLabel <> star ("." <> domainLabel ) <> option "."
domainLabel :: Parser Text
domainLabel = plus alphaNum_ <> star (plus "-" <> plus alphaNum_)
where
alphaNum_ = satisfy alphaNum
pchar :: Parser Text
pchar = satisfy predicate <|> pctEncoded
where
predicate c = unreserved c || subDelims c || c == ':' || c == '@'
query_ :: Parser Text
query_ = star (pchar <|> satisfy predicate)
where
predicate c = c == '/' || c == '?'
pctEncoded :: Parser Text
pctEncoded = "%" <> count 2 (satisfy hexdig)
subDelims :: Char -> Bool
subDelims c = c `elem` ("!$&'()*+,;=" :: String)
unreserved :: Char -> Bool
unreserved c =
alphaNum c || c == '-' || c == '.' || c == '_' || c == '~'
text :: Data.Text.Text -> Parser Text
text t = Text.Parser.Char.text t <?> Data.Text.unpack t
{-# INLINE text #-}
char :: Char -> Parser Char
char c = Text.Parser.Char.char c <?> [ c ]
{-# INLINE char #-}
reserved :: Data.Text.Text -> Parser ()
reserved x = void (text x)
reservedChar :: Char -> Parser ()
reservedChar c = void (char c)
builtin :: Data.Text.Text -> Parser ()
builtin x = reserved x <?> "built-in"
{-# INLINE builtin #-}
operator :: Data.Text.Text -> Parser ()
operator x = reserved x <?> "operator"
{-# INLINE operator #-}
operatorChar :: Char -> Parser ()
operatorChar x = reservedChar x <?> "operator"
{-# INLINE operatorChar #-}
keyword :: Data.Text.Text -> Parser ()
keyword x = try (void (text x)) <?> "keyword"
_if :: Parser ()
_if = keyword "if"
_then :: Parser ()
_then = keyword "then"
_else :: Parser ()
_else = keyword "else"
_let :: Parser ()
_let = keyword "let"
_in :: Parser ()
_in = keyword "in"
_as :: Parser ()
_as = keyword "as"
_using :: Parser ()
_using = keyword "using"
_merge :: Parser ()
_merge = keyword "merge"
_toMap :: Parser ()
_toMap = keyword "toMap"
_assert :: Parser ()
_assert = keyword "assert"
_Some :: Parser ()
_Some = keyword "Some"
_None :: Parser ()
_None = builtin "None"
_NaturalFold :: Parser ()
_NaturalFold = builtin "Natural/fold"
_NaturalBuild :: Parser ()
_NaturalBuild = builtin "Natural/build"
_NaturalIsZero :: Parser ()
_NaturalIsZero = builtin "Natural/isZero"
_NaturalEven :: Parser ()
_NaturalEven = builtin "Natural/even"
_NaturalOdd :: Parser ()
_NaturalOdd = builtin "Natural/odd"
_NaturalToInteger :: Parser ()
_NaturalToInteger = builtin "Natural/toInteger"
_NaturalShow :: Parser ()
_NaturalShow = builtin "Natural/show"
_NaturalSubtract :: Parser ()
_NaturalSubtract = builtin "Natural/subtract"
_IntegerClamp :: Parser ()
_IntegerClamp = builtin "Integer/clamp"
_IntegerNegate :: Parser ()
_IntegerNegate = builtin "Integer/negate"
_IntegerShow :: Parser ()
_IntegerShow = builtin "Integer/show"
_IntegerToDouble :: Parser ()
_IntegerToDouble = builtin "Integer/toDouble"
_DoubleShow :: Parser ()
_DoubleShow = builtin "Double/show"
_ListBuild :: Parser ()
_ListBuild = builtin "List/build"
_ListFold :: Parser ()
_ListFold = builtin "List/fold"
_ListLength :: Parser ()
_ListLength = builtin "List/length"
_ListHead :: Parser ()
_ListHead = builtin "List/head"
_ListLast :: Parser ()
_ListLast = builtin "List/last"
_ListIndexed :: Parser ()
_ListIndexed = builtin "List/indexed"
_ListReverse :: Parser ()
_ListReverse = builtin "List/reverse"
_OptionalFold :: Parser ()
_OptionalFold = builtin "Optional/fold"
_OptionalBuild :: Parser ()
_OptionalBuild = builtin "Optional/build"
_Bool :: Parser ()
_Bool = builtin "Bool"
_Optional :: Parser ()
_Optional = builtin "Optional"
_Natural :: Parser ()
_Natural = builtin "Natural"
_Integer :: Parser ()
_Integer = builtin "Integer"
_Double :: Parser ()
_Double = builtin "Double"
_Text :: Parser ()
_Text = builtin "Text"
_TextShow :: Parser ()
_TextShow = builtin "Text/show"
_List :: Parser ()
_List = builtin "List"
_True :: Parser ()
_True = builtin "True"
_False :: Parser ()
_False = builtin "False"
_NaN :: Parser ()
_NaN = builtin "NaN"
_Type :: Parser ()
_Type = builtin "Type"
_Kind :: Parser ()
_Kind = builtin "Kind"
_Sort :: Parser ()
_Sort = builtin "Sort"
_Location :: Parser ()
_Location = builtin "Location"
_equal :: Parser ()
_equal = reservedChar '='
_or :: Parser ()
_or = operator "||"
_plus :: Parser ()
_plus = operatorChar '+'
_textAppend :: Parser ()
_textAppend = operator "++"
_listAppend :: Parser ()
_listAppend = operatorChar '#'
_and :: Parser ()
_and = operator "&&"
_times :: Parser ()
_times = operatorChar '*'
_doubleEqual :: Parser ()
_doubleEqual = operator "=="
_notEqual :: Parser ()
_notEqual = operator "!="
_dot :: Parser ()
_dot = operatorChar '.'
_openBrace :: Parser ()
_openBrace = reservedChar '{'
_closeBrace :: Parser ()
_closeBrace = reservedChar '}'
_openBracket :: Parser ()
_openBracket = reservedChar '['
_closeBracket :: Parser ()
_closeBracket = reservedChar ']'
_openAngle :: Parser ()
_openAngle = reservedChar '<'
_closeAngle :: Parser ()
_closeAngle = reservedChar '>'
_bar :: Parser ()
_bar = reservedChar '|'
_comma :: Parser ()
_comma = reservedChar ',' <?> "\',\'"
_openParens :: Parser ()
_openParens = reservedChar '('
_closeParens :: Parser ()
_closeParens = reservedChar ')'
_colon :: Parser ()
_colon = reservedChar ':'
_at :: Parser ()
_at = reservedChar '@' <?> "\"@\""
_equivalent :: Parser ()
_equivalent = (void (char '≡' <?> "\"≡\"") <|> void (text "===")) <?> "operator"
_missing :: Parser ()
_missing =
keyword "missing"
*> Text.Megaparsec.notFollowedBy (Text.Parser.Char.satisfy tailCharacter)
_importAlt :: Parser ()
_importAlt = operatorChar '?'
_combine :: Parser ()
_combine = (void (char '∧' <?> "\"∧\"") <|> void (text "/\\")) <?> "operator"
_combineTypes :: Parser ()
_combineTypes = (void (char '⩓' <?> "\"⩓\"") <|> void (text "//\\\\")) <?> "operator"
_prefer :: Parser ()
_prefer = (void (char '⫽' <?> "\"⫽\"") <|> void (text "//")) <?> "operator"
_lambda :: Parser ()
_lambda = void (Text.Parser.Char.satisfy predicate) <?> "\\"
where
predicate 'λ' = True
predicate '\\' = True
predicate _ = False
_forall :: Parser ()
_forall = (void (char '∀' <?> "\"∀\"") <|> void (text "forall")) <?> "forall"
_arrow :: Parser ()
_arrow = (void (char '→' <?> "\"→\"") <|> void (text "->")) <?> "->"
_doubleColon :: Parser ()
_doubleColon = operator "::"