{-# LANGUAGE CPP #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Dhall.Parser.Expression where
import Control.Applicative (Alternative(..), optional)
import Data.ByteArray.Encoding (Base(..))
import Data.Foldable (foldl')
import Data.Functor (void)
import Data.List.NonEmpty (NonEmpty(..))
import Data.Semigroup (Semigroup(..))
import Data.Text (Text)
import Dhall.Syntax
import Dhall.Src (Src(..))
import Prelude hiding (const, pi)
import Text.Parser.Combinators (choice, try, (<?>))
import qualified Control.Monad
import qualified Data.ByteArray.Encoding
import qualified Data.ByteString
import qualified Data.Char as Char
import qualified Data.Foldable
import qualified Data.List
import qualified Data.List.NonEmpty
import qualified Data.Sequence
import qualified Data.Text
import qualified Data.Text.Encoding
import qualified Dhall.Crypto
import qualified Text.Megaparsec
import Dhall.Parser.Combinators
import Dhall.Parser.Token
getSourcePos :: Text.Megaparsec.MonadParsec e s m =>
m Text.Megaparsec.SourcePos
getSourcePos =
Text.Megaparsec.getSourcePos
{-# INLINE getSourcePos #-}
getOffset :: Text.Megaparsec.MonadParsec e s m => m Int
getOffset = Text.Megaparsec.stateOffset <$> Text.Megaparsec.getParserState
{-# INLINE getOffset #-}
setOffset :: Text.Megaparsec.MonadParsec e s m => Int -> m ()
setOffset o = Text.Megaparsec.updateParserState $ \state ->
state
{ Text.Megaparsec.stateOffset = o }
{-# INLINE setOffset #-}
src :: Parser a -> Parser Src
src parser = do
before <- getSourcePos
(tokens, _) <- Text.Megaparsec.match parser
after <- getSourcePos
return (Src before after tokens)
noted :: Parser (Expr Src a) -> Parser (Expr Src a)
noted parser = do
before <- getSourcePos
(tokens, e) <- Text.Megaparsec.match parser
after <- getSourcePos
let src₀ = Src before after tokens
case e of
Note src₁ _ | laxSrcEq src₀ src₁ -> return e
_ -> return (Note src₀ e)
completeExpression :: Parser a -> Parser (Expr Src a)
completeExpression embedded = completeExpression_
where
Parsers {..} = parsers embedded
importExpression :: Parser a -> Parser (Expr Src a)
importExpression embedded = importExpression_
where
Parsers {..} = parsers embedded
data Parsers a = Parsers
{ completeExpression_ :: Parser (Expr Src a)
, importExpression_ :: Parser (Expr Src a)
}
parsers :: Parser a -> Parsers a
parsers embedded = Parsers {..}
where
completeExpression_ = whitespace *> expression <* whitespace
expression =
noted
( choice
[ alternative0
, alternative1
, alternative2
, alternative3
, alternative4
, alternative5
]
) <?> "expression"
where
alternative0 = do
_lambda
whitespace
_openParens
whitespace
a <- label
whitespace
_colon
nonemptyWhitespace
b <- expression
whitespace
_closeParens
whitespace
_arrow
whitespace
c <- expression
return (Lam a b c)
alternative1 = do
try (_if *> nonemptyWhitespace)
a <- expression
whitespace
try (_then *> nonemptyWhitespace)
b <- expression
whitespace
try (_else *> nonemptyWhitespace)
c <- expression
return (BoolIf a b c)
alternative2 = do
let binding = do
src0 <- try (_let *> src nonemptyWhitespace)
c <- label
src1 <- src whitespace
d <- optional (do
_colon
src2 <- src nonemptyWhitespace
e <- expression
whitespace
return (Just src2, e) )
_equal
src3 <- src whitespace
f <- expression
whitespace
return (Binding (Just src0) c (Just src1) d (Just src3) f)
as <- Data.List.NonEmpty.some1 binding
try (_in *> nonemptyWhitespace)
b <- expression
return (Dhall.Syntax.wrapInLets as b)
alternative3 = do
try (_forall *> whitespace *> _openParens)
whitespace
a <- label
whitespace
_colon
nonemptyWhitespace
b <- expression
whitespace
_closeParens
whitespace
_arrow
whitespace
c <- expression
return (Pi a b c)
alternative4 = do
try (_assert *> whitespace *> _colon)
nonemptyWhitespace
a <- expression
return (Assert a)
alternative5 = do
a <- operatorExpression
let alternative4A = do
_arrow
whitespace
b <- expression
whitespace
return (Pi "_" a b)
let alternative4B = do
_colon
nonemptyWhitespace
b <- expression
case shallowDenote a of
ListLit Nothing [] ->
return (ListLit (Just b) [])
Merge c d Nothing ->
return (Merge c d (Just b))
ToMap c Nothing ->
return (ToMap c (Just b))
_ -> return (Annot a b)
alternative4A <|> alternative4B <|> pure a
operatorExpression =
foldr makeOperatorExpression applicationExpression operatorParsers
makeOperatorExpression operatorParser subExpression =
noted (do
a <- subExpression
whitespace
b <- Text.Megaparsec.many $ do
op <- operatorParser
r <- subExpression
whitespace
return (\l -> l `op` r)
return (foldl' (\x f -> f x) a b))
operatorParsers :: [Parser (Expr s a -> Expr s a -> Expr s a)]
operatorParsers =
[ ImportAlt <$ _importAlt <* nonemptyWhitespace
, BoolOr <$ _or <* whitespace
, NaturalPlus <$ _plus <* nonemptyWhitespace
, TextAppend <$ _textAppend <* whitespace
, ListAppend <$ _listAppend <* whitespace
, BoolAnd <$ _and <* whitespace
, Combine <$ _combine <* whitespace
, Prefer <$ _prefer <* whitespace
, CombineTypes <$ _combineTypes <* whitespace
, NaturalTimes <$ _times <* whitespace
, BoolEQ <$ _doubleEqual <* whitespace
, BoolNE <$ _notEqual <* whitespace
, Equivalent <$ _equivalent <* whitespace
]
applicationExpression = do
f <- (Some <$ try (_Some <* nonemptyWhitespace))
<|> return id
a <- noted importExpression_
bs <- Text.Megaparsec.many . try $ do
(sep, _) <- Text.Megaparsec.match nonemptyWhitespace
b <- importExpression_
return (sep, b)
return (foldl' app (f a) bs)
where
app a (sep, b)
| Note (Src left _ bytesL) _ <- a
, Note (Src _ right bytesR) _ <- b
= Note (Src left right (bytesL <> sep <> bytesR)) (App a b)
app a (_, b) =
App a b
importExpression_ = noted (choice [ alternative0, alternative1 ])
where
alternative0 = do
a <- embedded
return (Embed a)
alternative1 = completionExpression
completionExpression = noted (do
a <- selectorExpression
mb <- optional (do
try (whitespace *> _doubleColon)
whitespace
selectorExpression )
case mb of
Nothing -> return a
Just b -> return (RecordCompletion a b) )
selectorExpression = noted (do
a <- primitiveExpression
let recordType = _openParens *> whitespace *> expression <* whitespace <* _closeParens
let field x e = Field e x
let projectBySet xs e = Project e (Left xs)
let projectByExpression xs e = Project e (Right xs)
let alternatives =
fmap field anyLabel
<|> fmap projectBySet labels
<|> fmap projectByExpression recordType
b <- Text.Megaparsec.many (try (whitespace *> _dot *> whitespace *> alternatives))
return (foldl' (\e k -> k e) a b) )
primitiveExpression =
noted
( choice
[ alternative00
, alternative01
, alternative02
, alternative03
, alternative04
, alternative05
, alternative06
, alternative07
, alternative08
, alternative37
, alternative09
, builtin
]
)
<|> alternative38
where
alternative00 = do
n <- getOffset
a <- try doubleLiteral
b <- if isInfinite a
then setOffset n *> fail "double out of bounds"
else return a
return (DoubleLit (DhallDouble b))
alternative01 = do
a <- try naturalLiteral
return (NaturalLit a)
alternative02 = do
a <- try integerLiteral
return (IntegerLit a)
alternative03 = textLiteral
alternative04 = (do
_openBrace
whitespace
_ <- optional (_comma *> whitespace)
a <- recordTypeOrLiteral
whitespace
_closeBrace
return a ) <?> "literal"
alternative05 = unionType
alternative06 = listLiteral
alternative07 = do
try (_merge *> nonemptyWhitespace)
a <- importExpression_
nonemptyWhitespace
b <- importExpression_ <?> "second argument to ❰merge❱"
return (Merge a b Nothing)
alternative08 = do
try (_toMap *> nonemptyWhitespace)
a <- importExpression_
return (ToMap a Nothing)
alternative09 = do
a <- try doubleInfinity
return (DoubleLit (DhallDouble a))
builtin = do
let predicate c =
c == 'N'
|| c == 'I'
|| c == 'D'
|| c == 'L'
|| c == 'O'
|| c == 'B'
|| c == 'S'
|| c == 'T'
|| c == 'F'
|| c == 'K'
let nan = DhallDouble (0.0/0.0)
c <- Text.Megaparsec.lookAhead (Text.Megaparsec.satisfy predicate)
case c of
'N' ->
choice
[ NaturalFold <$ _NaturalFold
, NaturalBuild <$ _NaturalBuild
, NaturalIsZero <$ _NaturalIsZero
, NaturalEven <$ _NaturalEven
, NaturalOdd <$ _NaturalOdd
, NaturalSubtract <$ _NaturalSubtract
, NaturalToInteger <$ _NaturalToInteger
, NaturalShow <$ _NaturalShow
, Natural <$ _Natural
, None <$ _None
, DoubleLit nan <$ _NaN
]
'I' ->
choice
[ IntegerClamp <$ _IntegerClamp
, IntegerNegate <$ _IntegerNegate
, IntegerShow <$ _IntegerShow
, IntegerToDouble <$ _IntegerToDouble
, Integer <$ _Integer
]
'D' ->
choice
[ DoubleShow <$ _DoubleShow
, Double <$ _Double
]
'L' ->
choice
[ ListBuild <$ _ListBuild
, ListFold <$ _ListFold
, ListLength <$ _ListLength
, ListHead <$ _ListHead
, ListLast <$ _ListLast
, ListIndexed <$ _ListIndexed
, ListReverse <$ _ListReverse
, List <$ _List
]
'O' ->
choice
[ OptionalFold <$ _OptionalFold
, OptionalBuild <$ _OptionalBuild
, Optional <$ _Optional
]
'B' -> Bool <$ _Bool
'S' -> Const Sort <$ _Sort
'T' ->
choice
[ TextShow <$ _TextShow
, Text <$ _Text
, BoolLit True <$ _True
, Const Type <$ _Type
]
'F' -> BoolLit False <$ _False
'K' -> Const Kind <$ _Kind
_ -> empty
alternative37 = do
a <- identifier
return (Var a)
alternative38 = do
_openParens
whitespace
a <- expression
whitespace
_closeParens
return a
doubleQuotedChunk =
choice
[ interpolation
, unescapedCharacterFast
, unescapedCharacterSlow
, escapedCharacter
]
where
interpolation = do
_ <- text "${"
e <- completeExpression_
_ <- char '}'
return (Chunks [(mempty, e)] mempty)
unescapedCharacterFast = do
t <- Text.Megaparsec.takeWhile1P Nothing predicate
return (Chunks [] t)
where
predicate c =
( ('\x20' <= c && c <= '\x21' )
|| ('\x23' <= c && c <= '\x5B' )
|| ('\x5D' <= c && c <= '\x10FFFF')
) && c /= '$'
unescapedCharacterSlow = do
_ <- char '$'
return (Chunks [] "$")
escapedCharacter = do
_ <- char '\\'
c <- choice
[ quotationMark
, dollarSign
, backSlash
, forwardSlash
, backSpace
, formFeed
, lineFeed
, carriageReturn
, tab
, unicode
]
return (Chunks [] (Data.Text.singleton c))
where
quotationMark = char '"'
dollarSign = char '$'
backSlash = char '\\'
forwardSlash = char '/'
backSpace = do _ <- char 'b'; return '\b'
formFeed = do _ <- char 'f'; return '\f'
lineFeed = do _ <- char 'n'; return '\n'
carriageReturn = do _ <- char 'r'; return '\r'
tab = do _ <- char 't'; return '\t'
unicode = do
_ <- char 'u';
let toNumber = Data.List.foldl' (\x y -> x * 16 + y) 0
let fourCharacterEscapeSequence = do
ns <- Control.Monad.replicateM 4 hexNumber
let number = toNumber ns
Control.Monad.guard (validCodepoint number)
<|> fail "Invalid Unicode code point"
return number
let bracedEscapeSequence = do
_ <- char '{'
ns <- some hexNumber
let number = toNumber ns
Control.Monad.guard (number <= 0x10FFFD && validCodepoint number)
<|> fail "Invalid Unicode code point"
_ <- char '}'
return number
n <- bracedEscapeSequence <|> fourCharacterEscapeSequence
return (Char.chr n)
doubleQuotedLiteral = do
_ <- char '"'
chunks <- Text.Megaparsec.many doubleQuotedChunk
_ <- char '"'
return (mconcat chunks)
singleQuoteContinue =
choice
[ escapeSingleQuotes
, interpolation
, escapeInterpolation
, endLiteral
, unescapedCharacterFast
, unescapedCharacterSlow
, tab
, endOfLine
]
where
escapeSingleQuotes = do
_ <- "'''" :: Parser Text
b <- singleQuoteContinue
return ("''" <> b)
interpolation = do
_ <- text "${"
a <- completeExpression_
_ <- char '}'
b <- singleQuoteContinue
return (Chunks [(mempty, a)] mempty <> b)
escapeInterpolation = do
_ <- text "''${"
b <- singleQuoteContinue
return ("${" <> b)
endLiteral = do
_ <- text "''"
return mempty
unescapedCharacterFast = do
a <- Text.Megaparsec.takeWhile1P Nothing predicate
b <- singleQuoteContinue
return (Chunks [] a <> b)
where
predicate c =
('\x20' <= c && c <= '\x10FFFF') && c /= '$' && c /= '\''
unescapedCharacterSlow = do
a <- satisfy predicate
b <- singleQuoteContinue
return (Chunks [] a <> b)
where
predicate c = c == '$' || c == '\''
endOfLine = do
a <- "\n" <|> "\r\n"
b <- singleQuoteContinue
return (Chunks [] a <> b)
tab = do
_ <- char '\t' <?> "tab"
b <- singleQuoteContinue
return ("\t" <> b)
singleQuoteLiteral = do
_ <- text "''"
_ <- endOfLine
a <- singleQuoteContinue
return (toDoubleQuoted a)
where
endOfLine = (void (char '\n') <|> void (text "\r\n")) <?> "newline"
textLiteral = (do
literal <- doubleQuotedLiteral <|> singleQuoteLiteral
return (TextLit literal) ) <?> "literal"
recordTypeOrLiteral =
choice
[ alternative0
, alternative1
, alternative2
]
where
alternative0 = do
_equal
return (RecordLit mempty)
alternative1 = nonEmptyRecordTypeOrLiteral
alternative2 = return (Record mempty)
nonEmptyRecordTypeOrLiteral = do
a <- anyLabel
whitespace
let nonEmptyRecordType = do
_colon
nonemptyWhitespace
b <- expression
whitespace
e <- Text.Megaparsec.many (do
_comma
whitespace
c <- anyLabel
whitespace
_colon
nonemptyWhitespace
d <- expression
whitespace
return (c, d) )
m <- toMap ((a, b) : e)
return (Record m)
let nonEmptyRecordLiteral = do
_equal
whitespace
b <- expression
whitespace
e <- Text.Megaparsec.many (do
_comma
whitespace
c <- anyLabel
whitespace
_equal
whitespace
d <- expression
whitespace
return (c, d) )
m <- toMap ((a, b) : e)
return (RecordLit m)
nonEmptyRecordType <|> nonEmptyRecordLiteral
unionType = (do
_openAngle
whitespace
_ <- optional (_bar *> whitespace)
let unionTypeEntry = do
a <- anyLabel
whitespace
b <- optional (_colon *> nonemptyWhitespace *> expression <* whitespace)
return (a, b)
kvs <- Text.Megaparsec.sepBy unionTypeEntry (_bar *> whitespace)
m <- toMap kvs
_closeAngle
return (Union m) ) <?> "literal"
listLiteral = (do
_openBracket
whitespace
_ <- optional (_comma *> whitespace)
a <- Text.Megaparsec.sepBy (expression <* whitespace) (_comma *> whitespace)
_closeBracket
return (ListLit Nothing (Data.Sequence.fromList a)) ) <?> "literal"
env :: Parser ImportType
env = do
_ <- text "env:"
a <- (alternative0 <|> alternative1)
return (Env a)
where
alternative0 = bashEnvironmentVariable
alternative1 = do
_ <- char '"'
a <- posixEnvironmentVariable
_ <- char '"'
return a
localOnly :: Parser ImportType
localOnly =
choice
[ parentPath
, herePath
, homePath
, try absolutePath
]
where
parentPath = do
_ <- ".." :: Parser Text
file <- file_ FileComponent
return (Local Parent file)
herePath = do
_ <- "." :: Parser Text
file <- file_ FileComponent
return (Local Here file)
homePath = do
_ <- "~" :: Parser Text
file <- file_ FileComponent
return (Local Home file)
absolutePath = do
file <- file_ FileComponent
return (Local Absolute file)
local :: Parser ImportType
local = do
a <- localOnly
return a
http :: Parser ImportType
http = do
url <- httpRaw
headers <- optional (do
try (whitespace *> _using *> nonemptyWhitespace)
importExpression import_ )
return (Remote (url { headers }))
missing :: Parser ImportType
missing = do
_missing
return Missing
importType_ :: Parser ImportType
importType_ = do
let predicate c =
c == '~' || c == '.' || c == '/' || c == 'h' || c == 'e' || c == 'm'
_ <- Text.Megaparsec.lookAhead (Text.Megaparsec.satisfy predicate)
choice [ local, http, env, missing ]
importHash_ :: Parser Dhall.Crypto.SHA256Digest
importHash_ = do
_ <- text "sha256:"
t <- count 64 (satisfy hexdig <?> "hex digit")
let strictBytes16 = Data.Text.Encoding.encodeUtf8 t
strictBytes <- case Data.ByteArray.Encoding.convertFromBase Base16 strictBytes16 of
Left string -> fail string
Right strictBytes -> return (strictBytes :: Data.ByteString.ByteString)
case Dhall.Crypto.sha256DigestFromByteString strictBytes of
Nothing -> fail "Invalid sha256 hash"
Just h -> pure h
importHashed_ :: Parser ImportHashed
importHashed_ = do
importType <- importType_
hash <- optional (try (nonemptyWhitespace *> importHash_))
return (ImportHashed {..})
import_ :: Parser Import
import_ = (do
importHashed <- importHashed_
importMode <- alternative <|> pure Code
return (Import {..}) ) <?> "import"
where
alternative = do
try (whitespace *> _as *> nonemptyWhitespace)
(_Text >> pure RawText) <|> (_Location >> pure Location)
splitOn :: Text -> Text -> NonEmpty Text
splitOn needle haystack =
case Data.Text.splitOn needle haystack of
[] -> "" :| []
t : ts -> t :| ts
linesLiteral :: Chunks s a -> NonEmpty (Chunks s a)
linesLiteral (Chunks [] suffix) =
fmap (Chunks []) (splitOn "\n" suffix)
linesLiteral (Chunks ((prefix, interpolation) : pairs₀) suffix₀) =
foldr
Data.List.NonEmpty.cons
(Chunks ((lastLine, interpolation) : pairs₁) suffix₁ :| chunks)
(fmap (Chunks []) initLines)
where
splitLines = splitOn "\n" prefix
initLines = Data.List.NonEmpty.init splitLines
lastLine = Data.List.NonEmpty.last splitLines
Chunks pairs₁ suffix₁ :| chunks = linesLiteral (Chunks pairs₀ suffix₀)
unlinesLiteral :: NonEmpty (Chunks s a) -> Chunks s a
unlinesLiteral chunks =
Data.Foldable.fold (Data.List.NonEmpty.intersperse "\n" chunks)
emptyLine :: Chunks s a -> Bool
emptyLine (Chunks [] "" ) = True
emptyLine (Chunks [] "\r") = True
emptyLine _ = False
leadingSpaces :: Chunks s a -> Text
leadingSpaces chunks = Data.Text.takeWhile isSpace firstText
where
isSpace c = c == '\x20' || c == '\x09'
firstText =
case chunks of
Chunks [] suffix -> suffix
Chunks ((prefix, _) : _ ) _ -> prefix
dropLiteral :: Int -> Chunks s a -> Chunks s a
dropLiteral n (Chunks [] suffix) =
Chunks [] (Data.Text.drop n suffix)
dropLiteral n (Chunks ((prefix, interpolation) : rest) suffix) =
Chunks ((Data.Text.drop n prefix, interpolation) : rest) suffix
toDoubleQuoted :: Chunks Src a -> Chunks Src a
toDoubleQuoted literal =
unlinesLiteral (fmap (dropLiteral indent) literals)
where
literals = linesLiteral literal
sharedPrefix ab ac =
case Data.Text.commonPrefixes ab ac of
Just (a, _b, _c) -> a
Nothing -> ""
filteredLines = newInit <> pure oldLast
where
oldInit = Data.List.NonEmpty.init literals
oldLast = Data.List.NonEmpty.last literals
newInit = filter (not . emptyLine) oldInit
longestSharedPrefix =
case filteredLines of
l : ls ->
Data.Foldable.foldl' sharedPrefix (leadingSpaces l) (fmap leadingSpaces ls)
[] ->
""
indent = Data.Text.length longestSharedPrefix