{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveDataTypeable #-}
module Language.Docker.Parser
( parseText
, parseFile
, Parser
, Error
, DockerfileError(..)
) where
import Control.Monad (void)
import qualified Data.ByteString as B
import Data.Data
import Data.List.NonEmpty (NonEmpty, fromList)
import Data.Maybe (listToMaybe)
import Data.Semigroup ((<>))
import qualified Data.Set as S
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as E
import qualified Data.Text.Encoding.Error as E
import Data.Time.Clock (secondsToDiffTime)
import Text.Megaparsec hiding (Label, label)
import Text.Megaparsec.Char hiding (eol)
import qualified Text.Megaparsec.Char.Lexer as L
import Language.Docker.Normalize
import Language.Docker.Syntax
data DockerfileError
= DuplicateFlagError String
| NoValueFlagError String
| InvalidFlagError String
| FileListError String
| QuoteError String
String
deriving (Eq, Data, Typeable, Ord, Read, Show)
type Parser = Parsec DockerfileError Text
type Error = ParseError Char DockerfileError
type Instr = Instruction Text
data CopyFlag
= FlagChown Chown
| FlagSource CopySource
| FlagInvalid (Text, Text)
data CheckFlag
= FlagInterval Duration
| FlagTimeout Duration
| FlagStartPeriod Duration
| FlagRetries Retries
| CFlagInvalid (Text, Text)
instance ShowErrorComponent DockerfileError where
showErrorComponent (DuplicateFlagError f) = "duplicate flag: " ++ f
showErrorComponent (FileListError f) =
"unexpected end of line. At least two arguments are required for " ++ f
showErrorComponent (NoValueFlagError f) = "unexpected flag " ++ f ++ " with no value"
showErrorComponent (InvalidFlagError f) = "invalid flag: " ++ f
showErrorComponent (QuoteError t str) =
"unexpected end of " ++ t ++ " quoted string " ++ str ++ " (unmatched quote)"
customError :: DockerfileError -> Parser a
customError = fancyFailure . S.singleton . ErrorCustom
eol :: Parser ()
eol = void $ takeWhile1P (Just "whitespace") isSpaceNl
reserved :: Text -> Parser ()
reserved name = void (lexeme (string' name) <?> T.unpack name)
natural :: Parser Integer
natural = L.decimal <?> "positive number"
commaSep :: Parser a -> Parser [a]
commaSep p = sepBy p (symbol ",")
stringLiteral :: Parser Text
stringLiteral = do
void (char '"')
lit <- manyTill L.charLiteral (char '"')
return (T.pack lit)
brackets :: Parser a -> Parser a
brackets = between (symbol "[") (symbol "]")
spaces1 :: Parser ()
spaces1 = void (takeWhile1P (Just "at least one space") (\c -> c == ' ' || c == '\t'))
spaces :: Parser ()
spaces = void (takeWhileP (Just "at least one space") (\c -> c == ' ' || c == '\t'))
symbol :: Text -> Parser Text
symbol name = do
x <- string name
spaces
return x
caseInsensitiveString :: Text -> Parser Text
caseInsensitiveString = string'
charsWithEscapedSpaces :: String -> Parser Text
charsWithEscapedSpaces stopChars = do
buf <- takeWhile1P Nothing (`notElem` ("\n\t\\ " ++ stopChars))
try (jumpEscapeSequence buf) <|> try (backslashFollowedByChars buf) <|> return buf
where
backslashFollowedByChars buf = do
backslashes <- takeWhile1P Nothing (== '\\')
notFollowedBy (char ' ')
rest <- charsWithEscapedSpaces stopChars
return $ T.concat [buf, backslashes, rest]
jumpEscapeSequence buf = do
void $ string "\\ "
rest <- charsWithEscapedSpaces stopChars
return $ T.concat [buf, " ", rest]
lexeme :: Parser a -> Parser a
lexeme p = do
x <- p
spaces1
return x
isNl :: Char -> Bool
isNl c = c == '\n'
isSpaceNl :: Char -> Bool
isSpaceNl c = c == ' ' || c == '\t' || c == '\n'
anyUnless :: (Char -> Bool) -> Parser Text
anyUnless predicate = takeWhileP Nothing (\c -> not (isSpaceNl c || predicate c))
someUnless :: String -> (Char -> Bool) -> Parser Text
someUnless name predicate = takeWhile1P (Just name) (\c -> not (isSpaceNl c || predicate c))
comment :: Parser Instr
comment = do
void $ char '#'
text <- takeWhileP Nothing (not . isNl)
return $ Comment text
parseRegistry :: Parser Registry
parseRegistry = do
domain <- someUnless "a domain name" (== '.')
void $ char '.'
tld <- someUnless "a TLD" (== '/')
void $ char '/'
return $ Registry (domain <> "." <> tld)
taggedImage :: Parser BaseImage
taggedImage = do
registryName <- (Just <$> try parseRegistry) <|> return Nothing
name <- someUnless "the image name with a tag" (\c -> c == '@' || c == ':')
void $ char ':'
tag <- someUnless "the image tag" (== ':')
maybeAlias <- maybeImageAlias
return $ TaggedImage (Image registryName name) (Tag tag) maybeAlias
digestedImage :: Parser BaseImage
digestedImage = do
name <- someUnless "the image name with a digest" (\c -> c == '@' || c == ':')
void $ char '@'
digest <- someUnless "the image digest" (== '@')
maybeAlias <- maybeImageAlias
return $ DigestedImage (Image Nothing name) digest maybeAlias
untaggedImage :: Parser BaseImage
untaggedImage = do
registryName <- (Just <$> try parseRegistry) <|> return Nothing
name <- someUnless "just the image name" (\c -> c == '@' || c == ':')
notInvalidTag name
notInvalidDigest name
maybeAlias <- maybeImageAlias
return $ UntaggedImage (Image registryName name) maybeAlias
where
notInvalidTag :: Text -> Parser ()
notInvalidTag name =
try (notFollowedBy $ string ":") <?> "no ':' or a valid image tag string (example: " ++
T.unpack name ++ ":valid-tag)"
notInvalidDigest :: Text -> Parser ()
notInvalidDigest name =
try (notFollowedBy $ string "@") <?> "no '@' or a valid digest hash (example: " ++
T.unpack name ++ "@a3f42f2de)"
maybeImageAlias :: Parser (Maybe ImageAlias)
maybeImageAlias = Just <$> (spaces1 >> imageAlias) <|> return Nothing
imageAlias :: Parser ImageAlias
imageAlias = do
void (try (reserved "AS") <?> "AS followed by the image alias")
alias <- someUnless "the image alias" (== '\n')
return $ ImageAlias alias
baseImage :: Parser BaseImage
baseImage =
try digestedImage <|>
try taggedImage <|>
untaggedImage
from :: Parser Instr
from = do
reserved "FROM"
image <- baseImage
return $ From image
cmd :: Parser Instr
cmd = do
reserved "CMD"
args <- arguments
return $ Cmd args
copy :: Parser Instr
copy = do
reserved "COPY"
flags <- copyFlag `sepEndBy` spaces1
let chownFlags = [c | FlagChown c <- flags]
let sourceFlags = [f | FlagSource f <- flags]
let invalid = [i | FlagInvalid i <- flags]
case (invalid, chownFlags, sourceFlags) of
((k, v):_, _, _) -> unexpectedFlag k v
(_, _:_:_, _) -> customError $ DuplicateFlagError "--chown"
(_, _, _:_:_) -> customError $ DuplicateFlagError "--from"
_ -> do
let ch =
case chownFlags of
[] -> NoChown
c:_ -> c
let fr =
case sourceFlags of
[] -> NoSource
f:_ -> f
fileList "COPY" (\src dest -> Copy (CopyArgs src dest ch fr))
copyFlag :: Parser CopyFlag
copyFlag =
(FlagChown <$> try chown <?> "only one --chown") <|>
(FlagSource <$> try copySource <?> "only one --from") <|>
(FlagInvalid <$> try anyFlag <?> "no other flags")
chown :: Parser Chown
chown = do
void $ string "--chown="
ch <- someUnless "the user and group for chown" (== ' ')
return $ Chown ch
copySource :: Parser CopySource
copySource = do
void $ string "--from="
src <- someUnless "the copy source path" isNl
return $ CopySource src
anyFlag :: Parser (Text, Text)
anyFlag = do
void $ string "--"
name <- someUnless "the flag value" (== '=')
void $ char '='
val <- anyUnless (== ' ')
return (T.append "--" name, val)
fileList :: Text -> (NonEmpty SourcePath -> TargetPath -> Instr) -> Parser Instr
fileList name constr = do
paths <-
(try stringList <?> "an array of strings [\"src_file\", \"dest_file\"]") <|>
(try spaceSeparated <?> "a space separated list of file paths")
case paths of
[_] -> customError $ FileListError (T.unpack name)
_ -> return $ constr (SourcePath <$> fromList (init paths)) (TargetPath $ last paths)
where
spaceSeparated = anyUnless (== ' ') `sepBy1` (try spaces1 <?> "at least another file path")
stringList = brackets $ commaSep stringLiteral
unexpectedFlag :: Text -> Text -> Parser a
unexpectedFlag name "" = customFailure $ NoValueFlagError (T.unpack name)
unexpectedFlag name _ = customFailure $ InvalidFlagError (T.unpack name)
shell :: Parser Instr
shell = do
reserved "SHELL"
args <- arguments
return $ Shell args
stopsignal :: Parser Instr
stopsignal = do
reserved "STOPSIGNAL"
args <- untilEol "the stop signal"
return $ Stopsignal args
doubleQuotedValue :: Parser Text
doubleQuotedValue =
between (string "\"") (string "\"") (takeWhileP Nothing (\c -> c /= '"' && c /= '\n'))
singleQuotedValue :: Parser Text
singleQuotedValue =
between (string "'") (string "'") (takeWhileP Nothing (\c -> c /= '\'' && c /= '\n'))
unquotedString :: String -> Parser Text
unquotedString stopChars = do
str <- charsWithEscapedSpaces stopChars
checkFaults str
where
checkFaults str
| T.null str = return str
| T.head str == '\'' = customError $ QuoteError "single" (T.unpack str)
| T.head str == '\"' = customError $ QuoteError "double" (T.unpack str)
| otherwise = return str
singleValue :: String -> Parser Text
singleValue stopChars =
try doubleQuotedValue <|>
try singleQuotedValue <|>
(try (unquotedString stopChars) <?> "a string with no quotes")
pair :: Parser (Text, Text)
pair = do
key <- singleValue "="
void $ char '='
value <- singleValue ""
return (key, value)
pairsList :: Parser Pairs
pairsList = pair `sepBy1` spaces1
label :: Parser Instr
label = do
reserved "LABEL"
p <- pairs
return $ Label p
arg :: Parser Instr
arg = do
reserved "ARG"
(try nameWithDefault <?> "the arg name") <|>
Arg <$> untilEol "the argument name" <*> pure Nothing
where
nameWithDefault = do
name <- someUnless "the argument name" (== '=')
void $ char '='
def <- untilEol "the argument value"
return $ Arg name (Just def)
env :: Parser Instr
env = do
reserved "ENV"
p <- pairs
return $ Env p
pairs :: Parser Pairs
pairs = try pairsList <|> try singlePair
singlePair :: Parser Pairs
singlePair = do
key <- anyUnless (== '=')
spaces1 <?> "a space followed by the value for the variable '" ++ T.unpack key ++ "'"
val <- untilEol "the variable value"
return [(key, val)]
user :: Parser Instr
user = do
reserved "USER"
username <- untilEol "the user"
return $ User username
add :: Parser Instr
add = do
reserved "ADD"
flag <- lexeme copyFlag <|> return (FlagChown NoChown)
notFollowedBy (string "--") <?> "only the --chown flag or the src and dest paths"
case flag of
FlagChown ch -> fileList "ADD" (\src dest -> Add (AddArgs src dest ch))
FlagSource _ -> customError $ InvalidFlagError "--from"
FlagInvalid (k, v) -> unexpectedFlag k v
expose :: Parser Instr
expose = do
reserved "EXPOSE"
ps <- ports
return $ Expose ps
port :: Parser Port
port =
(try portVariable <?> "a variable") <|>
(try portRange <?> "a port range optionally followed by the protocol (udp/tcp)") <|>
(try portWithProtocol <?> "a port with its protocol (udp/tcp)") <|>
(try portInt <?> "a valid port number")
ports :: Parser Ports
ports = Ports <$> port `sepEndBy1` (char ' ' <|> char '\t')
portRange :: Parser Port
portRange = do
start <- natural
void $ char '-'
finish <- try natural
proto <- try protocol <|> return TCP
return $ PortRange (fromIntegral start) (fromIntegral finish) proto
protocol :: Parser Protocol
protocol = do
void (char '/')
tcp <|> udp
where
tcp = caseInsensitiveString "tcp" >> return TCP
udp = caseInsensitiveString "udp" >> return UDP
portInt :: Parser Port
portInt = do
portNumber <- natural
notFollowedBy (string "/" <|> string "-")
return $ Port (fromIntegral portNumber) TCP
portWithProtocol :: Parser Port
portWithProtocol = do
portNumber <- natural
proto <- protocol
return $ Port (fromIntegral portNumber) proto
portVariable :: Parser Port
portVariable = do
void (char '$')
variable <- someUnless "the variable name" (== '$')
return $ PortStr (T.append "$" variable)
run :: Parser Instr
run = do
reserved "RUN"
c <- arguments
return $ Run c
untilEol :: String -> Parser Text
untilEol name = takeWhile1P (Just name) (not . isNl)
workdir :: Parser Instr
workdir = do
reserved "WORKDIR"
directory <- untilEol "the workdir path"
return $ Workdir directory
volume :: Parser Instr
volume = do
reserved "VOLUME"
directory <- untilEol "the volume path"
return $ Volume directory
maintainer :: Parser Instr
maintainer = do
reserved "MAINTAINER"
name <- untilEol "the maintainer name"
return $ Maintainer name
argumentsExec :: Parser (Arguments Text)
argumentsExec = do
args <- brackets $ commaSep stringLiteral
return $ ArgumentsList (T.unwords args)
argumentsShell :: Parser (Arguments Text)
argumentsShell = ArgumentsText <$> toEnd
where
toEnd = untilEol "the shell arguments"
arguments :: Parser (Arguments Text)
arguments = try argumentsExec <|> try argumentsShell
entrypoint :: Parser Instr
entrypoint = do
reserved "ENTRYPOINT"
args <- arguments
return $ Entrypoint args
onbuild :: Parser Instr
onbuild = do
reserved "ONBUILD"
i <- parseInstruction
return $ OnBuild i
healthcheck :: Parser Instr
healthcheck = do
reserved "HEALTHCHECK"
Healthcheck <$> (fullCheck <|> noCheck)
where
noCheck = string "NONE" >> return NoCheck
allFlags = do
flags <- someFlags
spaces1 <?> "another flag"
return flags
someFlags = do
x <- checkFlag
cont <- try (spaces1 >> lookAhead (string "--") >> return True) <|> return False
if cont
then do
xs <- someFlags
return (x : xs)
else return [x]
fullCheck = do
flags <- allFlags <|> return []
let intervals = [x | FlagInterval x <- flags]
let timeouts = [x | FlagTimeout x <- flags]
let startPeriods = [x | FlagStartPeriod x <- flags]
let retriesD = [x | FlagRetries x <- flags]
let invalid = [x | CFlagInvalid x <- flags]
case (invalid, intervals, timeouts, startPeriods, retriesD) of
((k, v):_, _, _, _, _) -> unexpectedFlag k v
(_, _:_:_, _, _, _) -> customError $ DuplicateFlagError "--interval"
(_, _, _:_:_, _, _) -> customError $ DuplicateFlagError "--timeout"
(_, _, _, _:_:_, _) -> customError $ DuplicateFlagError "--start-period"
(_, _, _, _, _:_:_) -> customError $ DuplicateFlagError "--retries"
_ -> do
Cmd checkCommand <- cmd
let interval = listToMaybe intervals
let timeout = listToMaybe timeouts
let startPeriod = listToMaybe startPeriods
let retries = listToMaybe retriesD
return $ Check CheckArgs {..}
checkFlag :: Parser CheckFlag
checkFlag =
(FlagInterval <$> durationFlag "--interval=" <?> "--interval") <|>
(FlagTimeout <$> durationFlag "--timeout=" <?> "--timeout") <|>
(FlagStartPeriod <$> durationFlag "--start-period=" <?> "--start-period") <|>
(FlagRetries <$> retriesFlag <?> "--retries") <|>
(CFlagInvalid <$> anyFlag <?> "no flags")
durationFlag :: Text -> Parser Duration
durationFlag flagName = do
void $ try (string flagName)
scale <- natural
unit <- char 's' <|> char 'm' <|> char 'h' <?> "either 's', 'm' or 'h' as the unit"
case unit of
's' -> return $ Duration (secondsToDiffTime scale)
'm' -> return $ Duration (secondsToDiffTime (scale * 60))
'h' -> return $ Duration (secondsToDiffTime (scale * 60 * 60))
_ -> fail "only 's', 'm' or 'h' are allowed as the duration"
retriesFlag :: Parser Retries
retriesFlag = do
void $ try (string "--retries=")
n <- try natural <?> "the number of retries"
return $ Retries (fromIntegral n)
parseInstruction :: Parser Instr
parseInstruction =
onbuild <|>
from <|>
copy <|>
run <|>
workdir <|>
entrypoint <|>
volume <|>
expose <|>
env <|>
arg <|>
user <|>
label <|>
stopsignal <|>
cmd <|>
shell <|>
maintainer <|>
add <|>
comment <|>
healthcheck
contents :: Parser a -> Parser a
contents p = do
void $ takeWhileP Nothing isSpaceNl
r <- p
eof
return r
dockerfile :: Parser Dockerfile
dockerfile =
many $ do
pos <- getPosition
i <- parseInstruction
eol <|> eof <?> "a new line followed by the next instruction"
return $ InstructionPos i (T.pack . sourceName $ pos) (unPos . sourceLine $ pos)
parseText :: Text -> Either Error Dockerfile
parseText s = parse (contents dockerfile) "<string>" $ normalizeEscapedLines s
parseFile :: FilePath -> IO (Either Error Dockerfile)
parseFile file = doParse <$> B.readFile file
where
doParse =
parse (contents dockerfile) file . normalizeEscapedLines . E.decodeUtf8With E.lenientDecode