-- | A streaming parser for the NAR format {-# language GeneralizedNewtypeDeriving #-} {-# language ScopedTypeVariables #-} {-# language TypeFamilies #-} module System.Nix.Internal.Nar.Parser ( runParser , parseNar , testParser , testParser' ) where import qualified Relude.Unsafe as Unsafe import qualified Algebra.Graph as Graph import qualified Algebra.Graph.ToGraph as Graph import qualified Control.Concurrent as Concurrent import qualified Control.Exception.Lifted as Exception.Lifted import qualified Control.Monad.Except as Except import qualified Control.Monad.Fail as Fail import qualified Control.Monad.IO.Class as IO import qualified Control.Monad.Reader as Reader import qualified Control.Monad.State as State import qualified Control.Monad.Trans as Trans import qualified Control.Monad.Trans.Control as Base import qualified Data.ByteString as Bytes import qualified Data.List as List import qualified Data.Map as Map import qualified Data.Serialize as Serialize import qualified Data.Text as Text import qualified System.Directory as Directory import System.FilePath as FilePath import qualified System.IO as IO import qualified System.Nix.Internal.Nar.Effects as Nar -- | NarParser is a monad for parsing a Nar file as a byte stream -- and reconstructing the file system objects inside -- See the definitions of @NarEffects@ for a description -- of the actions the parser can take, and @ParserState@ for the -- internals of the parser newtype NarParser m a = NarParser { runNarParser :: State.StateT ParserState (Except.ExceptT String (Reader.ReaderT (Nar.NarEffects m) m ) ) a } deriving ( Functor, Applicative, Monad, Fail.MonadFail , Trans.MonadIO, State.MonadState ParserState , Except.MonadError String , Reader.MonadReader (Nar.NarEffects m) ) -- | Run a @NarParser@ over a byte stream -- This is suitable for testing the top-level NAR parser, or any of the -- smaller utilities parsers, if you have bytes appropriate for them runParser :: forall m a . (IO.MonadIO m, Base.MonadBaseControl IO m) => Nar.NarEffects m -- ^ Provide the effects set, usually @narEffectsIO@ -> NarParser m a -- ^ A parser to run, such as @parseNar@ -> IO.Handle -- ^ A handle the stream containg the NAR. It should already be -- open and in @ReadMode@ -> FilePath -- ^ The root file system object to be created by the NAR -> m (Either String a) runParser effs (NarParser action) h target = do unpackResult <- runReaderT (runExceptT $ State.evalStateT action state0) effs `Exception.Lifted.catch` exceptionHandler when (isLeft unpackResult) cleanup pure unpackResult where state0 :: ParserState state0 = ParserState { tokenStack = [] , handle = h , directoryStack = [target] , links = [] } exceptionHandler :: Exception.Lifted.SomeException -> m (Either String a) exceptionHandler e = pure $ Left $ "Exception while unpacking NAR file: " <> show e cleanup :: m () cleanup = (\ef trg -> do isDir <- Nar.narIsDir ef trg bool (Nar.narDeleteFile ef trg) (Nar.narDeleteDir ef trg) isDir ) effs target instance Trans.MonadTrans NarParser where lift act = NarParser $ (lift . lift . lift) act data ParserState = ParserState { tokenStack :: ![Text] -- ^ The parser can push tokens (words or punctuation) -- onto this stack. We use this for a very limited backtracking -- where the Nar format requires it , directoryStack :: ![String] -- ^ The parser knows the name of the current FSO it's targeting, -- and the relative directory path leading there , handle :: IO.Handle -- ^ Handle of the input byte stream , links :: [LinkInfo] -- ^ Unlike with files and directories, we collect symlinks -- from the NAR on } ------------------------------------------------------------------------------ -- * Parsers for NAR components -- | Parse a NAR byte string, producing @()@. -- Parsing a NAR is mostly used for its side-effect: producing -- the file system objects packed in the NAR. That's why we pure @()@ parseNar :: (IO.MonadIO m, Fail.MonadFail m) => NarParser m () parseNar = do expectStr "nix-archive-1" parens parseFSO createLinks parseFSO :: (IO.MonadIO m, Fail.MonadFail m) => NarParser m () parseFSO = do expectStr "type" matchStr [ ("symlink" , parseSymlink ) , ("regular" , parseFile ) , ("directory", parseDirectory) ] -- | Parse a symlink from a NAR, storing the link details in the parser state -- We remember links rather than immediately creating file system objects -- from them, because we might encounter a link in the NAR before we -- encountered its target, and in this case, creating the link will fail -- The final step of creating links is handle by @createLinks@ parseSymlink :: (IO.MonadIO m, Fail.MonadFail m) => NarParser m () parseSymlink = do expectStr "target" target <- parseStr (dir, file) <- currentDirectoryAndFile pushLink $ LinkInfo { linkTarget = toString target , linkFile = file , linkPWD = dir } where currentDirectoryAndFile :: Monad m => NarParser m (FilePath, FilePath) currentDirectoryAndFile = do dirStack <- State.gets directoryStack pure (List.foldr1 () (List.reverse $ drop 1 dirStack), Unsafe.head dirStack) -- | Internal data type representing symlinks encountered in the NAR data LinkInfo = LinkInfo { linkTarget :: String -- ^ path to the symlink target, relative to the root of the unpacking NAR , linkFile :: String -- ^ file name of the link being created , linkPWD :: String -- ^ directory in which to create the link (relative to unpacking root) } deriving Show -- | When the NAR includes a file, we read from the NAR handle in chunks and -- write the target in chunks. This lets us avoid reading the full contents -- of the encoded file into memory parseFile :: forall m . (IO.MonadIO m, Fail.MonadFail m) => NarParser m () parseFile = do s <- parseStr when (s `notElem` ["executable", "contents"]) $ Fail.fail $ "Parser found " <> show s <> " when expecting element from " <> (show :: [String] -> String) ["executable", "contents"] when (s == "executable") $ do expectStr "" expectStr "contents" fSize <- parseLength -- Set up for defining `getChunk` narHandle <- State.gets handle bytesLeftVar <- IO.liftIO $ newIORef fSize let -- getChunk tracks the number of total bytes we still need to get from the -- file (starting at the file size, and decrementing by the size of the -- chunk we read) getChunk :: m (Maybe ByteString) getChunk = do bytesLeft <- IO.liftIO $ readIORef bytesLeftVar if bytesLeft == 0 then pure Nothing else do chunk <- IO.liftIO $ Bytes.hGetSome narHandle $ fromIntegral $ min 10000 bytesLeft when (Bytes.null chunk) (Fail.fail "ZERO BYTES") IO.liftIO $ modifyIORef bytesLeftVar $ \n -> n - fromIntegral (Bytes.length chunk) -- This short pause is necessary for letting the garbage collector -- clean up chunks from previous runs. Without it, heap memory usage can -- quickly spike IO.liftIO $ Concurrent.threadDelay 10 pure $ Just chunk target <- currentFile streamFile <- asks Nar.narStreamFile lift (streamFile target getChunk) when (s == "executable") $ do effs :: Nar.NarEffects m <- ask lift $ do p <- Nar.narGetPerms effs target Nar.narSetPerms effs target (p { Directory.executable = True }) expectRawString (Bytes.replicate (padLen $ fromIntegral fSize) 0) -- | Parse a NAR encoded directory, being careful not to hold onto file -- handles for target files longer than needed parseDirectory :: (IO.MonadIO m, Fail.MonadFail m) => NarParser m () parseDirectory = do createDirectory <- asks Nar.narCreateDir target <- currentFile lift $ createDirectory target parseEntryOrFinish where parseEntryOrFinish :: (IO.MonadIO m, Fail.MonadFail m) => NarParser m () parseEntryOrFinish = -- If we reach a ")", we finished the directory's entries, and we have -- to put ")" back into the stream, because the outer call to @parens@ -- expects to consume it. -- Otherwise, parse an entry as a fresh file system object matchStr [ ( ")" , pushStr ")" ) , ("entry", parseEntry ) ] parseEntry :: (IO.MonadIO m, Fail.MonadFail m) => NarParser m () parseEntry = do parens $ do expectStr "name" fName <- parseStr pushFileName (toString fName) expectStr "node" parens parseFSO popFileName parseEntryOrFinish ------------------------------------------------------------------------------ -- * Utility parsers -- | Short strings guiding the NAR parsing are prefixed with their -- length, then encoded in ASCII, and padded to 8 bytes. @parseStr@ -- captures this logic parseStr :: (IO.MonadIO m, Fail.MonadFail m) => NarParser m Text parseStr = do cachedStr <- popStr case cachedStr of Just str -> pure str Nothing -> do len <- parseLength strBytes <- consume $ fromIntegral len expectRawString (Bytes.replicate (fromIntegral $ padLen $ fromIntegral len) 0) pure $ decodeUtf8 strBytes -- | Get an Int64 describing the length of the upcoming string, -- according to NAR's encoding of ints parseLength :: (IO.MonadIO m, Fail.MonadFail m) => NarParser m Int64 parseLength = do eightBytes <- consume 8 either (\e -> Fail.fail $ "parseLength failed to decode int64: " <> e) pure (Serialize.runGet Serialize.getInt64le eightBytes) -- | Consume a NAR string and assert that it matches an expectation expectStr :: (IO.MonadIO m, Fail.MonadFail m) => Text -> NarParser m () expectStr expected = do actual <- parseStr when (actual /= expected) $ Fail.fail $ "Expected " <> err expected <> ", got " <> err actual where err t = show $ bool t (Text.take 10 t <> "...") (Text.length t > 10) -- | Consume a raw string and assert that it equals some expectation. -- This is usually used when consuming padding 0's expectRawString :: (IO.MonadIO m, Fail.MonadFail m) => ByteString -> NarParser m () expectRawString expected = do actual <- consume $ Bytes.length expected when (actual /= expected) $ Fail.fail $ "Expected " <> err expected <> ", got " <> err actual where err bs = show $ bool bs (Bytes.take 10 bs <> "...") (Bytes.length bs > 10) -- | Consume a NAR string, and dispatch to a parser depending on which string -- matched matchStr :: (IO.MonadIO m, Fail.MonadFail m) => [(Text, NarParser m a)] -- ^ List of expected possible strings and the parsers they should run -> NarParser m a matchStr parsers = do str <- parseStr case List.lookup str parsers of Just p -> p Nothing -> Fail.fail $ "Expected one of " <> show (fst <$> parsers) <> " found " <> show str -- | Wrap any parser in NAR formatted parentheses -- (a parenthesis is a NAR string, so it needs length encoding and padding) parens :: (IO.MonadIO m, Fail.MonadFail m) => NarParser m a -> NarParser m a parens act = do expectStr "(" r <- act expectStr ")" pure r -- | Sort links in the symlink stack according to their connectivity -- (Targets must be created before the links that target them) createLinks :: IO.MonadIO m => NarParser m () createLinks = do createLink <- asks Nar.narCreateLink allLinks <- State.gets links sortedLinks <- IO.liftIO $ sortLinksIO allLinks forM_ sortedLinks $ \li -> do pwd <- IO.liftIO Directory.getCurrentDirectory IO.liftIO $ Directory.setCurrentDirectory (linkPWD li) lift $ createLink (linkTarget li) (linkFile li) IO.liftIO $ Directory.setCurrentDirectory pwd where -- Convert every target and link file to a filepath relative -- to NAR root, then @Graph.topSort@ it, and map from the -- relative filepaths back to the original @LinkInfo@. -- Relative paths are needed for sorting, but @LinkInfo@s -- are needed for creating the link files sortLinksIO :: [LinkInfo] -> IO [LinkInfo] sortLinksIO ls = do linkLocations <- fmap Map.fromList $ forM ls $ \li-> (,li) <$> Directory.canonicalizePath (linkFile li) canonicalLinks <- forM ls $ \l -> do targetAbsPath <- Directory.canonicalizePath (linkPWD l linkTarget l) fileAbsPath <- Directory.canonicalizePath (linkFile l) pure (fileAbsPath, targetAbsPath) let linkGraph = Graph.edges canonicalLinks case Graph.topSort linkGraph of Left _ -> error "Symlinks form a loop" Right sortedNodes -> let sortedLinks = flip Map.lookup linkLocations <$> sortedNodes in pure $ catMaybes sortedLinks ------------------------------------------------------------------------------ -- * State manipulation -- | Pull n bytes from the underlying handle, failing if fewer bytes -- are available consume :: (IO.MonadIO m, Fail.MonadFail m) => Int -> NarParser m ByteString consume 0 = pure "" consume n = do state0 <- State.get newBytes <- IO.liftIO $ Bytes.hGetSome (handle state0) (max 0 n) when (Bytes.length newBytes < n) $ Fail.fail $ "consume: Not enough bytes in handle. Wanted " <> show n <> " got " <> show (Bytes.length newBytes) pure newBytes -- | Pop a string off the token stack popStr :: Monad m => NarParser m (Maybe Text) popStr = do s <- State.get case uncons (tokenStack s) of Nothing -> pure Nothing Just (x, xs) -> do State.put $ s { tokenStack = xs } pure $ Just x -- | Push a string onto the token stack pushStr :: Monad m => Text -> NarParser m () pushStr str = State.modify $ \s -> -- s { loadedBytes = strBytes <> loadedBytes s } s { tokenStack = str : tokenStack s } -- | Push a level onto the directory stack pushFileName :: Monad m => FilePath -> NarParser m () pushFileName fName = State.modify (\s -> s { directoryStack = fName : directoryStack s }) -- | Go to the parent level in the directory stack popFileName :: Monad m => NarParser m () popFileName = State.modify (\s -> s { directoryStack = List.drop 1 (directoryStack s )}) -- | Convert the current directory stack into a filepath by interspersing -- the path components with "/" currentFile :: Monad m => NarParser m FilePath currentFile = do dirStack <- State.gets directoryStack pure $ List.foldr1 () $ List.reverse dirStack -- | Add a link to the collection of encountered symlinks pushLink :: Monad m => LinkInfo -> NarParser m () pushLink linkInfo = State.modify (\s -> s { links = linkInfo : links s }) ------------------------------------------------------------------------------ -- * Utilities testParser :: (m ~ IO) => NarParser m a -> ByteString -> m (Either String a) testParser p b = do Bytes.writeFile tmpFileName b withFile tmpFileName ReadMode $ \h -> runParser Nar.narEffectsIO p h tmpFileName where tmpFileName = "tmp" testParser' :: (m ~ IO) => FilePath -> IO (Either String ()) testParser' fp = withFile fp ReadMode $ \h -> runParser Nar.narEffectsIO parseNar h "tmp" -- | Distance to the next multiple of 8 padLen :: Int -> Int padLen n = (8 - n) `mod` 8 dbgState :: IO.MonadIO m => NarParser m () dbgState = do s <- State.get IO.liftIO $ print (tokenStack s, directoryStack s)