Fork me on GitHub

Usage

plash shallow-copy CONTAINER OUT

Description

Copy and hard link the filesystem to OUT.
This will export a root filesystem to OUT with minimal overhead. Directories
are copied with their metadata, but all files are hard linked. You can
imagine them pointing to the real file inside the plash data. This takes only
a short time and saves disk space. The downside is that you have to be very
careful not to change files, since this would change the plash build
data and affect other containers.

Example

$ mkdir /tmp/mydir
$ plash b shallow-copy --from ubuntu -- /tmp/mydir/ubuntu
$ plash b shallow-copy --from ubuntu --apt git -- /tmp/mydir/ubuntu-with-git
$ plash sudo du -sh /tmp/mydir/ubuntu
413M    /tmp/mydir/ubuntu
$ plash sudo du -sh /tmp/mydir/ubuntu-with-git/
407M    /tmp/mydir/ubuntu-with-git/
$ plash sudo du -sh /tmp/mydir/
423M    /tmp/mydir/

See https://en.wikipedia.org/wiki/Object_copying#Shallow_copy

Tested behaviour

Preface

#!/bin/sh
# Abort on the first failing command (-e), treat unset variables as errors
# (-u) and trace every command as it runs (-x).
set -eux

    
  
    
      
# Build a container from container 1 (-f is presumably short for --from)
# whose build step creates the file /da.
plash build -f 1 --run 'touch /da'

    
  
    
      
# Export container 2 (presumably the one built in the previous step --
# confirm) into a fresh temporary directory.
tmp=$(mktemp -d)
plash b shallow-copy -f 2 -- "$tmp"/rootfs

    
  
    
      
# The exported rootfs must contain exactly these top-level entries,
# including the newly created "da".
test "$(ls "$tmp"/rootfs)" = "$(echo 'bin da dev etc home proc root sys tmp usr var' | tr ' ' '\n')"

    
  
    
      
# The file under the container's node path and the exported file must share
# one inode, i.e. the export hard links the file instead of copying it.
origi_inode=$(plash nodepath 2 | xargs -I{} stat --format '%i' {}/_data/root/da)
shallow_copy_inode=$(stat --format '%i' "$tmp"/rootfs/da)
test "$origi_inode" = "$shallow_copy_inode"

Source Code

from plash import utils
import subprocess
import tarfile
import sys
import os
import io

# Shallow-copy a container's root filesystem to OUT: directory entries are
# recreated with their metadata, every other entry is hard linked to the
# file stored in the plash data directory.
#
# Usage: plash shallow-copy CONTAINER OUT

# Enter new user and mount namespaces first (see plash.utils); presumably
# this keeps the mount performed below private to this process -- confirm.
utils.unshare_user()
utils.unshare_mount()

try:
    container = sys.argv[1]
    outdir = sys.argv[2]
except IndexError:
    utils.die_with_usage()

nodepath = utils.plash_call("nodepath", container)
tmpout = utils.plash_call("mkdtemp")
plash_data = utils.plash_call("data")

mountpoint = os.path.join(plash_data, "mnt")

utils.plash_call("mount", container, mountpoint)

# Pass 1: walk the mounted container. Directory entries (os.walk also
# reports symlinks to directories here) go into an in-memory tar archive so
# their metadata can be replayed later; everything else is only recorded by
# its path relative to the container root.
io_bytes = io.BytesIO()
file_list = []
try:
    # The context manager guarantees the archive is finalised and closed
    # even if the walk fails half way.
    with tarfile.open(fileobj=io_bytes, mode="w:xz") as dirs_only_tar:
        for subdir, dirs, files in os.walk(mountpoint):
            rel_subdir = os.path.relpath(subdir, mountpoint)
            for dirname in dirs:
                dirs_only_tar.add(
                    os.path.join(subdir, dirname),
                    os.path.join(rel_subdir, dirname),
                    recursive=False,
                )
            file_list.extend(os.path.join(rel_subdir, name) for name in files)
finally:
    # Best effort, as before: the exit status of umount is not checked.
    subprocess.call(["umount", mountpoint])

io_bytes.seek(0)

# The archive was produced by this very process, so extract it without
# restrictions. Being explicit matters: the default extraction filter of
# newer Python versions rejects absolute symlinks and drops ownership and
# special mode bits, which a root filesystem export must keep. Older
# interpreters have no filter support, hence the feature check.
extract_kwargs = {}
if hasattr(tarfile, "fully_trusted_filter"):
    extract_kwargs["filter"] = "fully_trusted"

# write all directories (empty) to tmpout
with tarfile.open(fileobj=io_bytes, mode="r") as tar:
    tar.extractall(tmpout, **extract_kwargs)

# Candidate source roots, built from the node path components that follow
# the last "/layer/" in reversed order (presumably newest layer first --
# confirm against the plash data layout).
parts = list(reversed(nodepath.split("/layer/")[-1].split("/")))
resolve_paths = [os.path.join(plash_data, "index", i, "_data", "root") for i in parts]

# Pass 2: hard link every recorded entry from the first root that has it.
# Entries found in no root are skipped silently, as before.
for rel_file in file_list:
    target = os.path.join(tmpout, rel_file)
    for resolve_path in resolve_paths:
        src = os.path.join(resolve_path, rel_file)
        try:
            # Link symlinks themselves rather than what they point to.
            os.link(src, target, follow_symlinks=False)
        except FileNotFoundError:
            continue
        break

# Publish the finished tree in one step; OSError (e.g. OUT already exists
# and is not empty) is reported through utils.catch_and_die.
with utils.catch_and_die([OSError]):
    os.rename(tmpout, outdir)