#!/usr/bin/env python3
# usage: plash shallow-copy CONTAINER OUT
# Copy and hard link the filesystem to OUT.
# This will export a root filesystem to OUT with minimal overhead. Directories
# are copied with their metadata, but all files are hard linked. You can
# imagine them pointing to the real file inside the plash data. This takes only
# a short time and saves disk space. The downside is that you have to be very
# careful not to change files, since this would change plash's build
# data and affect other containers.
# Examples:
# $ mkdir /tmp/mydir
# $ plash shallow-copy --from ubuntu -- /tmp/mydir/ubuntu
# $ plash shallow-copy --from ubuntu --apt git -- /tmp/mydir/ubuntu-with-git
# $ plash sudo du -sh /tmp/mydir/ubuntu
# 413M    /tmp/mydir/ubuntu
# $ plash sudo du -sh /tmp/mydir/ubuntu-with-git/
# 407M    /tmp/mydir/ubuntu-with-git/
# $ plash sudo du -sh /tmp/mydir/
# 423M    /tmp/mydir/
# See

from plash import utils
import subprocess
import tarfile
import sys
import os
import io

USAGE = 'usage: plash shallow-copy CONTAINER OUT'

# Both positional arguments (CONTAINER and OUT) are required.
try:
    container = sys.argv[1]
    outdir = sys.argv[2]
except IndexError:
    # NOTE(review): the original handler body is not recoverable from this
    # copy of the script; exiting with the usage line is a stand-in. If
    # plash.utils offers a dedicated usage helper, prefer that.
    sys.exit(USAGE)

nodepath = utils.nodepath_or_die(container)

# The copy is assembled in tmpout and only renamed to outdir as the last step.
tmpout = utils.mkdtemp()

plash_data = os.environ["PLASH_DATA"]
mountpoint = os.path.join(plash_data, 'mnt')

with utils.catch_and_die([subprocess.CalledProcessError], silent=True):
    subprocess.check_call(['plash', 'mount', container, mountpoint])

# Walk the mounted filesystem once:
#  - every directory entry is recorded (without its contents) in an in-memory
#    tar archive, so its metadata can be replayed into tmpout afterwards;
#  - the relative path of everything os.walk reports as a file is collected
#    for the hard-linking step further down.
io_bytes = io.BytesIO()
file_list = []
try:
    with tarfile.open(fileobj=io_bytes, mode='w:xz') as dirs_only_tar:
        for subdir, dirs, files in os.walk(mountpoint):
            rel_subdir = os.path.relpath(subdir, mountpoint)
            for dirname in dirs:
                dirs_only_tar.add(
                    os.path.join(subdir, dirname),
                    os.path.join(rel_subdir, dirname),
                    recursive=False,
                )
            file_list.extend(
                os.path.join(rel_subdir, name) for name in files
            )
finally:
    # Always release the mount, even if walking or archiving failed.
    subprocess.check_call(['umount', mountpoint])

# write all directories (empty) to tmpout
io_bytes.seek(0)
with tarfile.open(fileobj=io_bytes, mode='r') as tar:
    # The archive was produced by this very process, so it is trusted. Newer
    # Python versions default to a restrictive extraction filter that drops
    # ownership and some mode bits; opt out of it where filters exist so the
    # directory metadata is kept.
    extract_kwargs = {}
    if hasattr(tarfile, 'fully_trusted_filter'):
        extract_kwargs['filter'] = 'fully_trusted'
    tar.extractall(tmpout, **extract_kwargs)

# Candidate source roots, one per component of the part of nodepath that
# follows the last '/layer/', in reversed order.
# NOTE(review): presumably these are layer ids, topmost layer first, so the
# first match wins like in the mounted view -- confirm against plash's layout.
parts = list(reversed(nodepath.split('/layer/')[-1].split('/')))
resolve_paths = [
    os.path.join(plash_data, 'index', i, '_data', 'root') for i in parts
]

# Hard link each file from the first root that contains it.
for file in file_list:
    target = os.path.join(tmpout, file)
    for resolve_path in resolve_paths:
        src = os.path.join(resolve_path, file)
        try:
            # follow_symlinks=False links the symlink itself, not its target.
            os.link(src, target, follow_symlinks=False)
        except FileNotFoundError:
            continue  # not present under this root; try the next one
        break  # linked; the remaining roots must not be tried

with utils.catch_and_die([OSError]):
    os.rename(tmpout, outdir)