Commit f8b5ad8: Merge pull request #132 from google/upstream-1544753093

Add docker cache directory

KaylaNguyen authored Dec 14, 2018
2 parents: a6f2d42 + eeffdb4

Showing 5 changed files with 125 additions and 13 deletions.
README.md: 2 changes (0 additions, 2 deletions)
@@ -71,7 +71,6 @@ optional arguments:
from DOCKER_CONFIG
--stderrthreshold STDERRTHRESHOLD
Write log events at or above this level to stderr.
```

## pusher.par
@@ -107,7 +106,6 @@ optional arguments:
from DOCKER_CONFIG
--stderrthreshold STDERRTHRESHOLD
Write log events at or above this level to stderr.
```

## importer.par
client/v2_2/save_.py: 60 changes (54 additions, 6 deletions)
@@ -18,6 +18,7 @@

from __future__ import print_function

import errno
import io
import json
import os
@@ -140,8 +141,10 @@ def tarball(name, image,
multi_image_tarball({name: image}, tar, {})


-def fast(image, directory,
-         threads = 1):
+def fast(image,
+         directory,
+         threads = 1,
+         cache_directory = None):
"""Produce a FromDisk compatible file layout under the provided directory.
After calling this, the following filesystem will exist:
@@ -162,6 +165,7 @@ def fast(image, directory,
image: a docker image to save.
directory: an existing empty directory under which to save the layout.
threads: the number of threads to use when performing the upload.
cache_directory: directory in which to store cached layer files.
Returns:
A tuple whose first element is the path to the config file, and whose second
@@ -174,6 +178,38 @@ def write_file(name, accessor,
with io.open(name, u'wb') as f:
f.write(accessor(arg))

def write_file_and_store(name, accessor,
                         arg, cached_layer):
  write_file(cached_layer, accessor, arg)
  link(cached_layer, name)

def link(source, dest):
  """Creates a symbolic link dest pointing to source.

  Unlinks dest first to remove "old" layers if needed: e.g., image A:latest
  has layers 1, 2 and 3; after a while it has layers 1, 2 and 3'. Since in
  both cases the layers are named 001, 002 and 003, unlinking ensures that
  the correct layers end up linked in the image directory.

  Args:
    source: path of the cached layer to link to.
    dest: path of the symlink to create in the image directory.
  """
  try:
    os.symlink(source, dest)
  except OSError as e:
    if e.errno == errno.EEXIST:
      os.unlink(dest)
      os.symlink(source, dest)
    else:
      raise e

def valid(cached_layer, digest):
  """Returns whether the cached layer's contents still match digest."""
  with io.open(cached_layer, u'rb') as f:
    current_digest = docker_digest.SHA256(f.read(), '')
  return current_digest == digest

with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
future_to_params = {}
config_file = os.path.join(directory, 'config.json')
@@ -192,18 +228,30 @@ def write_file(name, accessor,
layers = []
for blob in reversed(image.fs_layers()):
# Create a local copy
layer_name = os.path.join(directory, '%03d.tar.gz' % idx)
digest_name = os.path.join(directory, '%03d.sha256' % idx)
# Strip the sha256: prefix
digest = blob[7:].encode('utf8')
f = executor.submit(
write_file,
digest_name,
# Strip the sha256: prefix
lambda blob: blob[7:].encode('utf8'),
blob)
future_to_params[f] = digest_name

layer_name = os.path.join(directory, '%03d.tar.gz' % idx)
f = executor.submit(write_file, layer_name, image.blob, blob)
future_to_params[f] = layer_name
if cache_directory:
# Search for a local cached copy
cached_layer = os.path.join(cache_directory, digest)
if os.path.exists(cached_layer) and valid(cached_layer, digest):
f = executor.submit(link, cached_layer, layer_name)
future_to_params[f] = layer_name
else:
f = executor.submit(write_file_and_store, layer_name, image.blob,
blob, cached_layer)
future_to_params[f] = layer_name
else:
f = executor.submit(write_file, layer_name, image.blob, blob)
future_to_params[f] = layer_name

layers.append((digest_name, layer_name))
idx += 1
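
For orientation, a minimal sketch of how the new cache_directory parameter might be driven from client code. This is not part of the change: the import paths mirror those used by tools/fast_puller_.py below, and the image name and directories are illustrative.

```
import httplib2

from containerregistry.client import docker_creds
from containerregistry.client import docker_name
from containerregistry.client.v2_2 import docker_image as v2_2_image
from containerregistry.client.v2_2 import save
from containerregistry.transport import transport_pool

name = docker_name.Tag('gcr.io/google-appengine/python:latest')
creds = docker_creds.Anonymous()
transport = transport_pool.Http(httplib2.Http, size=8)

with v2_2_image.FromRegistry(name, creds, transport) as img:
  # Layers already present in the cache directory (and passing the digest
  # check in valid()) are symlinked into place rather than written again.
  config_file, layers = save.fast(
      img,
      directory='/tmp/pull_output',
      threads=8,
      cache_directory='/tmp/containerregistry_docker_cache_dir')
```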
puller_test.sh: 52 changes (52 additions, 0 deletions)
@@ -19,6 +19,8 @@
# Trick to chase the symlink before the docker build.
cp -f puller.par puller2.par

# Set by test_image_with_timing to the duration, in seconds, of the last pull.
timing=-1

# Test pulling an image by just invoking the puller
function test_puller() {
local image=$1
@@ -81,6 +83,53 @@ function test_image() {
test_base "${image}" python2.7 gcr.io/cloud-builders/bazel
}

function test_puller_with_cache() {
local image=$1

# Test it in our current environment.
puller.par --name="${image}" --directory=/tmp/ --cache=/tmp/containerregistry_docker_cache_dir
}

function test_image_with_cache() {
local image=$1

test_image_with_timing "${image}"
local first_pull_timing=$timing
echo "TIMING: ${image} - First pull took ${first_pull_timing} seconds"

test_image_with_timing "${image}"
local second_pull_timing=$timing
echo "TIMING: ${image} - Second pull took ${second_pull_timing} seconds"
# TODO: Is there a better way to test that the cache was used besides
# asserting that first_pull > second_pull? (See the sketch after this diff.)
}

function test_image_with_timing() {
local image=$1

echo "TESTING: ${image}"
local pull_start=$(date +%s)
test_puller_with_cache "${image}"
local pull_end=$(date +%s)
timing=$(($pull_end-$pull_start))

test_base "${image}" python2.7 python:2.7
test_base "${image}" python2.7 gcr.io/cloud-builders/bazel
}

function clear_cache_directory() {
rm -fr /tmp/containerregistry_docker_cache_dir
}

function create_cache_directory() {
mkdir -p /tmp/containerregistry_docker_cache_dir
}

clear_cache_directory

create_cache_directory

# Test pulling with cache
test_image_with_cache gcr.io/google-appengine/python:latest

# Test pulling a trivial image.
test_image gcr.io/google-containers/pause:2.0

@@ -125,3 +174,6 @@ test_puller_multiplatform index.docker.io/library/busybox:1.29.3 \
# TODO: add multiplatform test cases on --os-features and --features

# TODO(user): Add an authenticated pull test.

clear_cache_directory
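
On the TODO above: since save_.py links cached layers instead of rewriting them, a stronger check than comparing pull times would be to assert that each pulled layer is a symlink that resolves into the cache directory. A sketch in Python; the helper name is hypothetical and the paths mirror this test's /tmp layout.

```
import os

def assert_layers_served_from_cache(output_dir, cache_dir):
  """Asserts every pulled layer is a symlink resolving into cache_dir."""
  cache_root = os.path.realpath(cache_dir) + os.sep
  for entry in os.listdir(output_dir):
    if not entry.endswith('.tar.gz'):
      continue
    layer = os.path.join(output_dir, entry)
    assert os.path.islink(layer), '%s was written, not linked' % layer
    assert os.path.realpath(layer).startswith(cache_root), (
        '%s resolves outside the cache' % layer)

assert_layers_served_from_cache(
    '/tmp/', '/tmp/containerregistry_docker_cache_dir')
```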

tools/fast_puller_.py: 20 changes (17 additions, 3 deletions)
@@ -60,6 +60,8 @@
action='store',
help='The path to the directory where the client configuration files are '
'located. Overrides the value from DOCKER_CONFIG')
parser.add_argument(
'--cache', action='store',
help='Directory in which to cache image layer files.')

_THREADS = 8

@@ -108,20 +110,32 @@ def main():
platform = platform_args.FromArgs(args)
# pytype: disable=wrong-arg-types
with img_list.resolve(platform) as default_child:
save.fast(default_child, args.directory, threads=_THREADS)
save.fast(
default_child,
args.directory,
threads=_THREADS,
cache_directory=args.cache)
return
# pytype: enable=wrong-arg-types

logging.info('Pulling v2.2 image from %r ...', name)
with v2_2_image.FromRegistry(name, creds, transport, accept) as v2_2_img:
if v2_2_img.exists():
save.fast(v2_2_img, args.directory, threads=_THREADS)
save.fast(
v2_2_img,
args.directory,
threads=_THREADS,
cache_directory=args.cache)
return

logging.info('Pulling v2 image from %r ...', name)
with v2_image.FromRegistry(name, creds, transport) as v2_img:
with v2_compat.V22FromV2(v2_img) as v2_2_img:
save.fast(v2_2_img, args.directory, threads=_THREADS)
save.fast(
v2_2_img,
args.directory,
threads=_THREADS,
cache_directory=args.cache)
return
# pylint: disable=broad-except
except Exception as e:
transform/v2_2/metadata_.py: 4 changes (2 additions, 2 deletions)
@@ -160,7 +160,7 @@ def Override(data,

output['config'] = defaults.get('config', {})

# pytype: disable=attribute-error
# pytype: disable=attribute-error,unsupported-operands
if options.entrypoint:
output['config']['Entrypoint'] = options.entrypoint
if options.cmd:
@@ -204,7 +204,7 @@ def Override(data,

if options.workdir:
output['config']['WorkingDir'] = options.workdir
# pytype: enable=attribute-error
# pytype: enable=attribute-error,unsupported-operands

# diff_ids are ordered from bottom-most to top-most
diff_ids = defaults.get('rootfs', {}).get('diff_ids', [])
