From e76691f18bd1aff8ed3e8d99f6360ccd2811e1e0 Mon Sep 17 00:00:00 2001 From: Campbell Barton Date: Thu, 19 Feb 2015 18:15:13 +1100 Subject: [PATCH] support for partial downloads (use local cache where possible) --- bam/blend/blendfile_pack.py | 15 +- bam/cli.py | 161 ++++++++++++++++-- .../application/modules/resources/__init__.py | 64 ++++++- 3 files changed, 219 insertions(+), 21 deletions(-) diff --git a/bam/blend/blendfile_pack.py b/bam/blend/blendfile_pack.py index eb427eb..6996c20 100755 --- a/bam/blend/blendfile_pack.py +++ b/bam/blend/blendfile_pack.py @@ -165,6 +165,9 @@ def pack( path_temp_files = set() path_copy_files = set() + # path_temp_files --> original-location + path_temp_files_orig = {} + TEMP_SUFFIX = b'@' if report is None: @@ -212,6 +215,7 @@ def pack( os.makedirs(os.path.dirname(filepath_tmp), exist_ok=True) shutil.copy(filepath, filepath_tmp) path_temp_files.add(filepath_tmp) + path_temp_files_orig[filepath_tmp] = filepath return filepath_tmp # ----------------- @@ -384,7 +388,7 @@ def pack( blendfile_dst_basename = os.path.basename(blendfile_dst).decode('utf-8') if blendfile_src_basename != blendfile_dst_basename: - if mode != 'ZIP': + if mode == 'FILE': deps_remap[blendfile_dst_basename] = deps_remap[blendfile_src_basename] del deps_remap[blendfile_src_basename] del blendfile_src_basename, blendfile_dst_basename @@ -403,6 +407,13 @@ def pack( # main file XXX, should have better way! 
paths_remap[os.path.basename(blendfile_src).decode('utf-8')] = relbase(blendfile_src).decode('utf-8') + # blend libs + for dst in path_temp_files: + src = path_temp_files_orig[dst] + k = os.path.relpath(dst[:-len(TEMP_SUFFIX)], base_dir_dst_temp).decode('utf-8') + paths_remap[k] = relbase(src).decode('utf-8') + del k + del relbase if paths_uuid is not None: @@ -494,6 +505,8 @@ def pack( del _compress_level_orig, _compress_mode yield report(" %s: %r\n" % (colorize("written", color='green'), blendfile_dst)) + elif mode == 'NONE': + pass else: raise Exception("%s not a known mode" % mode) diff --git a/bam/cli.py b/bam/cli.py index 0de685a..53f2ac7 100755 --- a/bam/cli.py +++ b/bam/cli.py @@ -491,6 +491,10 @@ class bam_commands: all_deps=False, ): + # --------- + # constants + CHUNK_SIZE = 1024 + cfg = bam_config.load(abort=True) if output_dir is None: @@ -519,6 +523,9 @@ class bam_commands: }), } + # -------------------------------------------------------------------- + # First request we simply get a list of files to download + # import requests r = requests.get( bam_session.request_url("file"), @@ -556,7 +563,7 @@ class bam_commands: break tot_size = 0 - for chunk in r.iter_content(chunk_size=1024): + for chunk in r.iter_content(chunk_size=CHUNK_SIZE): if chunk: # filter out keep-alive new chunks tot_size += len(chunk) f.write(chunk) @@ -564,6 +571,7 @@ class bam_commands: sys.stdout.write("\rdownload: [%03d%%]" % ((100 * tot_size) // msg_size)) sys.stdout.flush() + del struct # --------------- # extract the zip @@ -576,32 +584,151 @@ class bam_commands: os.remove(dst_dir_data) sys.stdout.write("\nwritten: %r\n" % session_rootdir) + # ---- + # Update cache + cachedir = os.path.join(bam_config.find_rootdir(cwd=session_rootdir, abort=True), ".cache") + # os.makedirs(cachedir, exist_ok=True) + + # -------------------------------------------------------------------- + # Second request we simply download the files.. 
+ # which we don't have in cache, + # note that it's possible we have all in cache and don't need to make a second request. + files = [] + with open(os.path.join(session_rootdir, ".bam_paths_remap.json")) as fp: + from bam.utils.system import uuid_from_file + paths_remap = json.load(fp) + + paths_uuid = bam_session.load_paths_uuid(session_rootdir) + print(paths_uuid) + + for f_src, f_dst in paths_remap.items(): + if f_src == ".": + continue + + uuid = paths_uuid.get(f_src) + if uuid is not None: + f_dst_abs = os.path.join(cachedir, f_dst) + if os.path.exists(f_dst_abs): + # check if we need to download this file? + uuid_exists = uuid_from_file(f_dst_abs) + assert(type(uuid) is type(uuid_exists)) + if uuid == uuid_exists: + continue + + files.append(f_dst) + + del uuid_from_file + + if files: + payload = { + "command": "checkout_download", + "arguments": json.dumps({ + "files": files, + }), + } + import requests + r = requests.get( + bam_session.request_url("file"), + params=payload, + auth=(cfg['user'], cfg['password']), + stream=True, + ) + + if r.status_code not in {200, }: + # TODO(cam), make into reusable function? + print("Error %d:\n%s" % (r.status_code, next(r.iter_content(chunk_size=1024)).decode('utf-8'))) + return + + # TODO(cam) how to tell if we get back a message payload? or real data??? + # needed so we don't read past buffer bounds + def iter_content_size(r, size, chunk_size=CHUNK_SIZE): + while size >= chunk_size: + size -= chunk_size + yield r.raw.read(chunk_size) + if size: + yield r.raw.read(size) + + + import struct + ID_MESSAGE = 1 + ID_PAYLOAD = 2 + ID_PAYLOAD_EMPTY = 3 + ID_DONE = 4 + head = r.raw.read(4) + if head != b'BAM\0': + fatal("bad header from server") + + file_index = 0 + while True: + msg_type, msg_size = struct.unpack("