support for partial downloads

(use local cache where possible)
2015-02-19 18:15:13 +11:00
parent ed1eb6df6f
commit e76691f18b
3 changed files with 219 additions and 21 deletions

View File

@@ -165,6 +165,9 @@ def pack(
path_temp_files = set()
path_copy_files = set()
# path_temp_files --> original-location
path_temp_files_orig = {}
TEMP_SUFFIX = b'@'
if report is None:
@@ -212,6 +215,7 @@ def pack(
os.makedirs(os.path.dirname(filepath_tmp), exist_ok=True)
shutil.copy(filepath, filepath_tmp)
path_temp_files.add(filepath_tmp)
path_temp_files_orig[filepath_tmp] = filepath
return filepath_tmp
# -----------------
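
For reference, a minimal standalone sketch of the bookkeeping these two hunks introduce (names mirror the diff; the helper itself is hypothetical, condensed from the copy logic shown above): each packed file is copied to a temp path carrying the `@` suffix, and the new `path_temp_files_orig` dict records where every temp file came from, so the original location can be recovered later when the path remapping is written out.

import os
import shutil

TEMP_SUFFIX = b'@'
path_temp_files = set()
path_temp_files_orig = {}  # temp-file --> original-location

def copy_to_temp(filepath, base_dir_dst_temp):
    # hypothetical helper, condensed from the hunk above
    filepath_tmp = os.path.join(base_dir_dst_temp, os.path.basename(filepath)) + TEMP_SUFFIX
    os.makedirs(os.path.dirname(filepath_tmp), exist_ok=True)
    shutil.copy(filepath, filepath_tmp)
    path_temp_files.add(filepath_tmp)
    path_temp_files_orig[filepath_tmp] = filepath  # remember the origin
    return filepath_tmp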
@@ -384,7 +388,7 @@ def pack(
blendfile_dst_basename = os.path.basename(blendfile_dst).decode('utf-8')
if blendfile_src_basename != blendfile_dst_basename:
if mode != 'ZIP':
if mode == 'FILE':
deps_remap[blendfile_dst_basename] = deps_remap[blendfile_src_basename]
del deps_remap[blendfile_src_basename]
del blendfile_src_basename, blendfile_dst_basename
@@ -403,6 +407,13 @@ def pack(
# main file XXX, should have a better way!
paths_remap[os.path.basename(blendfile_src).decode('utf-8')] = relbase(blendfile_src).decode('utf-8')
# blend libs
for dst in path_temp_files:
src = path_temp_files_orig[dst]
k = os.path.relpath(dst[:-len(TEMP_SUFFIX)], base_dir_dst_temp).decode('utf-8')
paths_remap[k] = relbase(src).decode('utf-8')
del k
del relbase
if paths_uuid is not None:
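
To make the new remapping concrete, here is a standalone sketch (with fabricated inputs) of what the loop above computes: the `@` suffix is stripped from each temp path, the result is made relative to the temp directory, and that key is mapped to the file's original location (which `relbase()` in the diff further converts to a project-relative path; the raw path is used here for brevity).

import os

TEMP_SUFFIX = b'@'
base_dir_dst_temp = b'/tmp/pack'  # fabricated
path_temp_files_orig = {
    b'/tmp/pack/maps/tex.png@': b'/project/maps/tex.png',  # fabricated
}

paths_remap = {}
for dst, src in path_temp_files_orig.items():
    k = os.path.relpath(dst[:-len(TEMP_SUFFIX)], base_dir_dst_temp).decode('utf-8')
    paths_remap[k] = src.decode('utf-8')

assert paths_remap == {'maps/tex.png': '/project/maps/tex.png'}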
@@ -494,6 +505,8 @@ def pack(
del _compress_level_orig, _compress_mode
yield report(" %s: %r\n" % (colorize("written", color='green'), blendfile_dst))
elif mode == 'NONE':
pass
else:
raise Exception("%s not a known mode" % mode)

View File

@@ -491,6 +491,10 @@ class bam_commands:
all_deps=False,
):
# ---------
# constants
CHUNK_SIZE = 1024
cfg = bam_config.load(abort=True)
if output_dir is None:
@@ -519,6 +523,9 @@ class bam_commands:
}),
}
# --------------------------------------------------------------------
# First request: simply get a list of files to download
#
import requests
r = requests.get(
bam_session.request_url("file"),
@@ -556,7 +563,7 @@ class bam_commands:
break
tot_size = 0
for chunk in r.iter_content(chunk_size=1024):
for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
if chunk: # filter out keep-alive new chunks
tot_size += len(chunk)
f.write(chunk)
@@ -564,6 +571,7 @@ class bam_commands:
sys.stdout.write("\rdownload: [%03d%%]" % ((100 * tot_size) // msg_size))
sys.stdout.flush()
del struct
# ---------------
# extract the zip
@@ -576,32 +584,151 @@ class bam_commands:
os.remove(dst_dir_data)
sys.stdout.write("\nwritten: %r\n" % session_rootdir)
# ----
# Update cache
cachedir = os.path.join(bam_config.find_rootdir(cwd=session_rootdir, abort=True), ".cache")
# os.makedirs(cachedir, exist_ok=True)
# --------------------------------------------------------------------
# Second request: download the files we don't have in cache.
#
# Note that it's possible everything is already in cache,
# in which case no second request is needed.
files = []
with open(os.path.join(session_rootdir, ".bam_paths_remap.json")) as fp:
from bam.utils.system import uuid_from_file
paths_remap = json.load(fp)
paths_uuid = bam_session.load_paths_uuid(session_rootdir)
print(paths_uuid)
for f_src, f_dst in paths_remap.items():
if f_src == ".":
continue
uuid = paths_uuid.get(f_src)
if uuid is not None:
f_dst_abs = os.path.join(cachedir, f_dst)
if os.path.exists(f_dst_abs):
# check whether we need to download this file
uuid_exists = uuid_from_file(f_dst_abs)
assert(type(uuid) is type(uuid_exists))
if uuid == uuid_exists:
continue
files.append(f_dst)
del uuid_from_file
if files:
payload = {
"command": "checkout_download",
"arguments": json.dumps({
"files": files,
}),
}
import requests
r = requests.get(
bam_session.request_url("file"),
params=payload,
auth=(cfg['user'], cfg['password']),
stream=True,
)
if r.status_code not in {200, }:
# TODO(cam), make into reusable function?
print("Error %d:\n%s" % (r.status_code, next(r.iter_content(chunk_size=1024)).decode('utf-8')))
return
# TODO(cam) how to tell if we get back a message payload? or real data???
# needed so we don't read past buffer bounds
def iter_content_size(r, size, chunk_size=CHUNK_SIZE):
while size >= chunk_size:
size -= chunk_size
yield r.raw.read(chunk_size)
if size:
yield r.raw.read(size)
import struct
ID_MESSAGE = 1
ID_PAYLOAD = 2
ID_PAYLOAD_EMPTY = 3
ID_DONE = 4
head = r.raw.read(4)
if head != b'BAM\0':
fatal("bad header from server")
file_index = 0
while True:
msg_type, msg_size = struct.unpack("<II", r.raw.read(8))
if msg_type == ID_MESSAGE:
sys.stdout.write(r.raw.read(msg_size).decode('utf-8'))
sys.stdout.flush()
elif msg_type == ID_PAYLOAD_EMPTY:
file_index += 1
elif msg_type == ID_PAYLOAD:
f_rel = files[file_index]
f_abs = os.path.join(cachedir, files[file_index])
file_index += 1
# server also prints... we could do this a bit differently...
sys.stdout.write("file: %r" % f_rel)
sys.stdout.flush()
os.makedirs(os.path.dirname(f_abs), exist_ok=True)
with open(f_abs, "wb") as f:
tot_size = 0
# for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
for chunk in iter_content_size(r, msg_size, chunk_size=CHUNK_SIZE):
if chunk: # filter out keep-alive new chunks
tot_size += len(chunk)
f.write(chunk)
f.flush()
sys.stdout.write("\rdownload: [%03d%%]" % ((100 * tot_size) // msg_size))
sys.stdout.flush()
assert(tot_size == msg_size)
elif msg_type == ID_DONE:
break
else:
raise Exception("Unknown message-type %d" % msg_type)
del struct
del files
# ------------
# Update Cache
#
# TODO, remove stale cache
cachedir = os.path.join(bam_config.find_rootdir(cwd=session_rootdir, abort=True), ".cache")
# os.makedirs(cachedir, exist_ok=True)
# we need this to map to project-level paths
#
# Copy cache into our session before applying binary edits.
with open(os.path.join(session_rootdir, ".bam_paths_remap.json")) as fp:
paths_remap = json.load(fp)
for f_src, f_dst in paths_remap.items():
if f_src == ".":
continue
f_src_abs = os.path.join(session_rootdir, f_src)
if not os.path.exists(f_src_abs):
for f_dst, f_src in paths_remap.items():
if f_dst == ".":
continue
f_dst_abs = os.path.join(cachedir, f_dst)
os.makedirs(os.path.dirname(f_dst_abs), exist_ok=True)
import shutil
# print("from ", f_src_abs, os.path.exists(f_src_abs))
# print("to ", f_dst_abs, os.path.exists(f_dst_abs))
# print("CREATING: ", f_dst_abs)
shutil.copyfile(f_src_abs, f_dst_abs)
del shutil
f_src_abs = os.path.join(cachedir, f_src)
# this should 'almost' always be true
if os.path.exists(f_src_abs):
f_dst_abs = os.path.join(session_rootdir, f_dst)
os.makedirs(os.path.dirname(f_dst_abs), exist_ok=True)
import shutil
# print("from ", f_dst_abs, os.path.exists(f_dst_abs))
# print("to ", f_src_abs, os.path.exists(f_src_abs))
# print("CREATING: ", f_src_abs)
shutil.copyfile(f_src_abs, f_dst_abs)
del shutil
# import time
# time.sleep(10000)
del paths_remap, cachedir
# ...done updating cache
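
As a footnote on the download handler: the stream it parses is a simple framed format, and a compact standalone sketch may make it easier to follow. The magic header and the four message IDs are taken from the code above; the `frames()` helper and the demo stream are fabricated for illustration. The real code additionally reads ID_PAYLOAD bodies chunk-by-chunk via `iter_content_size()` rather than pulling a whole frame at once, so large files never sit in memory.

import io
import struct

ID_MESSAGE, ID_PAYLOAD, ID_PAYLOAD_EMPTY, ID_DONE = 1, 2, 3, 4

def frames(stream):
    # yield (msg_type, body) pairs until the server signals completion
    if stream.read(4) != b'BAM\0':
        raise ValueError("bad header from server")
    while True:
        msg_type, msg_size = struct.unpack("<II", stream.read(8))
        if msg_type == ID_DONE:
            return
        yield msg_type, stream.read(msg_size)

# fabricated response: one status message, one 3-byte file payload, done
demo = io.BytesIO(
    b'BAM\0' +
    struct.pack("<II", ID_MESSAGE, 5) + b'hello' +
    struct.pack("<II", ID_PAYLOAD, 3) + b'abc' +
    struct.pack("<II", ID_DONE, 0)
)
for msg_type, body in frames(demo):
    print(msg_type, body)  # 1 b'hello', then 2 b'abc'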