support for partial downloads

(use local cache where possible)
2015-02-19 18:15:13 +11:00
parent ed1eb6df6f
commit e76691f18b
3 changed files with 219 additions and 21 deletions
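
In outline, checkout becomes a two-request exchange (as far as the three diffs below show):

1. The client's first request has the server run the packer with the new 'NONE' mode: dependencies are walked and the manifests built, but no payload is packed ("only return a list of files we _would_ pack").
2. Using .bam_paths_remap.json plus the uuid manifest, the client checks its local .cache, comparing each entry with uuid_from_file() to find files that are missing or stale.
3. Only those files are fetched by a second 'checkout_download' request, streamed as length-prefixed records behind a b'BAM\0' magic.
4. Cached files are copied from .cache into the session before binary edits are applied.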

File 1 of 3: def pack(...)

@@ -165,6 +165,9 @@ def pack(
     path_temp_files = set()
     path_copy_files = set()
+    # path_temp_files --> original-location
+    path_temp_files_orig = {}
+
     TEMP_SUFFIX = b'@'
     if report is None:
@@ -212,6 +215,7 @@ def pack(
             os.makedirs(os.path.dirname(filepath_tmp), exist_ok=True)
             shutil.copy(filepath, filepath_tmp)
             path_temp_files.add(filepath_tmp)
+            path_temp_files_orig[filepath_tmp] = filepath
             return filepath_tmp
     # -----------------
@@ -384,7 +388,7 @@ def pack(
     blendfile_dst_basename = os.path.basename(blendfile_dst).decode('utf-8')
     if blendfile_src_basename != blendfile_dst_basename:
-        if mode != 'ZIP':
+        if mode == 'FILE':
             deps_remap[blendfile_dst_basename] = deps_remap[blendfile_src_basename]
             del deps_remap[blendfile_src_basename]
     del blendfile_src_basename, blendfile_dst_basename
@@ -403,6 +407,13 @@ def pack(
     # main file XXX, should have better way!
     paths_remap[os.path.basename(blendfile_src).decode('utf-8')] = relbase(blendfile_src).decode('utf-8')
+    # blend libs
+    for dst in path_temp_files:
+        src = path_temp_files_orig[dst]
+        k = os.path.relpath(dst[:-len(TEMP_SUFFIX)], base_dir_dst_temp).decode('utf-8')
+        paths_remap[k] = relbase(src).decode('utf-8')
+        del k
+
     del relbase
     if paths_uuid is not None:
@@ -494,6 +505,8 @@ def pack(
         del _compress_level_orig, _compress_mode
         yield report(" %s: %r\n" % (colorize("written", color='green'), blendfile_dst))
+    elif mode == 'NONE':
+        pass
     else:
         raise Exception("%s not a known mode" % mode)
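
Taken together, the pack() changes mean .bam_paths_remap.json now records an entry for every packed blend library, not only the main file: path_temp_files_orig remembers where each temp file came from, and the new loop keys each entry by its pack-relative path. A hypothetical manifest follows (all paths invented; the main-file line above suggests values are project-relative source paths):

    # Hypothetical .bam_paths_remap.json contents after this commit:
    {
        ".": "shots/sc_010",                                    # base directory entry (skipped by the client loops)
        "sc_010_anim.blend": "shots/sc_010/sc_010_anim.blend",  # main file (pre-existing entry)
        "chars/rabbit.blend": "lib/chars/rabbit.blend",         # blend lib (new in this commit)
    }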

File 2 of 3: class bam_commands

@@ -491,6 +491,10 @@ class bam_commands:
             all_deps=False,
             ):
+        # ---------
+        # constants
+        CHUNK_SIZE = 1024
+
         cfg = bam_config.load(abort=True)
         if output_dir is None:
@@ -519,6 +523,9 @@ class bam_commands:
                 }),
             }
+        # --------------------------------------------------------------------
+        # First request we simply get a list of files to download
+        #
         import requests
         r = requests.get(
                 bam_session.request_url("file"),
@@ -556,7 +563,7 @@ class bam_commands:
                     break
                 tot_size = 0
-                for chunk in r.iter_content(chunk_size=1024):
+                for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
                     if chunk:  # filter out keep-alive new chunks
                         tot_size += len(chunk)
                         f.write(chunk)
@@ -564,6 +571,7 @@ class bam_commands:
                         sys.stdout.write("\rdownload: [%03d%%]" % ((100 * tot_size) // msg_size))
                         sys.stdout.flush()
+        del struct
         # ---------------
         # extract the zip
@@ -576,32 +584,151 @@ class bam_commands:
             os.remove(dst_dir_data)
         sys.stdout.write("\nwritten: %r\n" % session_rootdir)
+        # ----
+        # Update cache
+        cachedir = os.path.join(bam_config.find_rootdir(cwd=session_rootdir, abort=True), ".cache")
+        # os.makedirs(cachedir, exist_ok=True)
+
+        # --------------------------------------------------------------------
+        # Second request we simply download the files..
+        #
+        # which we don't have in cache,
+        # note that its possible we have all in cache and don't need to make a second request.
+        files = []
+        with open(os.path.join(session_rootdir, ".bam_paths_remap.json")) as fp:
+            from bam.utils.system import uuid_from_file
+            paths_remap = json.load(fp)
+            paths_uuid = bam_session.load_paths_uuid(session_rootdir)
+            print(paths_uuid)
+            for f_src, f_dst in paths_remap.items():
+                if f_src == ".":
+                    continue
+                uuid = paths_uuid.get(f_src)
+                if uuid is not None:
+                    f_dst_abs = os.path.join(cachedir, f_dst)
+                    if os.path.exists(f_dst_abs):
+                        # check if we need to download this file?
+                        uuid_exists = uuid_from_file(f_dst_abs)
+                        assert(type(uuid) is type(uuid_exists))
+                        if uuid == uuid_exists:
+                            continue
+                files.append(f_dst)
+            del uuid_from_file
+
+        if files:
+            payload = {
+                "command": "checkout_download",
+                "arguments": json.dumps({
+                    "files": files,
+                    }),
+                }
+            import requests
+            r = requests.get(
+                    bam_session.request_url("file"),
+                    params=payload,
+                    auth=(cfg['user'], cfg['password']),
+                    stream=True,
+                    )
+
+            if r.status_code not in {200, }:
+                # TODO(cam), make into reusable function?
+                print("Error %d:\n%s" % (r.status_code, next(r.iter_content(chunk_size=1024)).decode('utf-8')))
+                return
+
+            # TODO(cam) how to tell if we get back a message payload? or real data???
+
+            # needed so we don't read past buffer bounds
+            def iter_content_size(r, size, chunk_size=CHUNK_SIZE):
+                while size >= chunk_size:
+                    size -= chunk_size
+                    yield r.raw.read(chunk_size)
+                if size:
+                    yield r.raw.read(size)
+
+            import struct
+            ID_MESSAGE = 1
+            ID_PAYLOAD = 2
+            ID_PAYLOAD_EMPTY = 3
+            ID_DONE = 4
+
+            head = r.raw.read(4)
+            if head != b'BAM\0':
+                fatal("bad header from server")
+
+            file_index = 0
+            while True:
+                msg_type, msg_size = struct.unpack("<II", r.raw.read(8))
+                if msg_type == ID_MESSAGE:
+                    sys.stdout.write(r.raw.read(msg_size).decode('utf-8'))
+                    sys.stdout.flush()
+                elif msg_type == ID_PAYLOAD_EMPTY:
+                    file_index += 1
+                elif msg_type == ID_PAYLOAD:
+                    f_rel = files[file_index]
+                    f_abs = os.path.join(cachedir, files[file_index])
+                    file_index += 1
+
+                    # server also prints... we could do this a bit different...
+                    sys.stdout.write("file: %r" % f_rel)
+                    sys.stdout.flush()
+
+                    os.makedirs(os.path.dirname(f_abs), exist_ok=True)
+                    with open(f_abs, "wb") as f:
+                        tot_size = 0
+                        # for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
+                        for chunk in iter_content_size(r, msg_size, chunk_size=CHUNK_SIZE):
+                            if chunk:  # filter out keep-alive new chunks
+                                tot_size += len(chunk)
+                                f.write(chunk)
+                                f.flush()
+                                sys.stdout.write("\rdownload: [%03d%%]" % ((100 * tot_size) // msg_size))
+                                sys.stdout.flush()
+                        assert(tot_size == msg_size)
+                elif msg_type == ID_DONE:
+                    break
+                else:
+                    raise Exception("Unknown message-type %d" % msg_type)
+            del struct
+        del files
         # ------------
         # Update Cache
         #
         # TODO, remove stale cache
-        cachedir = os.path.join(bam_config.find_rootdir(cwd=session_rootdir, abort=True), ".cache")
-        # os.makedirs(cachedir, exist_ok=True)
         # we need this to map to project level paths
+        #
+        # Copy cache into our session before applying binary edits.
         with open(os.path.join(session_rootdir, ".bam_paths_remap.json")) as fp:
             paths_remap = json.load(fp)
-            for f_src, f_dst in paths_remap.items():
-                if f_src == ".":
-                    continue
-                f_src_abs = os.path.join(session_rootdir, f_src)
-                if not os.path.exists(f_src_abs):
-                    continue
-                f_dst_abs = os.path.join(cachedir, f_dst)
-                os.makedirs(os.path.dirname(f_dst_abs), exist_ok=True)
-                import shutil
-                # print("from ", f_src_abs, os.path.exists(f_src_abs))
-                # print("to   ", f_dst_abs, os.path.exists(f_dst_abs))
-                # print("CREATING:  ", f_dst_abs)
-                shutil.copyfile(f_src_abs, f_dst_abs)
-                del shutil
+            for f_dst, f_src in paths_remap.items():
+                if f_dst == ".":
+                    continue
+                f_src_abs = os.path.join(cachedir, f_src)
+                # this should 'almost' always be true
+                if os.path.exists(f_src_abs):
+                    f_dst_abs = os.path.join(session_rootdir, f_dst)
+                    os.makedirs(os.path.dirname(f_dst_abs), exist_ok=True)
+                    import shutil
+                    # print("from ", f_dst_abs, os.path.exists(f_dst_abs))
+                    # print("to   ", f_src_abs, os.path.exists(f_src_abs))
+                    # print("CREATING:  ", f_src_abs)
+                    shutil.copyfile(f_src_abs, f_dst_abs)
+                    del shutil
+        # import time
+        # time.sleep(10000)
         del paths_remap, cachedir
         # ...done updating cache
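
Since the record layout the client parses above is easy to get subtly wrong, here is a self-contained sketch of the framing: a 4-byte magic, then a sequence of records whose headers are two little-endian uint32 values (type, size). The message IDs and magic are copied from the diff; read_records and the BytesIO round-trip are mine, not part of bam:

    import io
    import struct

    ID_MESSAGE, ID_PAYLOAD, ID_PAYLOAD_EMPTY, ID_DONE = 1, 2, 3, 4

    def read_records(stream):
        # the server opens the stream with a 4-byte magic
        if stream.read(4) != b'BAM\0':
            raise ValueError("bad header from server")
        while True:
            # each record: <type:uint32-le><size:uint32-le><size bytes of body>
            msg_type, msg_size = struct.unpack("<II", stream.read(8))
            if msg_type == ID_DONE:
                return
            yield msg_type, stream.read(msg_size)

    # round-trip against a hand-built stream:
    data = (b'BAM\0'
            + struct.pack("<II", ID_MESSAGE, 5) + b'hello'
            + struct.pack("<II", ID_PAYLOAD_EMPTY, 0)
            + struct.pack("<II", ID_DONE, 0))
    assert list(read_records(io.BytesIO(data))) == [(ID_MESSAGE, b'hello'), (ID_PAYLOAD_EMPTY, b'')]

Reading exactly msg_size bytes per record is the same constraint iter_content_size() enforces in the diff: the stream has no delimiters, so overreading one payload corrupts every record after it.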

File 3 of 3: class FileAPI(Resource)

@@ -116,7 +116,6 @@ class FileAPI(Resource):
         super(FileAPI, self).__init__()

     def get(self, project_name):
-        filepath = request.args['filepath']
         command = request.args['command']
         command_args = request.args.get('arguments')
         if command_args is not None:
@@ -125,6 +124,7 @@ class FileAPI(Resource):
         project = Project.query.filter_by(name=project_name).first()

         if command == 'info':
+            filepath = request.args['filepath']
             r = svn.local.LocalClient(project.repository_path)
             svn_log = r.log_default(None, None, 5, filepath)
@@ -147,6 +147,7 @@ class FileAPI(Resource):
                     bundle_status=bundle_status)

         elif command == 'bundle':
+            filepath = request.args['filepath']
             #return jsonify(filepath=filepath, status="building")
             filepath = os.path.join(project.repository_path, filepath)
@@ -173,6 +174,7 @@ class FileAPI(Resource):
                     project.repository_path,
                     True,
                     report,
+                    'ZIP',
                     ):
                 pass
@@ -218,6 +220,7 @@ class FileAPI(Resource):
             return jsonify(filepath=filepath, status="building")

         elif command == 'checkout':
+            filepath = request.args['filepath']
             filepath = os.path.join(project.repository_path, filepath)

             if not os.path.exists(filepath):
@@ -249,6 +252,10 @@ class FileAPI(Resource):
                     project.repository_path,
                     command_args['all_deps'],
                     report,
+                    # we don't infact pack any files here,
+                    # only return a list of files we _would_ pack.
+                    # see: checkout_download
+                    'NONE',
                     )

             # TODO, handle fail
@@ -270,6 +277,57 @@ class FileAPI(Resource):
             # return Response(f, direct_passthrough=True)
             return Response(response_message_iter(), direct_passthrough=True)
+        elif command == 'checkout_download':
+            files = command_args['files']
+
+            def response_message_iter():
+                ID_MESSAGE = 1
+                ID_PAYLOAD = 2
+                ID_PAYLOAD_EMPTY = 3
+                ID_DONE = 4
+                # ID_PAYLOAD_APPEND = 3
+                import struct
+
+                def report(txt):
+                    txt_bytes = txt.encode('utf-8')
+                    return struct.pack('<II', ID_MESSAGE, len(txt_bytes)) + txt_bytes
+
+                yield b'BAM\0'
+
+                # pack the file!
+                import tempfile
+                # weak! (ignore original opened file)
+                '''
+                filepath_zip = tempfile.mkstemp(suffix=".zip")
+                os.close(filepath_zip[0])
+                filepath_zip = filepath_zip[1]
+                '''
+
+                for f_rel in files:
+                    f_abs = os.path.join(project.repository_path, f_rel)
+                    if os.path.exists(f_abs):
+                        yield report("%s: %r\n" % ("downloading", f_rel))
+                        # send over files
+                        with open(f_abs, 'rb') as f:
+                            f.seek(0, os.SEEK_END)
+                            f_size = f.tell()
+                            f.seek(0, os.SEEK_SET)
+                            yield struct.pack('<II', ID_PAYLOAD, f_size)
+                            while True:
+                                data = f.read(1024)
+                                if not data:
+                                    break
+                                yield data
+                    else:
+                        yield report("%s: %r\n" % ("source missing", f_rel))
+                        yield struct.pack('<II', ID_PAYLOAD_EMPTY, 0)
+
+                yield struct.pack('<II', ID_DONE, 0)
+
+            return Response(response_message_iter(), direct_passthrough=True)

         else:
             return jsonify(message="Command unknown")
@@ -379,7 +437,7 @@ class FileAPI(Resource):
             return jsonify(message='File not allowed')

     @staticmethod
-    def pack_fn(filepath, filepath_zip, paths_remap_relbase, all_deps, report):
+    def pack_fn(filepath, filepath_zip, paths_remap_relbase, all_deps, report, mode):
         """
         'paths_remap_relbase' is the project path,
         we want all paths to be relative to this so we don't get server path included.
@@ -403,7 +461,7 @@ class FileAPI(Resource):
         try:
             yield from blendfile_pack.pack(
-                    filepath.encode('utf-8'), filepath_zip.encode('utf-8'), mode='ZIP',
+                    filepath.encode('utf-8'), filepath_zip.encode('utf-8'), mode=mode,
                     paths_remap_relbase=paths_remap_relbase.encode('utf-8'),
                     deps_remap=deps_remap, paths_remap=paths_remap, paths_uuid=paths_uuid,
                     all_deps=all_deps,
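
For comparison, the server side of the same framing boiled down to its essentials (iter_response is a hypothetical helper of mine; the real response_message_iter() above also interleaves ID_MESSAGE progress reports and pulls files from project.repository_path):

    import os
    import struct

    ID_PAYLOAD, ID_PAYLOAD_EMPTY, ID_DONE = 2, 3, 4
    CHUNK_SIZE = 1024

    def iter_response(paths):
        # 4-byte magic so the client can sanity-check the stream
        yield b'BAM\0'
        for path in paths:
            if os.path.exists(path):
                # length-prefix the payload; the client must read exactly this many bytes
                yield struct.pack('<II', ID_PAYLOAD, os.path.getsize(path))
                with open(path, 'rb') as f:
                    while True:
                        data = f.read(CHUNK_SIZE)
                        if not data:
                            break
                        yield data
            else:
                # an empty record still advances the client's file_index,
                # keeping it aligned with the requested files list
                yield struct.pack('<II', ID_PAYLOAD_EMPTY, 0)
        yield struct.pack('<II', ID_DONE, 0)

The length prefix is load-bearing: the client reads by byte count, not by delimiter, so the size field must describe exactly the bytes that follow (the original uses seek/tell on the already-open file for the same reason).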