From e76691f18bd1aff8ed3e8d99f6360ccd2811e1e0 Mon Sep 17 00:00:00 2001
From: Campbell Barton <ideasman42@gmail.com>
Date: Thu, 19 Feb 2015 18:15:13 +1100
Subject: [PATCH] support for partial downloads

(use local cache where possible)
---
 bam/blend/blendfile_pack.py                   |  15 +-
 bam/cli.py                                    | 161 ++++++++++++++++--
 .../application/modules/resources/__init__.py |  64 ++++++-
 3 files changed, 219 insertions(+), 21 deletions(-)

diff --git a/bam/blend/blendfile_pack.py b/bam/blend/blendfile_pack.py
index eb427eb..6996c20 100755
--- a/bam/blend/blendfile_pack.py
+++ b/bam/blend/blendfile_pack.py
@@ -165,6 +165,9 @@ def pack(
     path_temp_files = set()
     path_copy_files = set()
 
+    # path_temp_files --> original-location
+    path_temp_files_orig = {}
+
     TEMP_SUFFIX = b'@'
 
     if report is None:
@@ -212,6 +215,7 @@ def pack(
             os.makedirs(os.path.dirname(filepath_tmp), exist_ok=True)
             shutil.copy(filepath, filepath_tmp)
             path_temp_files.add(filepath_tmp)
+            path_temp_files_orig[filepath_tmp] = filepath
         return filepath_tmp
 
     # -----------------
@@ -384,7 +388,7 @@ def pack(
         blendfile_dst_basename = os.path.basename(blendfile_dst).decode('utf-8')
 
         if blendfile_src_basename != blendfile_dst_basename:
-            if mode != 'ZIP':
+            if mode == 'FILE':
                 deps_remap[blendfile_dst_basename] = deps_remap[blendfile_src_basename]
                 del deps_remap[blendfile_src_basename]
         del blendfile_src_basename, blendfile_dst_basename
@@ -403,6 +407,13 @@ def pack(
         # main file XXX, should have better way!
         paths_remap[os.path.basename(blendfile_src).decode('utf-8')] = relbase(blendfile_src).decode('utf-8')
 
+        # blend libs
+        for dst in path_temp_files:
+            src = path_temp_files_orig[dst]
+            k = os.path.relpath(dst[:-len(TEMP_SUFFIX)], base_dir_dst_temp).decode('utf-8')
+            paths_remap[k] = relbase(src).decode('utf-8')
+            del k
+
         del relbase
 
     if paths_uuid is not None:
@@ -494,6 +505,8 @@ def pack(
         del _compress_level_orig, _compress_mode
 
         yield report("  %s: %r\n" % (colorize("written", color='green'), blendfile_dst))
+    elif mode == 'NONE':
+        pass
     else:
         raise Exception("%s not a known mode" % mode)
 
diff --git a/bam/cli.py b/bam/cli.py
index 0de685a..53f2ac7 100755
--- a/bam/cli.py
+++ b/bam/cli.py
@@ -491,6 +491,10 @@ class bam_commands:
             all_deps=False,
             ):
 
+        # ---------
+        # constants
+        CHUNK_SIZE = 1024
+
         cfg = bam_config.load(abort=True)
 
         if output_dir is None:
@@ -519,6 +523,9 @@ class bam_commands:
                 }),
             }
 
+        # --------------------------------------------------------------------
+        # First request we simply get a list of files to download
+        #
         import requests
         r = requests.get(
                 bam_session.request_url("file"),
@@ -556,7 +563,7 @@ class bam_commands:
                     break
 
             tot_size = 0
-            for chunk in r.iter_content(chunk_size=1024):
+            for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
                 if chunk:  # filter out keep-alive new chunks
                     tot_size += len(chunk)
                     f.write(chunk)
@@ -564,6 +571,7 @@ class bam_commands:
 
                     sys.stdout.write("\rdownload: [%03d%%]" % ((100 * tot_size) // msg_size))
                     sys.stdout.flush()
+            del struct
 
         # ---------------
         # extract the zip
@@ -576,32 +584,151 @@ class bam_commands:
         os.remove(dst_dir_data)
         sys.stdout.write("\nwritten: %r\n" % session_rootdir)
 
+        # ----
+        # Cache location (the cache itself is filled by the second request below)
+        cachedir = os.path.join(bam_config.find_rootdir(cwd=session_rootdir, abort=True), ".cache")
+        # os.makedirs(cachedir, exist_ok=True)
+
+        # --------------------------------------------------------------------
+        # Second request downloads the files...
+        #
+        # ...which we don't have in the cache.
+        # Note that it's possible everything is cached, then no second request is made.
+        files = []
+        with open(os.path.join(session_rootdir, ".bam_paths_remap.json")) as fp:
+            from bam.utils.system import uuid_from_file
+            paths_remap = json.load(fp)
+
+            paths_uuid = bam_session.load_paths_uuid(session_rootdir)
+            print(paths_uuid)
+
+            for f_src, f_dst in paths_remap.items():
+                if f_src == ".":
+                    continue
+
+                uuid = paths_uuid.get(f_src)
+                if uuid is not None:
+                    f_dst_abs = os.path.join(cachedir, f_dst)
+                    if os.path.exists(f_dst_abs):
+                        # check if we need to download this file?
+                        uuid_exists = uuid_from_file(f_dst_abs)
+                        assert(type(uuid) is type(uuid_exists))
+                        if uuid == uuid_exists:
+                            continue
+
+                files.append(f_dst)
+
+            del uuid_from_file
+
+        if files:
+            payload = {
+                "command": "checkout_download",
+                "arguments": json.dumps({
+                    "files": files,
+                    }),
+                }
+            import requests
+            r = requests.get(
+                    bam_session.request_url("file"),
+                    params=payload,
+                    auth=(cfg['user'], cfg['password']),
+                    stream=True,
+                    )
+
+            if r.status_code not in {200, }:
+                # TODO(cam), make into reusable function?
+                print("Error %d:\n%s" % (r.status_code, next(r.iter_content(chunk_size=1024)).decode('utf-8')))
+                return
+
+            # TODO(cam) how to tell if we get back a message payload? or real data???
+            # reads are capped at 'size' so we never consume bytes of the next message
+            def iter_content_size(r, size, chunk_size=CHUNK_SIZE):
+                remaining = size
+                while remaining:
+                    step = chunk_size if remaining >= chunk_size else remaining
+                    remaining -= step
+                    yield r.raw.read(step)
+
+
+            import struct
+            ID_MESSAGE = 1
+            ID_PAYLOAD = 2
+            ID_PAYLOAD_EMPTY = 3
+            ID_DONE = 4
+            head = r.raw.read(4)
+            if head != b'BAM\0':
+                fatal("bad header from server")
+
+            file_index = 0
+            while True:
+                msg_type, msg_size = struct.unpack("<II", r.raw.read(8))
+                if msg_type == ID_MESSAGE:
+                    sys.stdout.write(r.raw.read(msg_size).decode('utf-8'))
+                    sys.stdout.flush()
+                elif msg_type == ID_PAYLOAD_EMPTY:
+                    file_index += 1
+                elif msg_type == ID_PAYLOAD:
+                    f_rel = files[file_index]
+                    f_abs = os.path.join(cachedir, files[file_index])
+                    file_index += 1
+
+                    # the server also reports each file, we could do this a bit differently...
+                    sys.stdout.write("file: %r" % f_rel)
+                    sys.stdout.flush()
+
+                    os.makedirs(os.path.dirname(f_abs), exist_ok=True)
+
+                    with open(f_abs, "wb") as f:
+                        tot_size = 0
+                        # for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
+                        for chunk in iter_content_size(r, msg_size, chunk_size=CHUNK_SIZE):
+                            if chunk:  # filter out keep-alive new chunks
+                                tot_size += len(chunk)
+                                f.write(chunk)
+                                f.flush()
+
+                                sys.stdout.write("\rdownload: [%03d%%]" % ((100 * tot_size) // msg_size))
+                                sys.stdout.flush()
+                        assert(tot_size == msg_size)
+
+                elif msg_type == ID_DONE:
+                    break
+                else:
+                    raise Exception("Unknown message-type %d" % msg_type)
+            del struct
+
+
+        del files
 
         # ------------
         # Update Cache
         #
         # TODO, remove stale cache
-        cachedir = os.path.join(bam_config.find_rootdir(cwd=session_rootdir, abort=True), ".cache")
-        # os.makedirs(cachedir, exist_ok=True)
-
         # we need this to map to project level paths
+        #
+        # Copy cache into our session before applying binary edits.
         with open(os.path.join(session_rootdir, ".bam_paths_remap.json")) as fp:
             paths_remap = json.load(fp)
-            for f_src, f_dst in paths_remap.items():
-                if f_src == ".":
-                    continue
-                f_src_abs = os.path.join(session_rootdir, f_src)
-                if not os.path.exists(f_src_abs):
+            for f_dst, f_src in paths_remap.items():
+                if f_dst == ".":
                     continue
 
-                f_dst_abs = os.path.join(cachedir, f_dst)
-                os.makedirs(os.path.dirname(f_dst_abs), exist_ok=True)
-                import shutil
-                # print("from        ", f_src_abs, os.path.exists(f_src_abs))
-                # print("to          ", f_dst_abs, os.path.exists(f_dst_abs))
-                # print("CREATING:   ", f_dst_abs)
-                shutil.copyfile(f_src_abs, f_dst_abs)
-                del shutil
+                f_src_abs = os.path.join(cachedir, f_src)
+
+                # this should 'almost' always be true
+                if os.path.exists(f_src_abs):
+
+                    f_dst_abs = os.path.join(session_rootdir, f_dst)
+                    os.makedirs(os.path.dirname(f_dst_abs), exist_ok=True)
+
+                    import shutil
+                    # print("from        ", f_dst_abs, os.path.exists(f_dst_abs))
+                    # print("to          ", f_src_abs, os.path.exists(f_src_abs))
+                    # print("CREATING:   ", f_src_abs)
+                    shutil.copyfile(f_src_abs, f_dst_abs)
+                    del shutil
+        # import time
+        # time.sleep(10000)
 
         del paths_remap, cachedir
         # ...done updating cache
diff --git a/webservice/bam/application/modules/resources/__init__.py b/webservice/bam/application/modules/resources/__init__.py
index 4248703..780e67a 100644
--- a/webservice/bam/application/modules/resources/__init__.py
+++ b/webservice/bam/application/modules/resources/__init__.py
@@ -116,7 +116,6 @@ class FileAPI(Resource):
         super(FileAPI, self).__init__()
 
     def get(self, project_name):
-        filepath = request.args['filepath']
         command = request.args['command']
         command_args = request.args.get('arguments')
         if command_args is not None:
@@ -125,6 +124,7 @@ class FileAPI(Resource):
         project = Project.query.filter_by(name=project_name).first()
 
         if command == 'info':
+            filepath = request.args['filepath']
 
             r = svn.local.LocalClient(project.repository_path)
             svn_log = r.log_default(None, None, 5, filepath)
@@ -147,6 +147,7 @@ class FileAPI(Resource):
                 bundle_status=bundle_status)
 
         elif command == 'bundle':
+            filepath = request.args['filepath']
             #return jsonify(filepath=filepath, status="building")
             filepath = os.path.join(project.repository_path, filepath)
 
@@ -173,6 +174,7 @@ class FileAPI(Resource):
                         project.repository_path,
                         True,
                         report,
+                        'ZIP',
                         ):
                     pass
 
@@ -218,6 +220,7 @@ class FileAPI(Resource):
             return jsonify(filepath=filepath, status="building")
 
         elif command == 'checkout':
+            filepath = request.args['filepath']
             filepath = os.path.join(project.repository_path, filepath)
 
             if not os.path.exists(filepath):
@@ -249,6 +252,10 @@ class FileAPI(Resource):
                         project.repository_path,
                         command_args['all_deps'],
                         report,
+                        # we don't in fact pack any files here,
+                        # only return a list of files we _would_ pack.
+                        # see: checkout_download
+                        'NONE',
                         )
 
                 # TODO, handle fail
@@ -270,6 +277,57 @@ class FileAPI(Resource):
 
             # return Response(f, direct_passthrough=True)
             return Response(response_message_iter(), direct_passthrough=True)
+        elif command == 'checkout_download':
+            files = command_args['files']
+
+            def response_message_iter():
+                """
+                Generator streaming 'files' to the client: a magic header, then
+                messages made of a '<II' (type, byte-size) header plus that many bytes.
+                One ID_PAYLOAD or ID_PAYLOAD_EMPTY is sent per entry of 'files', in order.
+                """
+                ID_MESSAGE = 1
+                ID_PAYLOAD = 2
+                ID_PAYLOAD_EMPTY = 3
+                ID_DONE = 4
+                import struct
+
+                def report(txt):
+                    # text message, the client writes it to its stdout
+                    txt_bytes = txt.encode('utf-8')
+                    return struct.pack('<II', ID_MESSAGE, len(txt_bytes)) + txt_bytes
+
+                yield b'BAM\0'
+
+                # 'files' is client supplied, absolute paths and '..' could escape
+                # the repository, so only serve what resolves inside of it.
+                repo_root = os.path.abspath(project.repository_path)
+                repo_prefix = os.path.join(repo_root, "")
+
+                for f_rel in files:
+                    f_abs = os.path.abspath(os.path.join(repo_root, f_rel))
+                    if f_abs.startswith(repo_prefix) and os.path.isfile(f_abs):
+                        yield report("%s: %r\n" % ("downloading", f_rel))
+                        with open(f_abs, 'rb') as f:
+                            # size goes in the header so the client knows where the file ends
+                            f.seek(0, os.SEEK_END)
+                            f_size = f.tell()
+                            f.seek(0, os.SEEK_SET)
+
+                            yield struct.pack('<II', ID_PAYLOAD, f_size)
+                            while True:
+                                data = f.read(1024)
+                                if not data:
+                                    break
+                                yield data
+                    else:
+                        # rejected or missing: an empty payload keeps the client's file index in step
+                        yield report("%s: %r\n" % ("source missing", f_rel))
+                        yield struct.pack('<II', ID_PAYLOAD_EMPTY, 0)
+
+                yield struct.pack('<II', ID_DONE, 0)
+
+            return Response(response_message_iter(), direct_passthrough=True)
 
         else:
             return jsonify(message="Command unknown")
@@ -379,7 +437,7 @@ class FileAPI(Resource):
             return jsonify(message='File not allowed')
 
     @staticmethod
-    def pack_fn(filepath, filepath_zip, paths_remap_relbase, all_deps, report):
+    def pack_fn(filepath, filepath_zip, paths_remap_relbase, all_deps, report, mode):
         """
         'paths_remap_relbase' is the project path,
         we want all paths to be relative to this so we don't get server path included.
@@ -403,7 +461,7 @@ class FileAPI(Resource):
 
             try:
                 yield from blendfile_pack.pack(
-                        filepath.encode('utf-8'), filepath_zip.encode('utf-8'), mode='ZIP',
+                        filepath.encode('utf-8'), filepath_zip.encode('utf-8'), mode=mode,
                         paths_remap_relbase=paths_remap_relbase.encode('utf-8'),
                         deps_remap=deps_remap, paths_remap=paths_remap, paths_uuid=paths_uuid,
                         all_deps=all_deps,