From 2d2a96a33488c5a7259dd43804ba2ff5c8affcdb Mon Sep 17 00:00:00 2001 From: Martin Poirier Date: Sun, 14 Nov 2010 22:11:56 +0000 Subject: [PATCH] netrender Adding retries on initial slave connection. --- release/scripts/io/netrender/slave.py | 36 ++++++++++++++++++++------- release/scripts/io/netrender/utils.py | 27 ++++++++++++++++++-- 2 files changed, 52 insertions(+), 11 deletions(-) diff --git a/release/scripts/io/netrender/slave.py b/release/scripts/io/netrender/slave.py index 2613983e53b..12f8fdbf986 100644 --- a/release/scripts/io/netrender/slave.py +++ b/release/scripts/io/netrender/slave.py @@ -32,6 +32,7 @@ BLENDER_PATH = sys.argv[0] CANCEL_POLL_SPEED = 2 MAX_TIMEOUT = 10 INCREMENT_TIMEOUT = 1 +MAX_CONNECT_TRY = 10 try: system = platform.system() except UnicodeDecodeError: @@ -111,13 +112,36 @@ def testFile(conn, job_id, slave_id, rfile, JOB_PREFIX, main_path = None): return job_full_path +def breakable_timeout(timeout): + for i in range(timeout): + time.sleep(1) + if engine.test_break(): + break + def render_slave(engine, netsettings, threads): timeout = 1 + + bisleep = BreakableIncrementedSleep(INCREMENT_TIMEOUT, 1, MAX_TIMEOUT, engine.test_break) engine.update_stats("", "Network render node initiation") conn = clientConnection(netsettings.server_address, netsettings.server_port) - + + if not conn: + timeout = 1 + print("Connection failed, will try connecting again at most %i times" % MAX_CONNECT_TRY) + bisleep.reset() + + for i in range(MAX_CONNECT_TRY): + bisleep.sleep() + + conn = clientConnection(netsettings.server_address, netsettings.server_port) + + if conn or engine.test_break(): + break + + print("Retry %i failed, waiting %is before retrying" % (i + 1, bisleep.current)) + if conn: conn.request("POST", "/slave", json.dumps(slave_Info().serialize())) response = conn.getresponse() @@ -136,7 +160,7 @@ def render_slave(engine, netsettings, threads): response = conn.getresponse() if response.status == http.client.OK: - timeout = 1 # reset timeout on new job + bisleep.reset() job = netrender.model.RenderJob.materialize(json.loads(str(response.read(), encoding='utf8'))) engine.update_stats("", "Network render processing job from master") @@ -309,13 +333,7 @@ def render_slave(engine, netsettings, threads): engine.update_stats("", "Network render connected to master, waiting for jobs") else: - if timeout < MAX_TIMEOUT: - timeout += INCREMENT_TIMEOUT - - for i in range(timeout): - time.sleep(1) - if engine.test_break(): - break + bisleep.sleep() conn.close() diff --git a/release/scripts/io/netrender/utils.py b/release/scripts/io/netrender/utils.py index 2c735f9d5f7..ef0c96a907e 100644 --- a/release/scripts/io/netrender/utils.py +++ b/release/scripts/io/netrender/utils.py @@ -68,6 +68,28 @@ class DirectoryContext: def __exit__(self, exc_type, exc_value, traceback): os.chdir(self.curdir) +class BreakableIncrementedSleep: + def __init__(self, increment, default_timeout, max_timeout, break_fct): + self.increment = increment + self.default = default_timeout + self.max = max_timeout + self.current = self.default + self.break_fct = break_fct + + def reset(self): + self.current = self.default + + def increase(self): + self.current = min(self.current + self.increment, self.max) + + def sleep(self): + for i in range(self.current): + time.sleep(1) + if self.break_fct(): + break + + self.increase() + def responseStatus(conn): response = conn.getresponse() response.read() @@ -130,12 +152,13 @@ def clientConnection(address, port, report = None, scan = True): else: conn.close() reporting(report, "Incorrect master version", ValueError) - except Exception as err: + except BaseException as err: if report: report('ERROR', str(err)) return None else: - raise + print(err) + return None def clientVerifyVersion(conn): conn.request("GET", "/version")