From cb033673b6eb3dc8330d2ddea0fd358eae3b939a Mon Sep 17 00:00:00 2001 From: epriestley Date: Thu, 15 Nov 2018 08:11:49 -0800 Subject: [PATCH] Unify intracluster sync and Drydock working copy construction timeouts as a repository "copy time limit" Summary: Depends on D19814. Ref T13216. See PHI885. For various eldritch reasons, `git fetch` can hang. Although we'd probably like to fix this with `git fetch --require-sustained-network-transfer-rate=512KB/5s` or similar, that flag doesn't exist and we don't have a reasonable way to build it. Short of that, move toward formalizing a repository "copy time limit": the longest amount of time anything may spend trying to make a copy of this repository. This grows out of the existing intracluster sync limit, which is effectively the same thing. Here, apply it to `git clone` and `git fetch` in Drydock working copy construction, too. A future change may make it configurable. Test Plan: - Set the limit to 0.001. - Tried to build and lease working copies, got sensible timeout errors (see D19815). ``` Lease activation failed: [CommandException] Command killed by timeout after running for more than 0.001 seconds. COMMAND ssh '-o' 'LogLevel=quiet' '-o' 'StrictHostKeyChecking=no' '-o' 'UserKnownHostsFile=/dev/null' '-o' 'BatchMode=yes' -l '********' -p '2222' -i '********' '127.0.0.1' -- '(cd '\''/var/drydock/workingcopy-163/repo/spellbook/'\'' && git clean -d --force && git fetch && git reset --hard)' ``` Reviewers: amckinley Reviewed By: amckinley Subscribers: yelirekim, PHID-OPKG-gm6ozazyms6q6i22gyam Maniphest Tasks: T13216 Differential Revision: https://secure.phabricator.com/D19816 --- .../protocol/DiffusionCommandEngine.php | 3 +- ...dockWorkingCopyBlueprintImplementation.php | 34 +++++++++++++++---- .../drydock/storage/DrydockSlotLock.php | 4 +-- .../storage/PhabricatorRepository.php | 13 +++++++ .../storage/PhabricatorRepositoryURIIndex.php | 10 +++--- 5 files changed, 50 insertions(+), 14 deletions(-) diff --git a/src/applications/diffusion/protocol/DiffusionCommandEngine.php b/src/applications/diffusion/protocol/DiffusionCommandEngine.php index d7da62b11d..9d52482b23 100644 --- a/src/applications/diffusion/protocol/DiffusionCommandEngine.php +++ b/src/applications/diffusion/protocol/DiffusionCommandEngine.php @@ -138,7 +138,8 @@ abstract class DiffusionCommandEngine extends Phobject { // See T13108. By default, don't let any cluster command run indefinitely // to try to avoid cases where `git fetch` hangs for some reason and we're // left sitting with a held lock forever. - $future->setTimeout(phutil_units('15 minutes in seconds')); + $repository = $this->getRepository(); + $future->setTimeout($repository->getCopyTimeLimit()); return $future; } diff --git a/src/applications/drydock/blueprint/DrydockWorkingCopyBlueprintImplementation.php b/src/applications/drydock/blueprint/DrydockWorkingCopyBlueprintImplementation.php index c1672f99fb..eb3a361ea8 100644 --- a/src/applications/drydock/blueprint/DrydockWorkingCopyBlueprintImplementation.php +++ b/src/applications/drydock/blueprint/DrydockWorkingCopyBlueprintImplementation.php @@ -173,6 +173,7 @@ final class DrydockWorkingCopyBlueprintImplementation $map = $resource->getAttribute('repositories.map'); + $futures = array(); $repositories = $this->loadRepositories(ipull($map, 'phid')); foreach ($map as $directory => $spec) { // TODO: Validate directory isn't goofy like "/etc" or "../../lol" @@ -181,11 +182,18 @@ final class DrydockWorkingCopyBlueprintImplementation $repository = $repositories[$spec['phid']]; $path = "{$root}/repo/{$directory}/"; - // TODO: Run these in parallel? - $interface->execx( + $future = $interface->getExecFuture( 'git clone -- %s %s', (string)$repository->getCloneURIObject(), $path); + + $future->setTimeout($repository->getCopyTimeLimit()); + + $futures[$directory] = $future; + } + + foreach (new FutureIterator($futures) as $key => $future) { + $future->resolvex(); } $resource @@ -240,8 +248,12 @@ final class DrydockWorkingCopyBlueprintImplementation $map = $lease->getAttribute('repositories.map'); $root = $resource->getAttribute('workingcopy.root'); + $repositories = $this->loadRepositories(ipull($map, 'phid')); + $default = null; foreach ($map as $directory => $spec) { + $repository = $repositories[$spec['phid']]; + $interface->pushWorkingDirectory("{$root}/repo/{$directory}/"); $cmd = array(); @@ -271,7 +283,9 @@ final class DrydockWorkingCopyBlueprintImplementation $arg[] = $branch; } - $this->execxv($interface, $cmd, $arg); + $this->newExecvFuture($interface, $cmd, $arg) + ->setTimeout($repository->getCopyTimeLimit()) + ->resolvex(); if (idx($spec, 'default')) { $default = $directory; @@ -295,7 +309,9 @@ final class DrydockWorkingCopyBlueprintImplementation $arg[] = $ref_ref; try { - $this->execxv($interface, $cmd, $arg); + $this->newExecvFuture($interface, $cmd, $arg) + ->setTimeout($repository->getCopyTimeLimit()) + ->resolvex(); } catch (CommandException $ex) { $display_command = csprintf( 'git fetch %R %R', @@ -509,12 +525,18 @@ final class DrydockWorkingCopyBlueprintImplementation DrydockCommandInterface $interface, array $commands, array $arguments) { + return $this->newExecvFuture($interface, $commands, $arguments)->resolvex(); + } + + private function newExecvFuture( + DrydockCommandInterface $interface, + array $commands, + array $arguments) { $commands = implode(' && ', $commands); $argv = array_merge(array($commands), $arguments); - return call_user_func_array(array($interface, 'execx'), $argv); + return call_user_func_array(array($interface, 'getExecFuture'), $argv); } - } diff --git a/src/applications/drydock/storage/DrydockSlotLock.php b/src/applications/drydock/storage/DrydockSlotLock.php index 7e4e320946..7a9bce8b3f 100644 --- a/src/applications/drydock/storage/DrydockSlotLock.php +++ b/src/applications/drydock/storage/DrydockSlotLock.php @@ -140,9 +140,9 @@ final class DrydockSlotLock extends DrydockDAO { try { queryfx( $conn_w, - 'INSERT INTO %T (ownerPHID, lockIndex, lockKey) VALUES %Q', + 'INSERT INTO %T (ownerPHID, lockIndex, lockKey) VALUES %LQ', $table->getTableName(), - implode(', ', $sql)); + $sql); } catch (AphrontDuplicateKeyQueryException $ex) { // Try to improve the readability of the exception. We might miss on // this query if the lock has already been released, but most of the diff --git a/src/applications/repository/storage/PhabricatorRepository.php b/src/applications/repository/storage/PhabricatorRepository.php index 52824c2aa4..2b0d63cf47 100644 --- a/src/applications/repository/storage/PhabricatorRepository.php +++ b/src/applications/repository/storage/PhabricatorRepository.php @@ -1889,6 +1889,19 @@ final class PhabricatorRepository extends PhabricatorRepositoryDAO } + /** + * Time limit for cloning or copying this repository. + * + * This limit is used to timeout operations like `git clone` or `git fetch` + * when doing intracluster synchronization, building working copies, etc. + * + * @return int Maximum number of seconds to spend copying this repository. + */ + public function getCopyTimeLimit() { + return phutil_units('15 minutes in seconds'); + } + + /** * Retrieve the service URI for the device hosting this repository. * diff --git a/src/applications/repository/storage/PhabricatorRepositoryURIIndex.php b/src/applications/repository/storage/PhabricatorRepositoryURIIndex.php index 2cfa0c4471..3ef41f8089 100644 --- a/src/applications/repository/storage/PhabricatorRepositoryURIIndex.php +++ b/src/applications/repository/storage/PhabricatorRepositoryURIIndex.php @@ -48,16 +48,16 @@ final class PhabricatorRepositoryURIIndex queryfx( $conn_w, - 'DELETE FROM %T WHERE repositoryPHID = %s', - $table->getTableName(), + 'DELETE FROM %R WHERE repositoryPHID = %s', + $table, $repository_phid); if ($sql) { queryfx( $conn_w, - 'INSERT INTO %T (repositoryPHID, repositoryURI) VALUES %Q', - $table->getTableName(), - implode(', ', $sql)); + 'INSERT INTO %R (repositoryPHID, repositoryURI) VALUES %LQ', + $table, + $sql); } $table->saveTransaction();