Make repository daemon locks more granular and forgiving
Summary: Ref T4292. Currently, we hold one big lock around the whole `bin/repository update` workflow. When running multiple daemons on different hosts, this lock can end up being contentious. In particular, we'll hold it during `git fetch` on every host globally, even though it's only useful to hold it locally per-device (that is, it's fine/good/expected if `repo001` and `repo002` happen to be fetching from a repository they are observing at the same time). Instead, split it into two locks: - One lock is scoped to the current device, and held during pull (usually `git fetch`). This just keeps multiple daemons accidentally running on the same host from making a mess when trying to initialize or update a working copy. - One lock is scoped globally, and held during discovery. This makes sure daemons on different hosts don't step on each other when updating the database. If we fail to acquire either lock, assume some other process is legitimately doing the work and bail out more quietly instead of raising a fatal error. In approximately 100% of cases where users have hit this lock contention, that was the case: some other daemon was running somewhere doing the work and the error didn't actually represent an issue. If there's an actual problem, we still raise a diagnostically useful message if you run `bin/repository update` manually, so there are still tools to figure out that something is hung or whatever. Test Plan: - Ran `bin/repository update`, `pull`, `discover`. - Added `sleep(5)`, forced processes to contend, got lock exceptions and graceful exit with diagnostic message. Reviewers: chad Reviewed By: chad Maniphest Tasks: T4292 Differential Revision: https://secure.phabricator.com/D15903
This commit is contained in:
		| @@ -622,6 +622,7 @@ phutil_register_library_map(array( | ||||
|     'DiffusionController' => 'applications/diffusion/controller/DiffusionController.php', | ||||
|     'DiffusionCreateCommentConduitAPIMethod' => 'applications/diffusion/conduit/DiffusionCreateCommentConduitAPIMethod.php', | ||||
|     'DiffusionCreateRepositoriesCapability' => 'applications/diffusion/capability/DiffusionCreateRepositoriesCapability.php', | ||||
|     'DiffusionDaemonLockException' => 'applications/diffusion/exception/DiffusionDaemonLockException.php', | ||||
|     'DiffusionDefaultEditCapability' => 'applications/diffusion/capability/DiffusionDefaultEditCapability.php', | ||||
|     'DiffusionDefaultPushCapability' => 'applications/diffusion/capability/DiffusionDefaultPushCapability.php', | ||||
|     'DiffusionDefaultViewCapability' => 'applications/diffusion/capability/DiffusionDefaultViewCapability.php', | ||||
| @@ -4845,6 +4846,7 @@ phutil_register_library_map(array( | ||||
|     'DiffusionController' => 'PhabricatorController', | ||||
|     'DiffusionCreateCommentConduitAPIMethod' => 'DiffusionConduitAPIMethod', | ||||
|     'DiffusionCreateRepositoriesCapability' => 'PhabricatorPolicyCapability', | ||||
|     'DiffusionDaemonLockException' => 'Exception', | ||||
|     'DiffusionDefaultEditCapability' => 'PhabricatorPolicyCapability', | ||||
|     'DiffusionDefaultPushCapability' => 'PhabricatorPolicyCapability', | ||||
|     'DiffusionDefaultViewCapability' => 'PhabricatorPolicyCapability', | ||||
|   | ||||
| @@ -10,6 +10,14 @@ final class AlmanacKeys extends Phobject { | ||||
|   } | ||||
|  | ||||
|   public static function getDeviceID() { | ||||
|     // While running unit tests, ignore any configured device identity. | ||||
|     try { | ||||
|       PhabricatorTestCase::assertExecutingUnitTests(); | ||||
|       return null; | ||||
|     } catch (Exception $ex) { | ||||
|       // Continue normally. | ||||
|     } | ||||
|  | ||||
|     $device_id_path = self::getKeyPath('device.id'); | ||||
|  | ||||
|     if (Filesystem::pathExists($device_id_path)) { | ||||
|   | ||||
| @@ -0,0 +1,3 @@ | ||||
| <?php | ||||
|  | ||||
  /**
   * Marker exception thrown by the repository pull and discovery engines when
   * the repository lock they need is already held by another process.
   *
   * It adds nothing to the base Exception; it exists so callers (such as the
   * `bin/repository update` workflow) can catch lock contention separately
   * from real failures and exit quietly.
   */
| final class DiffusionDaemonLockException extends Exception {} | ||||
| @@ -37,6 +37,33 @@ final class PhabricatorRepositoryDiscoveryEngine | ||||
|   public function discoverCommits() { | ||||
      // Public entry point for discovery: take the discovery lock for this
      // repository, run the real work in discoverCommitsWithLock(), and always
      // release the lock afterwards. Returns whatever the locked worker returns.
      // Throws DiffusionDaemonLockException if the lock is already held.
|     $repository = $this->getRepository(); | ||||
|  | ||||
      // Passing `false` asks newRepositoryLock() not to add the device ID to
      // the lock name, so this lock is shared by every host.
|     $lock = $this->newRepositoryLock($repository, 'repo.look', false); | ||||
|  | ||||
|     try { | ||||
|       $lock->lock(); | ||||
|     } catch (PhutilLockException $ex) { | ||||
        // Translate contention into a dedicated exception type so callers can
        // tell "someone else is already doing this" apart from a real error.
|       throw new DiffusionDaemonLockException( | ||||
|         pht( | ||||
|           'Another process is currently discovering repository "%s", '. | ||||
|           'skipping discovery.', | ||||
|           $repository->getDisplayName())); | ||||
|     } | ||||
|  | ||||
|     try { | ||||
|       $result = $this->discoverCommitsWithLock(); | ||||
|     } catch (Exception $ex) { | ||||
        // Release the lock on failure, then let the original error propagate.
|       $lock->unlock(); | ||||
|       throw $ex; | ||||
|     } | ||||
|  | ||||
      // Success path: release the lock before handing back the result.
|     $lock->unlock(); | ||||
|  | ||||
|     return $result; | ||||
|   } | ||||
|  | ||||
|   private function discoverCommitsWithLock() { | ||||
|     $repository = $this->getRepository(); | ||||
|  | ||||
|     $vcs = $repository->getVersionControlSystem(); | ||||
|     switch ($vcs) { | ||||
|       case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN: | ||||
|   | ||||
| @@ -51,6 +51,27 @@ abstract class PhabricatorRepositoryEngine extends Phobject { | ||||
|     return PhabricatorUser::getOmnipotentUser(); | ||||
|   } | ||||
|  | ||||
|   protected function newRepositoryLock( | ||||
|     PhabricatorRepository $repository, | ||||
|     $lock_key, | ||||
|     $lock_device_only) { | ||||
      // Build (but do not acquire) a PhabricatorGlobalLock for a repository.
      //
      // $repository        Repository the lock protects; its ID is part of the
      //                    lock name.
      // $lock_key          Prefix identifying the operation (callers in this
      //                    change pass 'repo.pull' and 'repo.look').
      // $lock_device_only  If true, also scope the lock name to the current
      //                    device when one is available.
      //
      // Returns the lock produced by PhabricatorGlobalLock::newLock(); the
      // caller is responsible for calling lock() and unlock() on it.
|  | ||||
|     $lock_parts = array(); | ||||
|     $lock_parts[] = $lock_key; | ||||
|     $lock_parts[] = $repository->getID(); | ||||
|  | ||||
|     if ($lock_device_only) { | ||||
|       $device = AlmanacKeys::getLiveDevice(); | ||||
        // If no live device is returned, the device part is simply omitted and
        // the name falls back to "<key>:<repository id>".
|       if ($device) { | ||||
|         $lock_parts[] = $device->getID(); | ||||
|       } | ||||
|     } | ||||
|  | ||||
      // Resulting name is "<key>:<repository id>[:<device id>]".
|     $lock_name = implode(':', $lock_parts); | ||||
|     return PhabricatorGlobalLock::newLock($lock_name); | ||||
|   } | ||||
|  | ||||
|  | ||||
|   /** | ||||
|    * Verify that the "origin" remote exists, and points at the correct URI. | ||||
|    * | ||||
|   | ||||
| @@ -23,6 +23,33 @@ final class PhabricatorRepositoryPullEngine | ||||
|  | ||||
|   public function pullRepository() { | ||||
      // Public entry point for pulling: take the pull lock for this repository,
      // run the real work in pullRepositoryWithLock(), and always release the
      // lock afterwards. Returns whatever the locked worker returns.
      // Throws DiffusionDaemonLockException if the lock is already held.
|     $repository = $this->getRepository(); | ||||
|  | ||||
      // Passing `true` asks newRepositoryLock() to scope the lock name to the
      // current device when one is available, so different hosts can pull the
      // same repository at the same time.
|     $lock = $this->newRepositoryLock($repository, 'repo.pull', true); | ||||
|  | ||||
|     try { | ||||
|       $lock->lock(); | ||||
|     } catch (PhutilLockException $ex) { | ||||
        // Translate contention into a dedicated exception type so callers can
        // tell "someone else is already doing this" apart from a real error.
|       throw new DiffusionDaemonLockException( | ||||
|         pht( | ||||
|           'Another process is currently updating repository "%s", '. | ||||
|           'skipping pull.', | ||||
|           $repository->getDisplayName())); | ||||
|     } | ||||
|  | ||||
|     try { | ||||
|       $result = $this->pullRepositoryWithLock(); | ||||
|     } catch (Exception $ex) { | ||||
        // Release the lock on failure, then let the original error propagate.
|       $lock->unlock(); | ||||
|       throw $ex; | ||||
|     } | ||||
|  | ||||
      // Success path: release the lock before handing back the result.
|     $lock->unlock(); | ||||
|  | ||||
|     return $result; | ||||
|   } | ||||
|  | ||||
|   private function pullRepositoryWithLock() { | ||||
|     $repository = $this->getRepository(); | ||||
|     $viewer = PhabricatorUser::getOmnipotentUser(); | ||||
|  | ||||
|     $is_hg = false; | ||||
|   | ||||
| @@ -53,35 +53,14 @@ final class PhabricatorRepositoryManagementUpdateWorkflow | ||||
|     $repository = head($repos); | ||||
|  | ||||
|     try { | ||||
|       $lock_name = 'repository.update:'.$repository->getID(); | ||||
|       $lock = PhabricatorGlobalLock::newLock($lock_name); | ||||
|  | ||||
|       try { | ||||
|         $lock->lock(); | ||||
|       } catch (PhutilLockException $ex) { | ||||
|         throw new PhutilProxyException( | ||||
|           pht( | ||||
|             'Another process is currently holding the update lock for '. | ||||
|             'repository "%s". Repositories may only be updated by one '. | ||||
|             'process at a time. This can happen if you are running multiple '. | ||||
|             'copies of the daemons. This can also happen if you manually '. | ||||
|             'update a repository while the daemons are also updating it '. | ||||
|             '(in this case, just try again in a few moments).', | ||||
|             $repository->getMonogram()), | ||||
|           $ex); | ||||
|       } | ||||
|  | ||||
|       try { | ||||
|         $no_discovery = $args->getArg('no-discovery'); | ||||
|  | ||||
|       id(new PhabricatorRepositoryPullEngine()) | ||||
|         ->setRepository($repository) | ||||
|         ->setVerbose($this->getVerbose()) | ||||
|         ->pullRepository(); | ||||
|  | ||||
|       $no_discovery = $args->getArg('no-discovery'); | ||||
|       if ($no_discovery) { | ||||
|           $lock->unlock(); | ||||
|           return; | ||||
|         return 0; | ||||
|       } | ||||
|  | ||||
|       // TODO: It would be nice to discover only if we pulled something, but | ||||
| @@ -103,10 +82,13 @@ final class PhabricatorRepositoryManagementUpdateWorkflow | ||||
|       $repository->writeStatusMessage( | ||||
|         PhabricatorRepositoryStatusMessage::TYPE_FETCH, | ||||
|         PhabricatorRepositoryStatusMessage::CODE_OKAY); | ||||
|       } catch (Exception $ex) { | ||||
|         $lock->unlock(); | ||||
|         throw $ex; | ||||
|       } | ||||
|     } catch (DiffusionDaemonLockException $ex) { | ||||
|       // If we miss a pull or discover because some other process is already | ||||
|       // doing the work, just bail out. | ||||
|       echo tsprintf( | ||||
|         "%s\n", | ||||
|         $ex->getMessage()); | ||||
|       return 0; | ||||
|     } catch (Exception $ex) { | ||||
|       $repository->writeStatusMessage( | ||||
|         PhabricatorRepositoryStatusMessage::TYPE_FETCH, | ||||
| @@ -118,12 +100,11 @@ final class PhabricatorRepositoryManagementUpdateWorkflow | ||||
|       throw $ex; | ||||
|     } | ||||
|  | ||||
|     $lock->unlock(); | ||||
|  | ||||
|     $console->writeOut( | ||||
|     echo tsprintf( | ||||
|       "%s\n", | ||||
|       pht( | ||||
|         'Updated repository **%s**.', | ||||
|         $repository->getMonogram())."\n"); | ||||
|         'Updated repository "%s".', | ||||
|         $repository->getDisplayName())); | ||||
|  | ||||
|     return 0; | ||||
|   } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 epriestley
					epriestley