From 2b0ad243d179f81baad24ccd9748a4ba59017d25 Mon Sep 17 00:00:00 2001 From: epriestley Date: Tue, 14 Mar 2017 14:56:21 -0700 Subject: [PATCH] Use "git ls-remote" to guess if "git fetch" is a no-op Summary: Ref T12296. Ref T12392. Currently, when we're observing a remote repository, we periodically run `git fetch ...`. Instead, periodically run `git ls-remote` (to list refs in the remote) and `git for-each-ref` (to list local refs) and only continue if the two lists are different. The motivations for this are: - In T12296, it appears that doing this is //faster// than doing a no-op `git fetch`. This effect seems to reproduce locally in a clean environment (900ms for `ls-remote` + 100ms for `for-each-ref` vs about 1.4s for `fetch`). I don't have any explanation for why this is, but there it is. This isn't a huge change, although the time we're saving does appear to mostly be local CPU time, which is good for us. - Because we control all writes, we could cache `git for-each-ref` in the future and do fewer disk operations. This doesn't necessarily seem too valuable, though. - This allows us to tell if a fetch will do anything or not, and make better decisions around clustering (in particular, simplify how observed repository versioning works). With `git fetch`, we can't easily distinguish between "fetch, but nothing changed" and "legitimate fetch". If a repository updates very regularly we end up doing slightly more work this way (that is, if `ls-remote` always comes back with changes, we do a little extra work), but this is normally very rare. This might not get non-bare repositories quite right in some cases (i.e., incorrectly detect them as changed when they are unchanged) but we haven't created non-bare repositories for many years. Test Plan: Ran `bin/repository update --trace --verbose PHABX`, saw sensible construction of local and remote maps and accurate detection of whether a fetch would do anything or not. Reviewers: chad Reviewed By: chad Maniphest Tasks: T12392, T12296 Differential Revision: https://secure.phabricator.com/D17497 --- .../PhabricatorRepositoryPullEngine.php | 88 ++++++++++++++++++- 1 file changed, 85 insertions(+), 3 deletions(-) diff --git a/src/applications/repository/engine/PhabricatorRepositoryPullEngine.php b/src/applications/repository/engine/PhabricatorRepositoryPullEngine.php index 610bbbfc55..8e6b61d759 100644 --- a/src/applications/repository/engine/PhabricatorRepositoryPullEngine.php +++ b/src/applications/repository/engine/PhabricatorRepositoryPullEngine.php @@ -120,6 +120,7 @@ final class PhabricatorRepositoryPullEngine pht( 'Updating the working copy for repository "%s".', $repository->getDisplayName())); + if ($is_git) { $this->verifyGitOrigin($repository); $this->executeGitUpdate(); @@ -157,7 +158,7 @@ final class PhabricatorRepositoryPullEngine } private function skipPull($message) { - $this->log('%s', $message); + $this->log($message); $this->donePull(); } @@ -172,7 +173,7 @@ final class PhabricatorRepositoryPullEngine } private function logPull($message) { - $this->log('%s', $message); + $this->log($message); } private function donePull() { @@ -190,7 +191,7 @@ final class PhabricatorRepositoryPullEngine } private function installHook($path, array $hook_argv = array()) { - $this->log('%s', pht('Installing commit hook to "%s"...', $path)); + $this->log(pht('Installing commit hook to "%s"...', $path)); $repository = $this->getRepository(); $identifier = $this->getHookContextIdentifier($repository); @@ -339,6 +340,18 @@ final class PhabricatorRepositoryPullEngine throw new Exception($message); } + $remote_refs = $this->loadGitRemoteRefs($repository); + $local_refs = $this->loadGitLocalRefs($repository); + if ($remote_refs === $local_refs) { + $this->log( + pht( + 'Skipping fetch because local and remote refs are already '. + 'identical.')); + return false; + } + + $this->logRefDifferences($remote_refs, $local_refs); + $retry = false; do { // This is a local command, but needs credentials. @@ -396,6 +409,75 @@ final class PhabricatorRepositoryPullEngine $this->installHook($root.$path); } + private function loadGitRemoteRefs(PhabricatorRepository $repository) { + $remote_envelope = $repository->getRemoteURIEnvelope(); + + list($stdout) = $repository->execxRemoteCommand( + 'ls-remote -- %P', + $remote_envelope); + + $map = array(); + $lines = phutil_split_lines($stdout, false); + foreach ($lines as $line) { + list($hash, $name) = preg_split('/\s+/', $line, 2); + + // If the remote has a HEAD, just ignore it. + if ($name == 'HEAD') { + continue; + } + + // If the remote ref is itself a remote ref, ignore it. + if (preg_match('(^refs/remotes/)', $name)) { + continue; + } + + $map[$name] = $hash; + } + + ksort($map); + + return $map; + } + + private function loadGitLocalRefs(PhabricatorRepository $repository) { + $refs = id(new DiffusionLowLevelGitRefQuery()) + ->setRepository($repository) + ->execute(); + + $map = array(); + foreach ($refs as $ref) { + $fields = $ref->getRawFields(); + $map[idx($fields, 'refname')] = $ref->getCommitIdentifier(); + } + + ksort($map); + + return $map; + } + + private function logRefDifferences(array $remote, array $local) { + $all = $local + $remote; + + $differences = array(); + foreach ($all as $key => $ignored) { + $remote_ref = idx($remote, $key, pht('')); + $local_ref = idx($local, $key, pht('')); + if ($remote_ref !== $local_ref) { + $differences[] = pht( + '%s (remote: "%s", local: "%s")', + $key, + $remote_ref, + $local_ref); + } + } + + $this->log( + pht( + "Updating repository after detecting ref differences:\n%s", + implode("\n", $differences))); + } + + /* -( Pulling Mercurial Working Copies )----------------------------------- */