From 0759b84d77c079374a3d24afc3b65b66554384ee Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 6 Jan 2016 15:34:05 -0800 Subject: [PATCH] Improve construction of commit queries from blame lookups Summary: Ref T2450. File blame tends to have the same commit a lot of times, and we don't do lookups like this efficiently right now. In particular, for a file like `__phutil_library_map__.php`, we would issue a query with ~9,000 clauses like this: ``` (repositoryID = 1 AND commitIdentifier LIKE "XYZ%") ``` ...but only a few hundred of those identifiers were unique. Instead, issue only one clause per unique identifier. MySQL also seems to do a little better on "commitIdentifier = X" if we have the full hash, so special case that slightly. Test Plan: - Issuing a query for only unique identifiers dropped the cost from 400ms to 100ms locally. - Swapping to `=` if we have the full hash dropped the cost from 100ms to 75ms locally. Reviewers: chad Reviewed By: chad Maniphest Tasks: T2450 Differential Revision: https://secure.phabricator.com/D14962 --- .../diffusion/query/DiffusionCommitQuery.php | 34 ++++++++++++++----- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/src/applications/diffusion/query/DiffusionCommitQuery.php b/src/applications/diffusion/query/DiffusionCommitQuery.php index e8f499955a..ec76468332 100644 --- a/src/applications/diffusion/query/DiffusionCommitQuery.php +++ b/src/applications/diffusion/query/DiffusionCommitQuery.php @@ -51,6 +51,11 @@ final class DiffusionCommitQuery * they queried for. */ public function withIdentifiers(array $identifiers) { + // Some workflows (like blame lookups) can pass in large numbers of + // duplicate identifiers. We only care about unique identifiers, so + // get rid of duplicates immediately. + $identifiers = array_fuse($identifiers); + $this->identifiers = $identifiers; return $this; } @@ -185,7 +190,7 @@ final class DiffusionCommitQuery // Build the identifierMap if ($this->identifiers !== null) { - $ids = array_fuse($this->identifiers); + $ids = $this->identifiers; $prefixes = array( 'r'.$commit->getRepository()->getCallsign(), 'r'.$commit->getRepository()->getCallsign().':', @@ -395,7 +400,6 @@ final class DiffusionCommitQuery $repos->execute(); $repos = $repos->getIdentifierMap(); - foreach ($refs as $key => $ref) { $repo = idx($repos, $ref['callsign']); @@ -404,7 +408,7 @@ final class DiffusionCommitQuery } if ($repo->isSVN()) { - if (!ctype_digit($ref['identifier'])) { + if (!ctype_digit((string)$ref['identifier'])) { continue; } $sql[] = qsprintf( @@ -419,11 +423,25 @@ final class DiffusionCommitQuery if (strlen($ref['identifier']) < $min_qualified) { continue; } - $sql[] = qsprintf( - $conn, - '(commit.repositoryID = %d AND commit.commitIdentifier LIKE %>)', - $repo->getID(), - $ref['identifier']); + + $identifier = $ref['identifier']; + if (strlen($identifier) == 40) { + // MySQL seems to do slightly better with this version if the + // clause, so issue it if we have a full commit hash. + $sql[] = qsprintf( + $conn, + '(commit.repositoryID = %d + AND commit.commitIdentifier = %s)', + $repo->getID(), + $identifier); + } else { + $sql[] = qsprintf( + $conn, + '(commit.repositoryID = %d + AND commit.commitIdentifier LIKE %>)', + $repo->getID(), + $identifier); + } } } }