Improve construction of commit queries from blame lookups
Summary: Ref T2450. File blame tends to have the same commit a lot of times, and we don't do lookups like this efficiently right now. In particular, for a file like `__phutil_library_map__.php`, we would issue a query with ~9,000 clauses like this: ``` (repositoryID = 1 AND commitIdentifier LIKE "XYZ%") ``` ...but only a few hundred of those identifiers were unique. Instead, issue only one clause per unique identifier. MySQL also seems to do a little better on "commitIdentifier = X" if we have the full hash, so special case that slightly. Test Plan: - Issuing a query for only unique identifiers dropped the cost from 400ms to 100ms locally. - Swapping to `=` if we have the full hash dropped the cost from 100ms to 75ms locally. Reviewers: chad Reviewed By: chad Maniphest Tasks: T2450 Differential Revision: https://secure.phabricator.com/D14962
This commit is contained in:
		| @@ -51,6 +51,11 @@ final class DiffusionCommitQuery | |||||||
|    * they queried for. |    * they queried for. | ||||||
|    */ |    */ | ||||||
|   public function withIdentifiers(array $identifiers) { |   public function withIdentifiers(array $identifiers) { | ||||||
|  |     // Some workflows (like blame lookups) can pass in large numbers of | ||||||
|  |     // duplicate identifiers. We only care about unique identifiers, so | ||||||
|  |     // get rid of duplicates immediately. | ||||||
|  |     $identifiers = array_fuse($identifiers); | ||||||
|  |  | ||||||
|     $this->identifiers = $identifiers; |     $this->identifiers = $identifiers; | ||||||
|     return $this; |     return $this; | ||||||
|   } |   } | ||||||
| @@ -185,7 +190,7 @@ final class DiffusionCommitQuery | |||||||
|  |  | ||||||
|       // Build the identifierMap |       // Build the identifierMap | ||||||
|       if ($this->identifiers !== null) { |       if ($this->identifiers !== null) { | ||||||
|         $ids = array_fuse($this->identifiers); |         $ids = $this->identifiers; | ||||||
|         $prefixes = array( |         $prefixes = array( | ||||||
|           'r'.$commit->getRepository()->getCallsign(), |           'r'.$commit->getRepository()->getCallsign(), | ||||||
|           'r'.$commit->getRepository()->getCallsign().':', |           'r'.$commit->getRepository()->getCallsign().':', | ||||||
| @@ -395,7 +400,6 @@ final class DiffusionCommitQuery | |||||||
|         $repos->execute(); |         $repos->execute(); | ||||||
|  |  | ||||||
|         $repos = $repos->getIdentifierMap(); |         $repos = $repos->getIdentifierMap(); | ||||||
|  |  | ||||||
|         foreach ($refs as $key => $ref) { |         foreach ($refs as $key => $ref) { | ||||||
|           $repo = idx($repos, $ref['callsign']); |           $repo = idx($repos, $ref['callsign']); | ||||||
|  |  | ||||||
| @@ -404,7 +408,7 @@ final class DiffusionCommitQuery | |||||||
|           } |           } | ||||||
|  |  | ||||||
|           if ($repo->isSVN()) { |           if ($repo->isSVN()) { | ||||||
|             if (!ctype_digit($ref['identifier'])) { |             if (!ctype_digit((string)$ref['identifier'])) { | ||||||
|               continue; |               continue; | ||||||
|             } |             } | ||||||
|             $sql[] = qsprintf( |             $sql[] = qsprintf( | ||||||
| @@ -419,11 +423,25 @@ final class DiffusionCommitQuery | |||||||
|             if (strlen($ref['identifier']) < $min_qualified) { |             if (strlen($ref['identifier']) < $min_qualified) { | ||||||
|               continue; |               continue; | ||||||
|             } |             } | ||||||
|             $sql[] = qsprintf( |  | ||||||
|               $conn, |             $identifier = $ref['identifier']; | ||||||
|               '(commit.repositoryID = %d AND commit.commitIdentifier LIKE %>)', |             if (strlen($identifier) == 40) { | ||||||
|               $repo->getID(), |               // MySQL seems to do slightly better with this version if the | ||||||
|               $ref['identifier']); |               // clause, so issue it if we have a full commit hash. | ||||||
|  |               $sql[] = qsprintf( | ||||||
|  |                 $conn, | ||||||
|  |                 '(commit.repositoryID = %d | ||||||
|  |                   AND commit.commitIdentifier = %s)', | ||||||
|  |                 $repo->getID(), | ||||||
|  |                 $identifier); | ||||||
|  |             } else { | ||||||
|  |               $sql[] = qsprintf( | ||||||
|  |                 $conn, | ||||||
|  |                 '(commit.repositoryID = %d | ||||||
|  |                   AND commit.commitIdentifier LIKE %>)', | ||||||
|  |                 $repo->getID(), | ||||||
|  |                 $identifier); | ||||||
|  |             } | ||||||
|           } |           } | ||||||
|         } |         } | ||||||
|       } |       } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 epriestley
					epriestley