Implement basic ngram search for Owners Package names
Summary: Ref T9979. This uses ngrams (specifically, trigrams) to build a reasonably efficient index for substring matching. Specifically, for a package like "Example", with ID 123, we store rows like this: ``` < ex, 123> <exa, 123> <xam, 123> <amp, 123> <mpl, 123> <ple, 123> <le , 123> ``` When the user searches for `exam`, we join this table for packages with tokens `exa` and `xam`. MySQL can do this a lot more efficiently than it can process a `LIKE "%exam%"` query against a huge table. When the user searches for a one-letter or two-letter string, we only search the beginnings of words. This is probably what they want, the only thing we can do quickly, and a reasonable/expected behavior for typeaheads. Test Plan: - Ran storage upgrades and search indexer. - Searched for stuff with "name contains". - Used typeahead and got sensible results. - Searched for `aabbccddeeffgghhiijjkkllmmnnooppqqrrssttuuvvwwxxyyzz` and saw only 16 joins. Reviewers: chad Reviewed By: chad Maniphest Tasks: T9979 Differential Revision: https://secure.phabricator.com/D14846
This commit is contained in:
		| @@ -0,0 +1,7 @@ | |||||||
|  | CREATE TABLE {$NAMESPACE}_owners.owners_name_ngrams ( | ||||||
|  |   id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, | ||||||
|  |   objectID INT UNSIGNED NOT NULL, | ||||||
|  |   ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT}, | ||||||
|  |   KEY `key_object` (objectID), | ||||||
|  |   KEY `key_ngram` (ngram, objectID) | ||||||
|  | ) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; | ||||||
							
								
								
									
										11
									
								
								resources/sql/autopatches/20151221.search.3.reindex.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								resources/sql/autopatches/20151221.search.3.reindex.php
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,11 @@ | |||||||
|  | <?php | ||||||
|  |  | ||||||
|  | $table = new PhabricatorOwnersPackage(); | ||||||
|  |  | ||||||
|  | foreach (new LiskMigrationIterator($table) as $package) { | ||||||
|  |   PhabricatorSearchWorker::queueDocumentForIndexing( | ||||||
|  |     $package->getPHID(), | ||||||
|  |     array( | ||||||
|  |       'force' => true, | ||||||
|  |     )); | ||||||
|  | } | ||||||
| @@ -2548,6 +2548,8 @@ phutil_register_library_map(array( | |||||||
|     'PhabricatorNamedQueryQuery' => 'applications/search/query/PhabricatorNamedQueryQuery.php', |     'PhabricatorNamedQueryQuery' => 'applications/search/query/PhabricatorNamedQueryQuery.php', | ||||||
|     'PhabricatorNavigationRemarkupRule' => 'infrastructure/markup/rule/PhabricatorNavigationRemarkupRule.php', |     'PhabricatorNavigationRemarkupRule' => 'infrastructure/markup/rule/PhabricatorNavigationRemarkupRule.php', | ||||||
|     'PhabricatorNeverTriggerClock' => 'infrastructure/daemon/workers/clock/PhabricatorNeverTriggerClock.php', |     'PhabricatorNeverTriggerClock' => 'infrastructure/daemon/workers/clock/PhabricatorNeverTriggerClock.php', | ||||||
|  |     'PhabricatorNgramsIndexEngineExtension' => 'applications/search/engineextension/PhabricatorNgramsIndexEngineExtension.php', | ||||||
|  |     'PhabricatorNgramsInterface' => 'applications/search/interface/PhabricatorNgramsInterface.php', | ||||||
|     'PhabricatorNotificationBuilder' => 'applications/notification/builder/PhabricatorNotificationBuilder.php', |     'PhabricatorNotificationBuilder' => 'applications/notification/builder/PhabricatorNotificationBuilder.php', | ||||||
|     'PhabricatorNotificationClearController' => 'applications/notification/controller/PhabricatorNotificationClearController.php', |     'PhabricatorNotificationClearController' => 'applications/notification/controller/PhabricatorNotificationClearController.php', | ||||||
|     'PhabricatorNotificationClient' => 'applications/notification/client/PhabricatorNotificationClient.php', |     'PhabricatorNotificationClient' => 'applications/notification/client/PhabricatorNotificationClient.php', | ||||||
| @@ -2636,7 +2638,9 @@ phutil_register_library_map(array( | |||||||
|     'PhabricatorOwnersPackage' => 'applications/owners/storage/PhabricatorOwnersPackage.php', |     'PhabricatorOwnersPackage' => 'applications/owners/storage/PhabricatorOwnersPackage.php', | ||||||
|     'PhabricatorOwnersPackageDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageDatasource.php', |     'PhabricatorOwnersPackageDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageDatasource.php', | ||||||
|     'PhabricatorOwnersPackageEditEngine' => 'applications/owners/editor/PhabricatorOwnersPackageEditEngine.php', |     'PhabricatorOwnersPackageEditEngine' => 'applications/owners/editor/PhabricatorOwnersPackageEditEngine.php', | ||||||
|  |     'PhabricatorOwnersPackageFulltextEngine' => 'applications/owners/query/PhabricatorOwnersPackageFulltextEngine.php', | ||||||
|     'PhabricatorOwnersPackageFunctionDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageFunctionDatasource.php', |     'PhabricatorOwnersPackageFunctionDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageFunctionDatasource.php', | ||||||
|  |     'PhabricatorOwnersPackageNameNgrams' => 'applications/owners/storage/PhabricatorOwnersPackageNameNgrams.php', | ||||||
|     'PhabricatorOwnersPackageOwnerDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageOwnerDatasource.php', |     'PhabricatorOwnersPackageOwnerDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageOwnerDatasource.php', | ||||||
|     'PhabricatorOwnersPackagePHIDType' => 'applications/owners/phid/PhabricatorOwnersPackagePHIDType.php', |     'PhabricatorOwnersPackagePHIDType' => 'applications/owners/phid/PhabricatorOwnersPackagePHIDType.php', | ||||||
|     'PhabricatorOwnersPackageQuery' => 'applications/owners/query/PhabricatorOwnersPackageQuery.php', |     'PhabricatorOwnersPackageQuery' => 'applications/owners/query/PhabricatorOwnersPackageQuery.php', | ||||||
| @@ -3047,6 +3051,8 @@ phutil_register_library_map(array( | |||||||
|     'PhabricatorSearchManagementIndexWorkflow' => 'applications/search/management/PhabricatorSearchManagementIndexWorkflow.php', |     'PhabricatorSearchManagementIndexWorkflow' => 'applications/search/management/PhabricatorSearchManagementIndexWorkflow.php', | ||||||
|     'PhabricatorSearchManagementInitWorkflow' => 'applications/search/management/PhabricatorSearchManagementInitWorkflow.php', |     'PhabricatorSearchManagementInitWorkflow' => 'applications/search/management/PhabricatorSearchManagementInitWorkflow.php', | ||||||
|     'PhabricatorSearchManagementWorkflow' => 'applications/search/management/PhabricatorSearchManagementWorkflow.php', |     'PhabricatorSearchManagementWorkflow' => 'applications/search/management/PhabricatorSearchManagementWorkflow.php', | ||||||
|  |     'PhabricatorSearchNgrams' => 'applications/search/ngrams/PhabricatorSearchNgrams.php', | ||||||
|  |     'PhabricatorSearchNgramsDestructionEngineExtension' => 'applications/search/engineextension/PhabricatorSearchNgramsDestructionEngineExtension.php', | ||||||
|     'PhabricatorSearchOrderController' => 'applications/search/controller/PhabricatorSearchOrderController.php', |     'PhabricatorSearchOrderController' => 'applications/search/controller/PhabricatorSearchOrderController.php', | ||||||
|     'PhabricatorSearchOrderField' => 'applications/search/field/PhabricatorSearchOrderField.php', |     'PhabricatorSearchOrderField' => 'applications/search/field/PhabricatorSearchOrderField.php', | ||||||
|     'PhabricatorSearchPreferencesSettingsPanel' => 'applications/settings/panel/PhabricatorSearchPreferencesSettingsPanel.php', |     'PhabricatorSearchPreferencesSettingsPanel' => 'applications/settings/panel/PhabricatorSearchPreferencesSettingsPanel.php', | ||||||
| @@ -6802,6 +6808,7 @@ phutil_register_library_map(array( | |||||||
|     'PhabricatorNamedQueryQuery' => 'PhabricatorCursorPagedPolicyAwareQuery', |     'PhabricatorNamedQueryQuery' => 'PhabricatorCursorPagedPolicyAwareQuery', | ||||||
|     'PhabricatorNavigationRemarkupRule' => 'PhutilRemarkupRule', |     'PhabricatorNavigationRemarkupRule' => 'PhutilRemarkupRule', | ||||||
|     'PhabricatorNeverTriggerClock' => 'PhabricatorTriggerClock', |     'PhabricatorNeverTriggerClock' => 'PhabricatorTriggerClock', | ||||||
|  |     'PhabricatorNgramsIndexEngineExtension' => 'PhabricatorIndexEngineExtension', | ||||||
|     'PhabricatorNotificationBuilder' => 'Phobject', |     'PhabricatorNotificationBuilder' => 'Phobject', | ||||||
|     'PhabricatorNotificationClearController' => 'PhabricatorNotificationController', |     'PhabricatorNotificationClearController' => 'PhabricatorNotificationController', | ||||||
|     'PhabricatorNotificationClient' => 'Phobject', |     'PhabricatorNotificationClient' => 'Phobject', | ||||||
| @@ -6907,10 +6914,14 @@ phutil_register_library_map(array( | |||||||
|       'PhabricatorCustomFieldInterface', |       'PhabricatorCustomFieldInterface', | ||||||
|       'PhabricatorDestructibleInterface', |       'PhabricatorDestructibleInterface', | ||||||
|       'PhabricatorConduitResultInterface', |       'PhabricatorConduitResultInterface', | ||||||
|  |       'PhabricatorFulltextInterface', | ||||||
|  |       'PhabricatorNgramsInterface', | ||||||
|     ), |     ), | ||||||
|     'PhabricatorOwnersPackageDatasource' => 'PhabricatorTypeaheadDatasource', |     'PhabricatorOwnersPackageDatasource' => 'PhabricatorTypeaheadDatasource', | ||||||
|     'PhabricatorOwnersPackageEditEngine' => 'PhabricatorEditEngine', |     'PhabricatorOwnersPackageEditEngine' => 'PhabricatorEditEngine', | ||||||
|  |     'PhabricatorOwnersPackageFulltextEngine' => 'PhabricatorFulltextEngine', | ||||||
|     'PhabricatorOwnersPackageFunctionDatasource' => 'PhabricatorTypeaheadCompositeDatasource', |     'PhabricatorOwnersPackageFunctionDatasource' => 'PhabricatorTypeaheadCompositeDatasource', | ||||||
|  |     'PhabricatorOwnersPackageNameNgrams' => 'PhabricatorSearchNgrams', | ||||||
|     'PhabricatorOwnersPackageOwnerDatasource' => 'PhabricatorTypeaheadCompositeDatasource', |     'PhabricatorOwnersPackageOwnerDatasource' => 'PhabricatorTypeaheadCompositeDatasource', | ||||||
|     'PhabricatorOwnersPackagePHIDType' => 'PhabricatorPHIDType', |     'PhabricatorOwnersPackagePHIDType' => 'PhabricatorPHIDType', | ||||||
|     'PhabricatorOwnersPackageQuery' => 'PhabricatorCursorPagedPolicyAwareQuery', |     'PhabricatorOwnersPackageQuery' => 'PhabricatorCursorPagedPolicyAwareQuery', | ||||||
| @@ -7414,6 +7425,8 @@ phutil_register_library_map(array( | |||||||
|     'PhabricatorSearchManagementIndexWorkflow' => 'PhabricatorSearchManagementWorkflow', |     'PhabricatorSearchManagementIndexWorkflow' => 'PhabricatorSearchManagementWorkflow', | ||||||
|     'PhabricatorSearchManagementInitWorkflow' => 'PhabricatorSearchManagementWorkflow', |     'PhabricatorSearchManagementInitWorkflow' => 'PhabricatorSearchManagementWorkflow', | ||||||
|     'PhabricatorSearchManagementWorkflow' => 'PhabricatorManagementWorkflow', |     'PhabricatorSearchManagementWorkflow' => 'PhabricatorManagementWorkflow', | ||||||
|  |     'PhabricatorSearchNgrams' => 'PhabricatorSearchDAO', | ||||||
|  |     'PhabricatorSearchNgramsDestructionEngineExtension' => 'PhabricatorDestructionEngineExtension', | ||||||
|     'PhabricatorSearchOrderController' => 'PhabricatorSearchBaseController', |     'PhabricatorSearchOrderController' => 'PhabricatorSearchBaseController', | ||||||
|     'PhabricatorSearchOrderField' => 'PhabricatorSearchField', |     'PhabricatorSearchOrderField' => 'PhabricatorSearchField', | ||||||
|     'PhabricatorSearchPreferencesSettingsPanel' => 'PhabricatorSettingsPanel', |     'PhabricatorSearchPreferencesSettingsPanel' => 'PhabricatorSettingsPanel', | ||||||
|   | |||||||
| @@ -201,7 +201,8 @@ abstract class PhabricatorConfigSchemaSpec extends Phobject { | |||||||
|  |  | ||||||
|     $is_binary = ($this->getUTF8Charset() == 'binary'); |     $is_binary = ($this->getUTF8Charset() == 'binary'); | ||||||
|     $matches = null; |     $matches = null; | ||||||
|     if (preg_match('/^(fulltext|sort|text)(\d+)?\z/', $data_type, $matches)) { |     $pattern = '/^(fulltext|sort|text|char)(\d+)?\z/'; | ||||||
|  |     if (preg_match($pattern, $data_type, $matches)) { | ||||||
|  |  | ||||||
|       // Limit the permitted column lengths under the theory that it would |       // Limit the permitted column lengths under the theory that it would | ||||||
|       // be nice to eventually reduce this to a small set of standard lengths. |       // be nice to eventually reduce this to a small set of standard lengths. | ||||||
| @@ -220,6 +221,7 @@ abstract class PhabricatorConfigSchemaSpec extends Phobject { | |||||||
|         'text8' => true, |         'text8' => true, | ||||||
|         'text4' => true, |         'text4' => true, | ||||||
|         'text' => true, |         'text' => true, | ||||||
|  |         'char3' => true, | ||||||
|         'sort255' => true, |         'sort255' => true, | ||||||
|         'sort128' => true, |         'sort128' => true, | ||||||
|         'sort64' => true, |         'sort64' => true, | ||||||
| @@ -266,10 +268,14 @@ abstract class PhabricatorConfigSchemaSpec extends Phobject { | |||||||
|           // the majority of cases. |           // the majority of cases. | ||||||
|           $column_type = 'longtext'; |           $column_type = 'longtext'; | ||||||
|           break; |           break; | ||||||
|  |         case 'char': | ||||||
|  |           $column_type = 'char('.$size.')'; | ||||||
|  |           break; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       switch ($type) { |       switch ($type) { | ||||||
|         case 'text': |         case 'text': | ||||||
|  |         case 'char': | ||||||
|           if ($is_binary) { |           if ($is_binary) { | ||||||
|             // We leave collation and character set unspecified in order to |             // We leave collation and character set unspecified in order to | ||||||
|             // generate valid SQL. |             // generate valid SQL. | ||||||
|   | |||||||
| @@ -334,4 +334,8 @@ final class PhabricatorOwnersPackageTransactionEditor | |||||||
|     return $body; |     return $body; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   protected function supportsSearch() { | ||||||
|  |     return true; | ||||||
|  |   } | ||||||
|  |  | ||||||
| } | } | ||||||
|   | |||||||
| @@ -0,0 +1,26 @@ | |||||||
|  | <?php | ||||||
|  |  | ||||||
|  | final class PhabricatorOwnersPackageFulltextEngine | ||||||
|  |   extends PhabricatorFulltextEngine { | ||||||
|  |  | ||||||
|  |   protected function buildAbstractDocument( | ||||||
|  |     PhabricatorSearchAbstractDocument $document, | ||||||
|  |     $object) { | ||||||
|  |  | ||||||
|  |     $package = $object; | ||||||
|  |     $document->setDocumentTitle($package->getName()); | ||||||
|  |  | ||||||
|  |     // TODO: These are bogus, but not currently stored on packages. | ||||||
|  |     $document->setDocumentCreated(PhabricatorTime::getNow()); | ||||||
|  |     $document->setDocumentModified(PhabricatorTime::getNow()); | ||||||
|  |  | ||||||
|  |     $document->addRelationship( | ||||||
|  |       $package->isArchived() | ||||||
|  |         ? PhabricatorSearchRelationship::RELATIONSHIP_CLOSED | ||||||
|  |         : PhabricatorSearchRelationship::RELATIONSHIP_OPEN, | ||||||
|  |       $package->getPHID(), | ||||||
|  |       PhabricatorOwnersPackagePHIDType::TYPECONST, | ||||||
|  |       PhabricatorTime::getNow()); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  | } | ||||||
| @@ -9,7 +9,6 @@ final class PhabricatorOwnersPackageQuery | |||||||
|   private $authorityPHIDs; |   private $authorityPHIDs; | ||||||
|   private $repositoryPHIDs; |   private $repositoryPHIDs; | ||||||
|   private $paths; |   private $paths; | ||||||
|   private $namePrefix; |  | ||||||
|   private $statuses; |   private $statuses; | ||||||
|  |  | ||||||
|   private $controlMap = array(); |   private $controlMap = array(); | ||||||
| @@ -78,9 +77,10 @@ final class PhabricatorOwnersPackageQuery | |||||||
|     return $this; |     return $this; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   public function withNamePrefix($prefix) { |   public function withNameNgrams($ngrams) { | ||||||
|     $this->namePrefix = $prefix; |     return $this->withNgramsConstraint( | ||||||
|     return $this; |       new PhabricatorOwnersPackageNameNgrams(), | ||||||
|  |       $ngrams); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   public function needPaths($need_paths) { |   public function needPaths($need_paths) { | ||||||
| @@ -208,15 +208,6 @@ final class PhabricatorOwnersPackageQuery | |||||||
|         $this->statuses); |         $this->statuses); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if (strlen($this->namePrefix)) { |  | ||||||
|       // NOTE: This is a hacky mess, but this column is currently case |  | ||||||
|       // sensitive and unique. |  | ||||||
|       $where[] = qsprintf( |  | ||||||
|         $conn, |  | ||||||
|         'LOWER(p.name) LIKE %>', |  | ||||||
|         phutil_utf8_strtolower($this->namePrefix)); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     if ($this->controlMap) { |     if ($this->controlMap) { | ||||||
|       $clauses = array(); |       $clauses = array(); | ||||||
|       foreach ($this->controlMap as $repository_phid => $paths) { |       foreach ($this->controlMap as $repository_phid => $paths) { | ||||||
|   | |||||||
| @@ -25,6 +25,10 @@ final class PhabricatorOwnersPackageSearchEngine | |||||||
|         ->setDescription( |         ->setDescription( | ||||||
|           pht('Search for packages with specific owners.')) |           pht('Search for packages with specific owners.')) | ||||||
|         ->setDatasource(new PhabricatorProjectOrUserDatasource()), |         ->setDatasource(new PhabricatorProjectOrUserDatasource()), | ||||||
|  |       id(new PhabricatorSearchTextField()) | ||||||
|  |         ->setLabel(pht('Name Contains')) | ||||||
|  |         ->setKey('name') | ||||||
|  |         ->setDescription(pht('Search for packages by name substrings.')), | ||||||
|       id(new PhabricatorSearchDatasourceField()) |       id(new PhabricatorSearchDatasourceField()) | ||||||
|         ->setLabel(pht('Repositories')) |         ->setLabel(pht('Repositories')) | ||||||
|         ->setKey('repositoryPHIDs') |         ->setKey('repositoryPHIDs') | ||||||
| @@ -69,6 +73,10 @@ final class PhabricatorOwnersPackageSearchEngine | |||||||
|       $query->withStatuses($map['statuses']); |       $query->withStatuses($map['statuses']); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     if (strlen($map['name'])) { | ||||||
|  |       $query->withNameNgrams($map['name']); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     return $query; |     return $query; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -7,7 +7,9 @@ final class PhabricatorOwnersPackage | |||||||
|     PhabricatorApplicationTransactionInterface, |     PhabricatorApplicationTransactionInterface, | ||||||
|     PhabricatorCustomFieldInterface, |     PhabricatorCustomFieldInterface, | ||||||
|     PhabricatorDestructibleInterface, |     PhabricatorDestructibleInterface, | ||||||
|     PhabricatorConduitResultInterface { |     PhabricatorConduitResultInterface, | ||||||
|  |     PhabricatorFulltextInterface, | ||||||
|  |     PhabricatorNgramsInterface { | ||||||
|  |  | ||||||
|   protected $name; |   protected $name; | ||||||
|   protected $originalName; |   protected $originalName; | ||||||
| @@ -46,7 +48,7 @@ final class PhabricatorOwnersPackage | |||||||
|       self::CONFIG_TIMESTAMPS => false, |       self::CONFIG_TIMESTAMPS => false, | ||||||
|       self::CONFIG_AUX_PHID => true, |       self::CONFIG_AUX_PHID => true, | ||||||
|       self::CONFIG_COLUMN_SCHEMA => array( |       self::CONFIG_COLUMN_SCHEMA => array( | ||||||
|         'name' => 'text128', |         'name' => 'sort128', | ||||||
|         'originalName' => 'text255', |         'originalName' => 'text255', | ||||||
|         'description' => 'text', |         'description' => 'text', | ||||||
|         'primaryOwnerPHID' => 'phid?', |         'primaryOwnerPHID' => 'phid?', | ||||||
| @@ -54,17 +56,6 @@ final class PhabricatorOwnersPackage | |||||||
|         'mailKey' => 'bytes20', |         'mailKey' => 'bytes20', | ||||||
|         'status' => 'text32', |         'status' => 'text32', | ||||||
|       ), |       ), | ||||||
|       self::CONFIG_KEY_SCHEMA => array( |  | ||||||
|         'key_phid' => null, |  | ||||||
|         'phid' => array( |  | ||||||
|           'columns' => array('phid'), |  | ||||||
|           'unique' => true, |  | ||||||
|         ), |  | ||||||
|         'name' => array( |  | ||||||
|           'columns' => array('name'), |  | ||||||
|           'unique' => true, |  | ||||||
|         ), |  | ||||||
|       ), |  | ||||||
|     ) + parent::getConfiguration(); |     ) + parent::getConfiguration(); | ||||||
|   } |   } | ||||||
|  |  | ||||||
| @@ -433,4 +424,23 @@ final class PhabricatorOwnersPackage | |||||||
|     ); |     ); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /* -(  PhabricatorFulltextInterface  )--------------------------------------- */ | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   public function newFulltextEngine() { | ||||||
|  |     return new PhabricatorOwnersPackageFulltextEngine(); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /* -(  PhabricatorNgramsInterface  )----------------------------------------- */ | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   public function newNgrams() { | ||||||
|  |     return array( | ||||||
|  |       id(new PhabricatorOwnersPackageNameNgrams()) | ||||||
|  |         ->setValue($this->getName()), | ||||||
|  |     ); | ||||||
|  |   } | ||||||
|  |  | ||||||
| } | } | ||||||
|   | |||||||
| @@ -0,0 +1,18 @@ | |||||||
|  | <?php | ||||||
|  |  | ||||||
|  | final class PhabricatorOwnersPackageNameNgrams | ||||||
|  |   extends PhabricatorSearchNgrams { | ||||||
|  |  | ||||||
|  |   public function getNgramKey() { | ||||||
|  |     return 'name'; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   public function getColumnName() { | ||||||
|  |     return 'name'; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   public function getApplicationName() { | ||||||
|  |     return 'owners'; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  | } | ||||||
| @@ -27,6 +27,14 @@ final class PhabricatorOwnersPackageTransaction | |||||||
|  |  | ||||||
|     switch ($this->getTransactionType()) { |     switch ($this->getTransactionType()) { | ||||||
|       case self::TYPE_OWNERS: |       case self::TYPE_OWNERS: | ||||||
|  |         if (!is_array($old)) { | ||||||
|  |           $old = array(); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if (!is_array($new)) { | ||||||
|  |           $new = array(); | ||||||
|  |         } | ||||||
|  |  | ||||||
|         $add = array_diff($new, $old); |         $add = array_diff($new, $old); | ||||||
|         foreach ($add as $phid) { |         foreach ($add as $phid) { | ||||||
|           $phids[] = $phid; |           $phids[] = $phid; | ||||||
|   | |||||||
| @@ -22,7 +22,7 @@ final class PhabricatorOwnersPackageDatasource | |||||||
|     $results = array(); |     $results = array(); | ||||||
|  |  | ||||||
|     $query = id(new PhabricatorOwnersPackageQuery()) |     $query = id(new PhabricatorOwnersPackageQuery()) | ||||||
|       ->withNamePrefix($raw_query) |       ->withNameNgrams($raw_query) | ||||||
|       ->setOrder('name'); |       ->setOrder('name'); | ||||||
|  |  | ||||||
|     $packages = $this->executeQuery($query); |     $packages = $this->executeQuery($query); | ||||||
|   | |||||||
| @@ -65,6 +65,9 @@ final class PhabricatorFulltextIndexEngineExtension | |||||||
|  |  | ||||||
|     try { |     try { | ||||||
|       $comment = $xaction->getApplicationTransactionCommentObject(); |       $comment = $xaction->getApplicationTransactionCommentObject(); | ||||||
|  |       if (!$comment) { | ||||||
|  |         return 'none'; | ||||||
|  |       } | ||||||
|     } catch (Exception $ex) { |     } catch (Exception $ex) { | ||||||
|       return 'none'; |       return 'none'; | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -0,0 +1,34 @@ | |||||||
|  | <?php | ||||||
|  |  | ||||||
|  | final class PhabricatorNgramsIndexEngineExtension | ||||||
|  |   extends PhabricatorIndexEngineExtension { | ||||||
|  |  | ||||||
|  |   const EXTENSIONKEY = 'ngrams'; | ||||||
|  |  | ||||||
|  |   public function getExtensionName() { | ||||||
|  |     return pht('Ngrams Engine'); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   public function getIndexVersion($object) { | ||||||
|  |     $ngrams = $object->newNgrams(); | ||||||
|  |     $map = mpull($ngrams, 'getValue', 'getNgramKey'); | ||||||
|  |     ksort($map); | ||||||
|  |     $serialized = serialize($map); | ||||||
|  |  | ||||||
|  |     return PhabricatorHash::digestForIndex($serialized); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   public function shouldIndexObject($object) { | ||||||
|  |     return ($object instanceof PhabricatorNgramsInterface); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   public function indexObject( | ||||||
|  |     PhabricatorIndexEngine $engine, | ||||||
|  |     $object) { | ||||||
|  |  | ||||||
|  |     foreach ($object->newNgrams() as $ngram) { | ||||||
|  |       $ngram->writeNgram($object->getID()); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  | } | ||||||
| @@ -0,0 +1,31 @@ | |||||||
|  | <?php | ||||||
|  |  | ||||||
|  | final class PhabricatorSearchNgramsDestructionEngineExtension | ||||||
|  |   extends PhabricatorDestructionEngineExtension { | ||||||
|  |  | ||||||
|  |   const EXTENSIONKEY = 'search.ngrams'; | ||||||
|  |  | ||||||
|  |   public function getExtensionName() { | ||||||
|  |     return pht('Search Ngram'); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   public function canDestroyObject( | ||||||
|  |     PhabricatorDestructionEngine $engine, | ||||||
|  |     $object) { | ||||||
|  |     return ($object instanceof PhabricatorNgramsInterface); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   public function destroyObject( | ||||||
|  |     PhabricatorDestructionEngine $engine, | ||||||
|  |     $object) { | ||||||
|  |  | ||||||
|  |     foreach ($object->newNgrams() as $ngram) { | ||||||
|  |       queryfx( | ||||||
|  |         $ngram->establishConnection('w'), | ||||||
|  |         'DELETE FROM %T WHERE objectID = %d', | ||||||
|  |         $ngram->getTableName(), | ||||||
|  |         $object->getID()); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  | } | ||||||
| @@ -0,0 +1,7 @@ | |||||||
|  | <?php | ||||||
|  |  | ||||||
|  | interface PhabricatorNgramsInterface { | ||||||
|  |  | ||||||
|  |   public function newNgrams(); | ||||||
|  |  | ||||||
|  | } | ||||||
							
								
								
									
										113
									
								
								src/applications/search/ngrams/PhabricatorSearchNgrams.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										113
									
								
								src/applications/search/ngrams/PhabricatorSearchNgrams.php
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,113 @@ | |||||||
|  | <?php | ||||||
|  |  | ||||||
|  | abstract class PhabricatorSearchNgrams | ||||||
|  |   extends PhabricatorSearchDAO { | ||||||
|  |  | ||||||
|  |   protected $objectID; | ||||||
|  |   protected $ngram; | ||||||
|  |  | ||||||
|  |   private $value; | ||||||
|  |  | ||||||
|  |   abstract public function getNgramKey(); | ||||||
|  |   abstract public function getColumnName(); | ||||||
|  |  | ||||||
|  |   final public function setValue($value) { | ||||||
|  |     $this->value = $value; | ||||||
|  |     return $this; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   final public function getValue() { | ||||||
|  |     return $this->value; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   protected function getConfiguration() { | ||||||
|  |     return array( | ||||||
|  |       self::CONFIG_TIMESTAMPS => false, | ||||||
|  |       self::CONFIG_COLUMN_SCHEMA => array( | ||||||
|  |         'objectID' => 'uint32', | ||||||
|  |         'ngram' => 'char3', | ||||||
|  |       ), | ||||||
|  |       self::CONFIG_KEY_SCHEMA => array( | ||||||
|  |         'key_ngram' => array( | ||||||
|  |           'columns' => array('ngram', 'objectID'), | ||||||
|  |         ), | ||||||
|  |         'key_object' => array( | ||||||
|  |           'columns' => array('objectID'), | ||||||
|  |         ), | ||||||
|  |       ), | ||||||
|  |     ) + parent::getConfiguration(); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   public function getTableName() { | ||||||
|  |     $application = $this->getApplicationName(); | ||||||
|  |     $key = $this->getNgramKey(); | ||||||
|  |     return "{$application}_{$key}_ngrams"; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   final public function tokenizeString($value) { | ||||||
|  |     $value = trim($value, ' '); | ||||||
|  |     $value = preg_split('/ +/', $value); | ||||||
|  |     return $value; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Compute the distinct trigrams (3-character ngrams) for a string. | ||||||
|  |    * | ||||||
|  |    * The value is split into tokens with @{method:tokenizeString}, each | ||||||
|  |    * token is lowercased, and then padded according to the mode: | ||||||
|  |    * | ||||||
|  |    *   - `query`: no padding; | ||||||
|  |    *   - `index`: one space on both sides; | ||||||
|  |    *   - `prefix`: one leading space. | ||||||
|  |    * | ||||||
|  |    * Any other mode leaves the token unpadded. Tokens shorter than three | ||||||
|  |    * characters after padding produce no ngrams. | ||||||
|  |    * | ||||||
|  |    * Ngrams are cut on UTF-8 character boundaries, not byte boundaries, so a | ||||||
|  |    * multibyte character is never split across two ngrams. | ||||||
|  |    * | ||||||
|  |    * @param string Text to extract ngrams from. | ||||||
|  |    * @param string One of `query`, `index` or `prefix`. | ||||||
|  |    * @return list<string> Unique ngrams, sorted by key with `ksort()`. | ||||||
|  |    */ | ||||||
|  |   final public function getNgramsFromString($value, $mode) { | ||||||
|  |     $tokens = $this->tokenizeString($value); | ||||||
|  |  | ||||||
|  |     $ngrams = array(); | ||||||
|  |     foreach ($tokens as $token) { | ||||||
|  |       $token = phutil_utf8_strtolower($token); | ||||||
|  |  | ||||||
|  |       switch ($mode) { | ||||||
|  |         case 'query': | ||||||
|  |           break; | ||||||
|  |         case 'index': | ||||||
|  |           $token = ' '.$token.' '; | ||||||
|  |           break; | ||||||
|  |         case 'prefix': | ||||||
|  |           $token = ' '.$token; | ||||||
|  |           break; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       // Split into characters rather than bytes: the "ngram" column holds | ||||||
|  |       // three characters, and slicing bytes would cut multibyte sequences. | ||||||
|  |       $chars = preg_split('//u', $token, -1, PREG_SPLIT_NO_EMPTY); | ||||||
|  |       if ($chars === false) { | ||||||
|  |         // Not valid UTF-8; fall back to the byte-wise behavior. | ||||||
|  |         $chars = str_split($token); | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       $len = (count($chars) - 2); | ||||||
|  |       for ($ii = 0; $ii < $len; $ii++) { | ||||||
|  |         $ngram = implode('', array_slice($chars, $ii, 3)); | ||||||
|  |         $ngrams[$ngram] = $ngram; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     ksort($ngrams); | ||||||
|  |  | ||||||
|  |     // Return the values, not the keys: PHP casts numeric keys like "123" to | ||||||
|  |     // integers, so array_keys() would hand back ints for all-digit ngrams. | ||||||
|  |     return array_values($ngrams); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   final public function writeNgram($object_id) { | ||||||
|  |     $ngrams = $this->getNgramsFromString($this->getValue(), 'index'); | ||||||
|  |     $conn_w = $this->establishConnection('w'); | ||||||
|  |  | ||||||
|  |     $sql = array(); | ||||||
|  |     foreach ($ngrams as $ngram) { | ||||||
|  |       $sql[] = qsprintf( | ||||||
|  |         $conn_w, | ||||||
|  |         '(%d, %s)', | ||||||
|  |         $object_id, | ||||||
|  |         $ngram); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     queryfx( | ||||||
|  |       $conn_w, | ||||||
|  |       'DELETE FROM %T WHERE objectID = %d', | ||||||
|  |       $this->getTableName(), | ||||||
|  |       $object_id); | ||||||
|  |  | ||||||
|  |     if ($sql) { | ||||||
|  |       queryfx( | ||||||
|  |         $conn_w, | ||||||
|  |         'INSERT INTO %T (objectID, ngram) VALUES %Q', | ||||||
|  |         $this->getTableName(), | ||||||
|  |         implode(', ', $sql)); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     return $this; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  | } | ||||||
| @@ -26,6 +26,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery | |||||||
|   private $edgeLogicConstraintsAreValid = false; |   private $edgeLogicConstraintsAreValid = false; | ||||||
|   private $spacePHIDs; |   private $spacePHIDs; | ||||||
|   private $spaceIsArchived; |   private $spaceIsArchived; | ||||||
|  |   private $ngrams = array(); | ||||||
|  |  | ||||||
|   protected function getPageCursors(array $page) { |   protected function getPageCursors(array $page) { | ||||||
|     return array( |     return array( | ||||||
| @@ -253,6 +254,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery | |||||||
|     $joins = array(); |     $joins = array(); | ||||||
|     $joins[] = $this->buildEdgeLogicJoinClause($conn); |     $joins[] = $this->buildEdgeLogicJoinClause($conn); | ||||||
|     $joins[] = $this->buildApplicationSearchJoinClause($conn); |     $joins[] = $this->buildApplicationSearchJoinClause($conn); | ||||||
|  |     $joins[] = $this->buildNgramsJoinClause($conn); | ||||||
|     return $joins; |     return $joins; | ||||||
|   } |   } | ||||||
|  |  | ||||||
| @@ -274,6 +276,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery | |||||||
|     $where[] = $this->buildPagingClause($conn); |     $where[] = $this->buildPagingClause($conn); | ||||||
|     $where[] = $this->buildEdgeLogicWhereClause($conn); |     $where[] = $this->buildEdgeLogicWhereClause($conn); | ||||||
|     $where[] = $this->buildSpacesWhereClause($conn); |     $where[] = $this->buildSpacesWhereClause($conn); | ||||||
|  |     $where[] = $this->buildNgramsWhereClause($conn); | ||||||
|     return $where; |     return $where; | ||||||
|   } |   } | ||||||
|  |  | ||||||
| @@ -324,6 +327,10 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery | |||||||
|       return true; |       return true; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     if ($this->shouldGroupNgramResultRows()) { | ||||||
|  |       return true; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     return false; |     return false; | ||||||
|   } |   } | ||||||
|  |  | ||||||
| @@ -1345,6 +1352,138 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery | |||||||
|   } |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /* -(  Ngrams  )------------------------------------------------------------- */ | ||||||
|  |  | ||||||
|  |  | ||||||
/**
 * Register an ngram constraint to apply when this query is built.
 *
 * An empty value is ignored. Otherwise the index, the raw value and the
 * value's length (the number of elements returned by `phutil_utf8v()`)
 * are appended to the list of pending ngram constraints.
 *
 * @param PhabricatorSearchNgrams $index Ngram index to search against.
 * @param string $value Text the user is searching for.
 * @return this
 */
protected function withNgramsConstraint(
  PhabricatorSearchNgrams $index,
  $value) {

  // Empty search text adds no constraint.
  if (!strlen($value)) {
    return $this;
  }

  $characters = phutil_utf8v($value);

  $this->ngrams[] = array(
    'index' => $index,
    'value' => $value,
    'length' => count($characters),
  );

  return $this;
}
|  |  | ||||||
|  |  | ||||||
/**
 * Build one JOIN against the ngram table for each ngram being searched.
 *
 * How each registered constraint is turned into ngrams depends on its
 * recorded length:
 *
 *   - 3 or more: ngrams from `getNgramsFromString()` in 'query' mode,
 *     matched with `=`.
 *   - exactly 2: ngrams from `getNgramsFromString()` in 'prefix' mode,
 *     matched with `=`.
 *   - otherwise: the single string `' '.$value`, matched with `LIKE %>`.
 *
 * @param AphrontDatabaseConnection $conn Connection used for escaping.
 * @return list<string> JOIN clauses, aliased `ngm1`, `ngm2`, ...
 */
protected function buildNgramsJoinClause(AphrontDatabaseConnection $conn) {
  $candidates = array();
  foreach ($this->ngrams as $constraint) {
    $index = $constraint['index'];
    $value = $constraint['value'];
    $length = $constraint['length'];

    if ($length < 2) {
      // Too short to form a stored ngram: match on a leading-space prefix.
      $grams = array(' '.$value);
      $is_prefix = true;
    } else {
      $mode = ($length == 2) ? 'prefix' : 'query';
      $grams = $index->getNgramsFromString($value, $mode);
      $is_prefix = false;
    }

    foreach ($grams as $gram) {
      $candidates[] = array(
        'table' => $index->getTableName(),
        'ngram' => $gram,
        'prefix' => $is_prefix,
      );
    }
  }

  // Every ngram costs one join, and MySQL permits at most 61 joined tables
  // in a query. For very long search strings, keep a random sample of 16
  // ngrams rather than joining on all of them.
  $join_limit = 16;
  if (count($candidates) > $join_limit) {
    shuffle($candidates);
    $candidates = array_slice($candidates, 0, $join_limit);
  }

  $primary_alias = $this->getPrimaryTableAlias();
  $id_column = $primary_alias
    ? qsprintf($conn, '%T.%T', $primary_alias, 'id')
    : qsprintf($conn, '%T', 'id');

  $joins = array();
  foreach ($candidates as $offset => $candidate) {
    // Join aliases are numbered from 1: ngm1, ngm2, ...
    $join_alias = 'ngm'.($offset + 1);

    $pattern = $candidate['prefix']
      ? 'JOIN %T %T ON %T.objectID = %Q AND %T.ngram LIKE %>'
      : 'JOIN %T %T ON %T.objectID = %Q AND %T.ngram = %s';

    $joins[] = qsprintf(
      $conn,
      $pattern,
      $candidate['table'],
      $join_alias,
      $join_alias,
      $id_column,
      $join_alias,
      $candidate['ngram']);
  }

  return $joins;
}
|  |  | ||||||
|  |  | ||||||
/**
 * Build the WHERE conditions that accompany the ngram joins.
 *
 * For each registered constraint, the search value is split with the
 * index's `tokenizeString()` and one `LIKE %~` condition on the indexed
 * column is emitted per token. Presumably this re-checks the real column
 * text for rows that the ngram joins let through.
 *
 * @param AphrontDatabaseConnection $conn Connection used for escaping.
 * @return list<string> WHERE conditions; empty when nothing is registered.
 */
protected function buildNgramsWhereClause(AphrontDatabaseConnection $conn) {
  $clauses = array();

  if (!$this->ngrams) {
    return $clauses;
  }

  $table_alias = $this->getPrimaryTableAlias();

  foreach ($this->ngrams as $constraint) {
    $index = $constraint['index'];

    // Qualify the column with the primary table alias when there is one.
    $column_name = $index->getColumnName();
    $column = $table_alias
      ? qsprintf($conn, '%T.%T', $table_alias, $column_name)
      : qsprintf($conn, '%T', $column_name);

    foreach ($index->tokenizeString($constraint['value']) as $token) {
      $clauses[] = qsprintf($conn, '%Q LIKE %~', $column, $token);
    }
  }

  return $clauses;
}
|  |  | ||||||
|  |  | ||||||
/**
 * Report whether any ngram constraints have been registered on this query.
 *
 * @return bool True when at least one ngram constraint is present.
 */
protected function shouldGroupNgramResultRows() {
  return ($this->ngrams ? true : false);
}
|  |  | ||||||
|  |  | ||||||
| /* -(  Edge Logic  )--------------------------------------------------------- */ | /* -(  Edge Logic  )--------------------------------------------------------- */ | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 epriestley
					epriestley