Build a Conpherence thread index
Summary: Ref T3165. Builds a dedicated index for Conpherence to avoid scale/policy filtering concerns. - This is pretty one-off but I think it's generally OK. - There's no UI for it. - `ConpherenceFulltextQuery` is very low-level. You would need to do another query on the PHIDs it returns to actually show anything to the user. - The `previousTransactionPHID` is so you can load chat context efficiently. Specifically, if you want to show results like this: > previous line of context > **line of chat that matches the query** > next line of context ...you can read the previous lines out of `previousTransactionPHID` directly, and the next lines by issuing one query with `WHERE previousTransactionPHID IN (...)`. I'm not 100% sure this is useful, but it seemed like a reasonable thing to provide, since there's no way to query this efficiently otherwise and I figure a lot of chat might make way more sense with a couple of lines of context. Test Plan: - Indexed a thread manually (whole thing indexed). - Indexed a thread by updating it (just the new comment indexed). - Wrote a hacky test script and got reasonable-looking query results. Reviewers: btrahan Reviewed By: btrahan Subscribers: epriestley Maniphest Tasks: T3165 Differential Revision: https://secure.phabricator.com/D11234
This commit is contained in:
		
							
								
								
									
										14
									
								
								resources/sql/autopatches/20150105.conpsearch.sql
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								resources/sql/autopatches/20150105.conpsearch.sql
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,14 @@ | ||||
| -- Dedicated Conpherence search index table. Each row carries a thread PHID, | ||||
| -- a transaction PHID (unique), an optional previous-transaction PHID | ||||
| -- (unique when present) and the text corpus covered by a FULLTEXT key. | ||||
| -- NOTE(review): MyISAM is presumably required for the FULLTEXT key on the | ||||
| -- targeted MySQL versions -- confirm before changing the engine. | ||||
| CREATE TABLE {$NAMESPACE}_conpherence.conpherence_index ( | ||||
|   id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, | ||||
|   threadPHID VARBINARY(64) NOT NULL, | ||||
|   transactionPHID VARBINARY(64) NOT NULL, | ||||
|   previousTransactionPHID VARBINARY(64), | ||||
|   corpus longtext | ||||
|     CHARACTER SET {$CHARSET_FULLTEXT} | ||||
|     COLLATE {$COLLATE_FULLTEXT} | ||||
|     NOT NULL, | ||||
|   KEY `key_thread` (threadPHID), | ||||
|   UNIQUE KEY `key_transaction` (transactionPHID), | ||||
|   UNIQUE KEY `key_previous` (previousTransactionPHID), | ||||
|   FULLTEXT KEY `key_corpus` (corpus) | ||||
| ) ENGINE=MyISAM DEFAULT CHARSET={$CHARSET} COLLATE={$COLLATE_TEXT}; | ||||
| @@ -230,7 +230,9 @@ phutil_register_library_map(array( | ||||
|     'ConpherenceDAO' => 'applications/conpherence/storage/ConpherenceDAO.php', | ||||
|     'ConpherenceEditor' => 'applications/conpherence/editor/ConpherenceEditor.php', | ||||
|     'ConpherenceFileWidgetView' => 'applications/conpherence/view/ConpherenceFileWidgetView.php', | ||||
|     'ConpherenceFulltextQuery' => 'applications/conpherence/query/ConpherenceFulltextQuery.php', | ||||
|     'ConpherenceHovercardEventListener' => 'applications/conpherence/events/ConpherenceHovercardEventListener.php', | ||||
|     'ConpherenceIndex' => 'applications/conpherence/storage/ConpherenceIndex.php', | ||||
|     'ConpherenceLayoutView' => 'applications/conpherence/view/ConpherenceLayoutView.php', | ||||
|     'ConpherenceListController' => 'applications/conpherence/controller/ConpherenceListController.php', | ||||
|     'ConpherenceMenuItemView' => 'applications/conpherence/view/ConpherenceMenuItemView.php', | ||||
| @@ -247,6 +249,7 @@ phutil_register_library_map(array( | ||||
|     'ConpherenceSchemaSpec' => 'applications/conpherence/storage/ConpherenceSchemaSpec.php', | ||||
|     'ConpherenceSettings' => 'applications/conpherence/constants/ConpherenceSettings.php', | ||||
|     'ConpherenceThread' => 'applications/conpherence/storage/ConpherenceThread.php', | ||||
|     'ConpherenceThreadIndexer' => 'applications/conpherence/search/ConpherenceThreadIndexer.php', | ||||
|     'ConpherenceThreadListView' => 'applications/conpherence/view/ConpherenceThreadListView.php', | ||||
|     'ConpherenceThreadMailReceiver' => 'applications/conpherence/mail/ConpherenceThreadMailReceiver.php', | ||||
|     'ConpherenceThreadQuery' => 'applications/conpherence/query/ConpherenceThreadQuery.php', | ||||
| @@ -3290,7 +3293,9 @@ phutil_register_library_map(array( | ||||
|     'ConpherenceDAO' => 'PhabricatorLiskDAO', | ||||
|     'ConpherenceEditor' => 'PhabricatorApplicationTransactionEditor', | ||||
|     'ConpherenceFileWidgetView' => 'ConpherenceWidgetView', | ||||
|     'ConpherenceFulltextQuery' => 'PhabricatorOffsetPagedQuery', | ||||
|     'ConpherenceHovercardEventListener' => 'PhabricatorEventListener', | ||||
|     'ConpherenceIndex' => 'ConpherenceDAO', | ||||
|     'ConpherenceLayoutView' => 'AphrontView', | ||||
|     'ConpherenceListController' => 'ConpherenceController', | ||||
|     'ConpherenceMenuItemView' => 'AphrontTagView', | ||||
| @@ -3310,6 +3315,7 @@ phutil_register_library_map(array( | ||||
|       'ConpherenceDAO', | ||||
|       'PhabricatorPolicyInterface', | ||||
|     ), | ||||
|     'ConpherenceThreadIndexer' => 'PhabricatorSearchDocumentIndexer', | ||||
|     'ConpherenceThreadListView' => 'AphrontView', | ||||
|     'ConpherenceThreadMailReceiver' => 'PhabricatorObjectMailReceiver', | ||||
|     'ConpherenceThreadQuery' => 'PhabricatorCursorPagedPolicyAwareQuery', | ||||
| @@ -5603,6 +5609,7 @@ phutil_register_library_map(array( | ||||
|     'PhabricatorSearchDeleteController' => 'PhabricatorSearchBaseController', | ||||
|     'PhabricatorSearchDocument' => 'PhabricatorSearchDAO', | ||||
|     'PhabricatorSearchDocumentField' => 'PhabricatorSearchDAO', | ||||
|     'PhabricatorSearchDocumentIndexer' => 'Phobject', | ||||
|     'PhabricatorSearchDocumentQuery' => 'PhabricatorCursorPagedPolicyAwareQuery', | ||||
|     'PhabricatorSearchDocumentRelationship' => 'PhabricatorSearchDAO', | ||||
|     'PhabricatorSearchEditController' => 'PhabricatorSearchBaseController', | ||||
|   | ||||
| @@ -459,7 +459,23 @@ final class ConpherenceEditor extends PhabricatorApplicationTransactionEditor { | ||||
|   } | ||||
|  | ||||
|   protected function supportsSearch() { | ||||
|     return false; | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   /** | ||||
|    * Collect the PHIDs of the transactions in this edit which carry a | ||||
|    * comment, and hand them to the search indexer as context. | ||||
|    * | ||||
|    * @param PhabricatorLiskDAO Object being edited. | ||||
|    * @param array Transactions to inspect. | ||||
|    * @return dict Map with a single `commentPHIDs` key listing the PHIDs. | ||||
|    */ | ||||
|   protected function getSearchContextParameter( | ||||
|     PhabricatorLiskDAO $object, | ||||
|     array $xactions) { | ||||
|  | ||||
|     $phids = array(); | ||||
|     foreach ($xactions as $candidate) { | ||||
|       // Transactions without a comment contribute nothing to the index. | ||||
|       if (!$candidate->hasComment()) { | ||||
|         continue; | ||||
|       } | ||||
|       $phids[] = $candidate->getPHID(); | ||||
|     } | ||||
|  | ||||
|     return array('commentPHIDs' => $phids); | ||||
|   } | ||||
|  | ||||
| } | ||||
|   | ||||
| @@ -0,0 +1,68 @@ | ||||
| <?php | ||||
|  | ||||
| /** | ||||
|  * Low-level query against the Conpherence index table. | ||||
|  * | ||||
|  * Results are raw rows of PHIDs (thread, transaction, previous transaction), | ||||
|  * not loaded objects. | ||||
|  */ | ||||
| final class ConpherenceFulltextQuery | ||||
|   extends PhabricatorOffsetPagedQuery { | ||||
|  | ||||
|   private $threadPHIDs; | ||||
|   private $fulltext; | ||||
|  | ||||
|   /** | ||||
|    * Restrict results to index rows belonging to the given threads. | ||||
|    * | ||||
|    * @param list<phid> Thread PHIDs. | ||||
|    * @return this | ||||
|    */ | ||||
|   public function withThreadPHIDs(array $phids) { | ||||
|     $this->threadPHIDs = $phids; | ||||
|     return $this; | ||||
|   } | ||||
|  | ||||
|   /** | ||||
|    * Restrict results to rows whose corpus matches a boolean-mode fulltext | ||||
|    * expression. | ||||
|    * | ||||
|    * @param string Fulltext expression. | ||||
|    * @return this | ||||
|    */ | ||||
|   public function withFulltext($fulltext) { | ||||
|     $this->fulltext = $fulltext; | ||||
|     return $this; | ||||
|   } | ||||
|  | ||||
|   /** | ||||
|    * Run the query. | ||||
|    * | ||||
|    * @return wild Rows as returned by queryfx_all(), selecting `threadPHID`, | ||||
|    *              `transactionPHID` and `previousTransactionPHID`. | ||||
|    */ | ||||
|   public function execute() { | ||||
|     $index = new ConpherenceIndex(); | ||||
|     $conn = $index->establishConnection('r'); | ||||
|  | ||||
|     $where = $this->buildWhereClause($conn); | ||||
|     $order = $this->buildOrderByClause($conn); | ||||
|     $limit = $this->buildLimitClause($conn); | ||||
|  | ||||
|     return queryfx_all( | ||||
|       $conn, | ||||
|       'SELECT threadPHID, transactionPHID, previousTransactionPHID | ||||
|         FROM %T i %Q %Q %Q', | ||||
|       $index->getTableName(), | ||||
|       $where, | ||||
|       $order, | ||||
|       $limit); | ||||
|   } | ||||
|  | ||||
|   /** | ||||
|    * Assemble the WHERE clause from whichever constraints were configured. | ||||
|    */ | ||||
|   private function buildWhereClause($conn_r) { | ||||
|     $clauses = array(); | ||||
|  | ||||
|     $thread_phids = $this->threadPHIDs; | ||||
|     if ($thread_phids !== null) { | ||||
|       $clauses[] = qsprintf( | ||||
|         $conn_r, | ||||
|         'i.threadPHID IN (%Ls)', | ||||
|         $thread_phids); | ||||
|     } | ||||
|  | ||||
|     $expression = $this->fulltext; | ||||
|     if (strlen($expression)) { | ||||
|       $clauses[] = qsprintf( | ||||
|         $conn_r, | ||||
|         'MATCH(i.corpus) AGAINST (%s IN BOOLEAN MODE)', | ||||
|         $expression); | ||||
|     } | ||||
|  | ||||
|     return $this->formatWhereClause($clauses); | ||||
|   } | ||||
|  | ||||
|   /** | ||||
|    * Order by the fulltext MATCH expression (descending) when a fulltext | ||||
|    * constraint is present; otherwise order by row ID, descending. | ||||
|    */ | ||||
|   private function buildOrderByClause(AphrontDatabaseConnection $conn_r) { | ||||
|     if (!strlen($this->fulltext)) { | ||||
|       return qsprintf( | ||||
|         $conn_r, | ||||
|         'ORDER BY id DESC'); | ||||
|     } | ||||
|  | ||||
|     return qsprintf( | ||||
|       $conn_r, | ||||
|       'ORDER BY MATCH(i.corpus) AGAINST (%s IN BOOLEAN MODE) DESC', | ||||
|       $this->fulltext); | ||||
|   } | ||||
|  | ||||
| } | ||||
| @@ -0,0 +1,89 @@ | ||||
| <?php | ||||
|  | ||||
| /** | ||||
|  * Indexer which writes Conpherence comment text into the dedicated | ||||
|  * Conpherence index table instead of producing a search document. | ||||
|  */ | ||||
| final class ConpherenceThreadIndexer | ||||
|   extends PhabricatorSearchDocumentIndexer { | ||||
|  | ||||
|   public function getIndexableObject() { | ||||
|     return new ConpherenceThread(); | ||||
|   } | ||||
|  | ||||
|   /** | ||||
|    * Load the thread with the given PHID, throwing if the query finds none. | ||||
|    */ | ||||
|   protected function loadDocumentByPHID($phid) { | ||||
|     $thread = id(new ConpherenceThreadQuery()) | ||||
|       ->setViewer($this->getViewer()) | ||||
|       ->withPHIDs(array($phid)) | ||||
|       ->executeOne(); | ||||
|  | ||||
|     if ($thread) { | ||||
|       return $thread; | ||||
|     } | ||||
|  | ||||
|     throw new Exception(pht('No thread "%s" exists!', $phid)); | ||||
|   } | ||||
|  | ||||
|   /** | ||||
|    * Rebuild Conpherence index rows for a thread. | ||||
|    * | ||||
|    * Always returns null: no abstract document is built here, only rows in | ||||
|    * the Conpherence index. | ||||
|    */ | ||||
|   protected function buildAbstractDocumentByPHID($phid) { | ||||
|     $thread = $this->loadDocumentByPHID($phid); | ||||
|  | ||||
|     $context = nonempty($this->getContext(), array()); | ||||
|     $only_phids = idx($context, 'commentPHIDs'); | ||||
|  | ||||
|     // An explicit but empty list means the edit carried no chat text (for | ||||
|     // example, a user left the conversation), so nothing needs indexing. | ||||
|     if ($only_phids === array()) { | ||||
|       return null; | ||||
|     } | ||||
|  | ||||
|     $query = id(new ConpherenceTransactionQuery()) | ||||
|       ->setViewer($this->getViewer()) | ||||
|       ->withObjectPHIDs(array($thread->getPHID())) | ||||
|       ->withTransactionTypes(array(PhabricatorTransactions::TYPE_COMMENT)) | ||||
|       ->needComments(true); | ||||
|  | ||||
|     // With no list at all, every comment on the thread is indexed; with a | ||||
|     // list, only the named transactions are. | ||||
|     if ($only_phids !== null) { | ||||
|       $query->withPHIDs($only_phids); | ||||
|     } | ||||
|  | ||||
|     foreach ($query->execute() as $comment_xaction) { | ||||
|       $this->indexComment($thread, $comment_xaction); | ||||
|     } | ||||
|  | ||||
|     return null; | ||||
|   } | ||||
|  | ||||
|   /** | ||||
|    * Write (or refresh the corpus of) the index row for one comment | ||||
|    * transaction, recording the PHID of the neighbouring comment transaction | ||||
|    * found by paging after this one. | ||||
|    */ | ||||
|   private function indexComment( | ||||
|     ConpherenceThread $thread, | ||||
|     ConpherenceTransaction $xaction) { | ||||
|  | ||||
|     $neighbor = id(new ConpherenceTransactionQuery()) | ||||
|       ->setViewer($this->getViewer()) | ||||
|       ->withObjectPHIDs(array($thread->getPHID())) | ||||
|       ->withTransactionTypes(array(PhabricatorTransactions::TYPE_COMMENT)) | ||||
|       ->setAfterID($xaction->getID()) | ||||
|       ->setLimit(1) | ||||
|       ->executeOne(); | ||||
|  | ||||
|     $neighbor_phid = null; | ||||
|     if ($neighbor) { | ||||
|       $neighbor_phid = $neighbor->getPHID(); | ||||
|     } | ||||
|  | ||||
|     $row = id(new ConpherenceIndex()) | ||||
|       ->setThreadPHID($thread->getPHID()) | ||||
|       ->setTransactionPHID($xaction->getPHID()) | ||||
|       ->setPreviousTransactionPHID($neighbor_phid) | ||||
|       ->setCorpus($xaction->getComment()->getContent()); | ||||
|  | ||||
|     // On a duplicate key only the corpus is overwritten. | ||||
|     queryfx( | ||||
|       $row->establishConnection('w'), | ||||
|       'INSERT INTO %T | ||||
|         (threadPHID, transactionPHID, previousTransactionPHID, corpus) | ||||
|         VALUES (%s, %s, %ns, %s) | ||||
|         ON DUPLICATE KEY UPDATE corpus = VALUES(corpus)', | ||||
|       $row->getTableName(), | ||||
|       $row->getThreadPHID(), | ||||
|       $row->getTransactionPHID(), | ||||
|       $row->getPreviousTransactionPHID(), | ||||
|       $row->getCorpus()); | ||||
|   } | ||||
|  | ||||
| } | ||||
							
								
								
									
										38
									
								
								src/applications/conpherence/storage/ConpherenceIndex.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								src/applications/conpherence/storage/ConpherenceIndex.php
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,38 @@ | ||||
| <?php | ||||
|  | ||||
| /** | ||||
|  * Storage object for one row of the Conpherence index table: a thread PHID, | ||||
|  * a transaction PHID, an optional previous-transaction PHID and the text | ||||
|  * corpus. | ||||
|  */ | ||||
| final class ConpherenceIndex | ||||
|   extends ConpherenceDAO { | ||||
|  | ||||
|   protected $threadPHID; | ||||
|   protected $transactionPHID; | ||||
|   protected $previousTransactionPHID; | ||||
|   protected $corpus; | ||||
|  | ||||
|   /** | ||||
|    * Describe the table: no timestamp columns, a nullable PHID column, a | ||||
|    * fulltext corpus column, and the table's keys. Entries here take | ||||
|    * precedence over the parent configuration. | ||||
|    */ | ||||
|   public function getConfiguration() { | ||||
|     $column_schema = array( | ||||
|       'previousTransactionPHID' => 'phid?', | ||||
|       'corpus' => 'fulltext', | ||||
|     ); | ||||
|  | ||||
|     $key_schema = array( | ||||
|       'key_thread' => array( | ||||
|         'columns' => array('threadPHID'), | ||||
|       ), | ||||
|       'key_transaction' => array( | ||||
|         'columns' => array('transactionPHID'), | ||||
|         'unique' => true, | ||||
|       ), | ||||
|       'key_previous' => array( | ||||
|         'columns' => array('previousTransactionPHID'), | ||||
|         'unique' => true, | ||||
|       ), | ||||
|       'key_corpus' => array( | ||||
|         'columns' => array('corpus'), | ||||
|         'type' => 'FULLTEXT', | ||||
|       ), | ||||
|     ); | ||||
|  | ||||
|     $config = array( | ||||
|       self::CONFIG_TIMESTAMPS => false, | ||||
|       self::CONFIG_COLUMN_SCHEMA => $column_schema, | ||||
|       self::CONFIG_KEY_SCHEMA => $key_schema, | ||||
|     ); | ||||
|  | ||||
|     return $config + parent::getConfiguration(); | ||||
|   } | ||||
|  | ||||
| } | ||||
| @@ -1,6 +1,17 @@ | ||||
| <?php | ||||
|  | ||||
| abstract class PhabricatorSearchDocumentIndexer { | ||||
| abstract class PhabricatorSearchDocumentIndexer extends Phobject { | ||||
|  | ||||
|   private $context; | ||||
|  | ||||
|   /** | ||||
|    * Store the context value supplied for the current indexing run. | ||||
|    * | ||||
|    * @param wild Context value; may be null. | ||||
|    * @return this | ||||
|    */ | ||||
|   protected function setContext($context) { | ||||
|     $this->context = $context; | ||||
|     return $this; | ||||
|   } | ||||
|  | ||||
|   /** | ||||
|    * Return the context value most recently stored with setContext(). | ||||
|    * | ||||
|    * @return wild Stored context value, or null if none was stored. | ||||
|    */ | ||||
|   protected function getContext() { | ||||
|     return $this->context; | ||||
|   } | ||||
|  | ||||
|   abstract public function getIndexableObject(); | ||||
|   abstract protected function buildAbstractDocumentByPHID($phid); | ||||
| @@ -30,9 +41,15 @@ abstract class PhabricatorSearchDocumentIndexer { | ||||
|     return $object; | ||||
|   } | ||||
|  | ||||
|   public function indexDocumentByPHID($phid) { | ||||
|   public function indexDocumentByPHID($phid, $context) { | ||||
|     try { | ||||
|       $this->setContext($context); | ||||
|  | ||||
|       $document = $this->buildAbstractDocumentByPHID($phid); | ||||
|       if ($document === null) { | ||||
|         // This indexer doesn't build a document index, so we're done. | ||||
|         return $this; | ||||
|       } | ||||
|  | ||||
|       $object = $this->loadDocumentByPHID($phid); | ||||
|  | ||||
|   | ||||
| @@ -2,25 +2,26 @@ | ||||
|  | ||||
| final class PhabricatorSearchIndexer { | ||||
|  | ||||
|   public function queueDocumentForIndexing($phid) { | ||||
|   public function queueDocumentForIndexing($phid, $context = null) { | ||||
|     PhabricatorWorker::scheduleTask( | ||||
|       'PhabricatorSearchWorker', | ||||
|       array( | ||||
|         'documentPHID' => $phid, | ||||
|         'context' => $context, | ||||
|       ), | ||||
|       array( | ||||
|         'priority' => PhabricatorWorker::PRIORITY_IMPORT, | ||||
|       )); | ||||
|   } | ||||
|  | ||||
|   public function indexDocumentByPHID($phid) { | ||||
|   public function indexDocumentByPHID($phid, $context) { | ||||
|     $indexers = id(new PhutilSymbolLoader()) | ||||
|       ->setAncestorClass('PhabricatorSearchDocumentIndexer') | ||||
|       ->loadObjects(); | ||||
|  | ||||
|     foreach ($indexers as $indexer) { | ||||
|       if ($indexer->shouldIndexDocumentByPHID($phid)) { | ||||
|         $indexer->indexDocumentByPHID($phid); | ||||
|         $indexer->indexDocumentByPHID($phid, $context); | ||||
|         break; | ||||
|       } | ||||
|     } | ||||
|   | ||||
| @@ -114,16 +114,9 @@ final class PhabricatorSearchManagementIndexWorkflow | ||||
|   } | ||||
|  | ||||
|   private function loadPHIDsByTypes($type) { | ||||
|     $indexer_symbols = id(new PhutilSymbolLoader()) | ||||
|       ->setAncestorClass('PhabricatorSearchDocumentIndexer') | ||||
|       ->setConcreteOnly(true) | ||||
|       ->setType('class') | ||||
|       ->selectAndLoadSymbols(); | ||||
|  | ||||
|     $indexers = array(); | ||||
|     foreach ($indexer_symbols as $symbol) { | ||||
|       $indexers[] = newv($symbol['name'], array()); | ||||
|     } | ||||
|     // Instantiate the available document indexers. The ancestor class must | ||||
|     // be `PhabricatorSearchDocumentIndexer`, the base class that indexers | ||||
|     // extend; naming any other class here would find no indexers. | ||||
|     $indexers = id(new PhutilSymbolLoader()) | ||||
|       ->setAncestorClass('PhabricatorSearchDocumentIndexer') | ||||
|       ->loadObjects(); | ||||
|  | ||||
|     $phids = array(); | ||||
|     foreach ($indexers as $indexer) { | ||||
|   | ||||
| @@ -4,10 +4,12 @@ final class PhabricatorSearchWorker extends PhabricatorWorker { | ||||
|  | ||||
|   public function doWork() { | ||||
|     $data = $this->getTaskData(); | ||||
|  | ||||
|     $phid = idx($data, 'documentPHID'); | ||||
|     $context = idx($data, 'context'); | ||||
|  | ||||
|     id(new PhabricatorSearchIndexer()) | ||||
|       ->indexDocumentByPHID($phid); | ||||
|       ->indexDocumentByPHID($phid, $context); | ||||
|   } | ||||
|  | ||||
| } | ||||
|   | ||||
| @@ -800,7 +800,9 @@ abstract class PhabricatorApplicationTransactionEditor | ||||
|  | ||||
|     if ($this->supportsSearch()) { | ||||
|       id(new PhabricatorSearchIndexer()) | ||||
|         ->queueDocumentForIndexing($object->getPHID()); | ||||
|         ->queueDocumentForIndexing( | ||||
|           $object->getPHID(), | ||||
|           $this->getSearchContextParameter($object, $xactions)); | ||||
|     } | ||||
|  | ||||
|     if ($this->shouldPublishFeedStory($object, $xactions)) { | ||||
| @@ -2355,6 +2357,15 @@ abstract class PhabricatorApplicationTransactionEditor | ||||
|     return false; | ||||
|   } | ||||
|  | ||||
|   /** | ||||
|    * Build the context value handed to the search indexer when this editor | ||||
|    * queues the edited object for indexing. | ||||
|    * | ||||
|    * The base implementation supplies no context. Subclasses may override | ||||
|    * this to pass information about the edit along to their indexer. | ||||
|    * | ||||
|    * @param PhabricatorLiskDAO Object being edited. | ||||
|    * @param array Transactions from the edit. | ||||
|    * @return wild Context value, or null for no context. | ||||
|    * @task search | ||||
|    */ | ||||
|   protected function getSearchContextParameter( | ||||
|     PhabricatorLiskDAO $object, | ||||
|     array $xactions) { | ||||
|     return null; | ||||
|   } | ||||
|  | ||||
|  | ||||
| /* -(  Herald Integration )-------------------------------------------------- */ | ||||
|  | ||||
|   | ||||
| @@ -82,8 +82,8 @@ final class PhabricatorStorageManagementQuickstartWorkflow | ||||
|       '{$NAMESPACE}', | ||||
|       $dump); | ||||
|  | ||||
|     // NOTE: This is a hack. We can not use `binary` for this column, because | ||||
|     // it is part of a fulltext index. | ||||
|     // NOTE: This is a hack. We can not use `binary` for these columns, because | ||||
|     // they are a part of a fulltext index. | ||||
|     $old = $dump; | ||||
|     $dump = preg_replace( | ||||
|       '/`corpus` longtext CHARACTER SET .* COLLATE .*,/mi', | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 epriestley
					epriestley