Identify compound short search tokens in the form "xx.yy" as unqueryable in the search UI
Summary:
Ref T12928. The fulltext index cannot match these terms, so tell the user that there's a problem and drop the terms from the query.
This doesn't fix the underlying problem, but it makes the behavior clearer.
Test Plan:
{F5053703}
{F5053704}
Reviewers: chad
Reviewed By: chad
Maniphest Tasks: T12928
Differential Revision: https://secure.phabricator.com/D18254
			
			
This commit is contained in:
		| @@ -235,7 +235,7 @@ final class PhabricatorMySQLFulltextStorageEngine | |||||||
|           $value = $stemmer->stemToken($value); |           $value = $stemmer->stemToken($value); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         if (phutil_utf8_strlen($value) < $min_length) { |         if ($this->isShortToken($value, $min_length)) { | ||||||
|           $fulltext_token->setIsShort(true); |           $fulltext_token->setIsShort(true); | ||||||
|           continue; |           continue; | ||||||
|         } |         } | ||||||
| @@ -549,4 +549,22 @@ final class PhabricatorMySQLFulltextStorageEngine | |||||||
|     return array($min_len, $stopwords); |     return array($min_len, $stopwords); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   private function isShortToken($value, $min_length) { | ||||||
|  |     // NOTE: The engine tokenizes internally on periods, so terms in the form | ||||||
|  |     // "ab.cd", where short substrings are separated by periods, do not produce | ||||||
|  |     // any queryable tokens. These terms are meaningful if at least one | ||||||
|  |     // substring is at least the minimum length, like "example.py". See | ||||||
|  |     // T12928. | ||||||
|  |  | ||||||
|  |     $parts = preg_split('/[.]+/', $value); | ||||||
|  |  | ||||||
|  |     foreach ($parts as $part) { | ||||||
|  |       if (phutil_utf8_strlen($part) >= $min_length) { | ||||||
|  |         return false; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     return true; | ||||||
|  |   } | ||||||
|  |  | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 epriestley
					epriestley