Identify compound short search tokens in the form "xx.yy" as unqueryable in the search UI
Summary:
Ref T12928. The index doesn't work for these, so show the user that there's a problem and drop the terms.
This doesn't fix the problem, but makes the behavior more clear.
Test Plan:
{F5053703}
{F5053704}
Reviewers: chad
Reviewed By: chad
Maniphest Tasks: T12928
Differential Revision: https://secure.phabricator.com/D18254
This commit is contained in:
@@ -235,7 +235,7 @@ final class PhabricatorMySQLFulltextStorageEngine
|
||||
$value = $stemmer->stemToken($value);
|
||||
}
|
||||
|
||||
if (phutil_utf8_strlen($value) < $min_length) {
|
||||
if ($this->isShortToken($value, $min_length)) {
|
||||
$fulltext_token->setIsShort(true);
|
||||
continue;
|
||||
}
|
||||
@@ -549,4 +549,22 @@ final class PhabricatorMySQLFulltextStorageEngine
|
||||
return array($min_len, $stopwords);
|
||||
}
|
||||
|
||||
/**
 * Decide whether a search term is too short to be queryable.
 *
 * The engine tokenizes internally on periods, so a term like "ab.cd", made
 * up only of short substrings separated by periods, does not produce any
 * queryable tokens. A term is still meaningful when at least one of its
 * period-separated substrings is at least the minimum length, as in
 * "example.py". See T12928.
 *
 * @param string Term to examine.
 * @param int Minimum substring length, in characters.
 * @return bool True if every period-separated substring is shorter than
 *   the minimum length.
 */
private function isShortToken($value, $min_length) {
  $is_short = true;

  // Runs of consecutive periods are treated as a single separator.
  foreach (preg_split('/[.]+/', $value) as $segment) {
    if (phutil_utf8_strlen($segment) < $min_length) {
      continue;
    }

    // One sufficiently long substring is enough to make the term queryable.
    $is_short = false;
    break;
  }

  return $is_short;
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user