Identify compound short search tokens in the form "xx.yy" as unqueryable in the search UI
Summary:
Ref T12928. The fulltext index can't match tokens in this form, so show the user that there's a problem and drop the terms from the query.
This doesn't fix the underlying problem, but it makes the behavior clearer.
Test Plan:
{F5053703}
{F5053704}
Reviewers: chad
Reviewed By: chad
Maniphest Tasks: T12928
Differential Revision: https://secure.phabricator.com/D18254
This commit is contained in:
@@ -235,7 +235,7 @@ final class PhabricatorMySQLFulltextStorageEngine
|
|||||||
$value = $stemmer->stemToken($value);
|
$value = $stemmer->stemToken($value);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (phutil_utf8_strlen($value) < $min_length) {
|
if ($this->isShortToken($value, $min_length)) {
|
||||||
$fulltext_token->setIsShort(true);
|
$fulltext_token->setIsShort(true);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -549,4 +549,22 @@ final class PhabricatorMySQLFulltextStorageEngine
|
|||||||
return array($min_len, $stopwords);
|
return array($min_len, $stopwords);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Decide whether a search term is too short to be queried.
 *
 * NOTE: The engine tokenizes internally on periods, so a term like "ab.cd"
 * (short pieces joined by periods) yields no queryable tokens at all, even
 * though the term as a whole looks long enough. A term like "example.py" is
 * still useful because one of its pieces reaches the minimum length. See
 * T12928.
 *
 * @param string $value Term to examine.
 * @param int $min_length Minimum length a piece must reach to be queryable.
 * @return bool True if no period-separated piece of the term is at least
 *   `$min_length` characters long.
 */
private function isShortToken($value, $min_length) {
  $is_short = true;

  // Runs of consecutive periods are treated as a single separator.
  foreach (preg_split('/[.]+/', $value) as $segment) {
    if (phutil_utf8_strlen($segment) < $min_length) {
      continue;
    }

    // One sufficiently long piece is enough to make the term queryable.
    $is_short = false;
    break;
  }

  return $is_short;
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user