Elastic: tweaked user indexing

This makes user search a bit more "fuzzy": users are now also matched on
N-grams taken from anywhere inside a word, and not just on edge N-grams
(N-grams anchored at the start of the word).
This commit is contained in:
Sybren A. Stüvel 2018-01-12 15:28:33 +01:00
parent 31ca4f3d23
commit b2bd01117e
2 changed files with 20 additions and 8 deletions

View File

@ -24,10 +24,17 @@ edge_ngram_filter = analysis.token_filter(
max_gram=15
)
# Token filter named 'ngram_filter' of type 'ngram'. min_gram and max_gram
# are both 3, so it is configured for fixed-length 3-character grams.
# In contrast to an edge N-gram filter, these grams are not restricted to
# the start of a word, which is what makes matching "fuzzier".
ngram_filter = analysis.token_filter(
'ngram_filter',
type='ngram',
min_gram=3,
max_gram=3,
)
# 'autocomplete' analyzer: 'standard' tokenizer followed by a token-filter chain.
# NOTE(review): two `filter=` lines appear below. This looks like a flattened
# diff in which the shorter list is the removed line and the longer list
# (adding 'standard', 'asciifolding' and ngram_filter) is its replacement.
# Confirm against the real file, where only one `filter=` argument can exist.
autocomplete = es.analyzer(
'autocomplete',
tokenizer='standard',
filter=['lowercase', edge_ngram_filter]
filter=['standard', 'asciifolding', 'lowercase', ngram_filter, edge_ngram_filter]
)

View File

@ -118,15 +118,20 @@ def _common_user_search(query: str) -> (typing.List[Query], typing.List[Query]):
if not query:
return [], []
should = [
Q('match', username=query),
Q('match', full_name=query),
Q('match', email=query),
{'term': {'username_exact': {'value': query, 'boost': 50}}},
]
should = []
if '@' in query:
should.append(Q('term', email_exact=query))
should.append({'term': {'email_exact': {'value': query, 'boost': 50}}})
email_boost = 25
else:
email_boost = 1
should.extend([
Q('match', username=query),
Q('match', full_name=query),
{'match': {'email': {'query': query, 'boost': email_boost}}},
{'term': {'username_exact': {'value': query, 'boost': 50}}},
])
return [], should