From b2bd01117ecd77f41176c414344ec07bc5667abe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sybren=20A=2E=20St=C3=BCvel?=
Date: Fri, 12 Jan 2018 15:28:33 +0100
Subject: [PATCH] Elastic: tweaked user indexing

This makes it a bit more "fuzzy", so users are also matched on N-grams
and not just N-grams-from-the-start-of-the-word.
---
 pillar/api/search/documents.py |  9 ++++++++-
 pillar/api/search/queries.py   | 19 ++++++++++++-------
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/pillar/api/search/documents.py b/pillar/api/search/documents.py
index 653b8e95..5ac72c69 100644
--- a/pillar/api/search/documents.py
+++ b/pillar/api/search/documents.py
@@ -24,10 +24,17 @@ edge_ngram_filter = analysis.token_filter(
     max_gram=15
 )
 
+ngram_filter = analysis.token_filter(
+    'ngram_filter',
+    type='ngram',
+    min_gram=3,
+    max_gram=3,
+)
+
 autocomplete = es.analyzer(
     'autocomplete',
     tokenizer='standard',
-    filter=['lowercase', edge_ngram_filter]
+    filter=['standard', 'asciifolding', 'lowercase', ngram_filter, edge_ngram_filter]
 )
 
 
diff --git a/pillar/api/search/queries.py b/pillar/api/search/queries.py
index 0de4e2fd..36d2610b 100644
--- a/pillar/api/search/queries.py
+++ b/pillar/api/search/queries.py
@@ -118,15 +118,20 @@ def _common_user_search(query: str) -> (typing.List[Query], typing.List[Query]):
     if not query:
         return [], []
 
-    should = [
-        Q('match', username=query),
-        Q('match', full_name=query),
-        Q('match', email=query),
-        {'term': {'username_exact': {'value': query, 'boost': 50}}},
-    ]
+    should = []
 
     if '@' in query:
-        should.append(Q('term', email_exact=query))
+        should.append({'term': {'email_exact': {'value': query, 'boost': 50}}})
+        email_boost = 25
+    else:
+        email_boost = 1
+
+    should.extend([
+        Q('match', username=query),
+        Q('match', full_name=query),
+        {'match': {'email': {'query': query, 'boost': email_boost}}},
+        {'term': {'username_exact': {'value': query, 'boost': 50}}},
+    ])
 
     return [], should
 