Elastic: tweaked user indexing

This makes user search a bit more "fuzzy": users are now also matched on N-grams taken from anywhere within a word, not just on N-grams anchored at the start of the word (edge N-grams).
2018-01-12 15:28:33 +01:00 · 2018-01-12 15:28:33 +01:00 · b2bd01117e
commit b2bd01117e
parent 31ca4f3d23
2 changed files with 20 additions and 8 deletions
--- a/pillar/api/search/documents.py
+++ b/pillar/api/search/documents.py
@ -24,10 +24,17 @@ edge_ngram_filter = analysis.token_filter(
    max_gram=15
 )

+ngram_filter = analysis.token_filter(
+    'ngram_filter',
+    type='ngram',
+    min_gram=3,
+    max_gram=3,
+)
+
 autocomplete = es.analyzer(
    'autocomplete',
    tokenizer='standard',
-    filter=['lowercase', edge_ngram_filter]
+    filter=['standard', 'asciifolding', 'lowercase', ngram_filter, edge_ngram_filter]
 )


--- a/pillar/api/search/queries.py
+++ b/pillar/api/search/queries.py
@ -118,15 +118,20 @@ def _common_user_search(query: str) -> (typing.List[Query], typing.List[Query]):
    if not query:
        return [], []

-    should = [
-        Q('match', username=query),
-        Q('match', full_name=query),
-        Q('match', email=query),
-        {'term': {'username_exact': {'value': query, 'boost': 50}}},
-    ]
+    should = []

    if '@' in query:
-        should.append(Q('term', email_exact=query))
+        should.append({'term': {'email_exact': {'value': query, 'boost': 50}}})
+        email_boost = 25
+    else:
+        email_boost = 1
+
+    should.extend([
+        Q('match', username=query),
+        Q('match', full_name=query),
+        {'match': {'email': {'query': query, 'boost': email_boost}}},
+        {'term': {'username_exact': {'value': query, 'boost': 50}}},
+    ])

    return [], should