From b2bd01117ecd77f41176c414344ec07bc5667abe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sybren=20A=2E=20St=C3=BCvel?= <sybren@stuvel.eu>
Date: Fri, 12 Jan 2018 15:28:33 +0100
Subject: [PATCH] Elastic: tweaked user indexing

This makes user search a bit more "fuzzy": users are now also matched on
N-grams taken from anywhere inside a word, not just on edge N-grams
anchored at the start of the word.
---
 pillar/api/search/documents.py |  9 ++++++++-
 pillar/api/search/queries.py   | 19 ++++++++++++-------
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/pillar/api/search/documents.py b/pillar/api/search/documents.py
index 653b8e95..5ac72c69 100644
--- a/pillar/api/search/documents.py
+++ b/pillar/api/search/documents.py
@@ -24,10 +24,17 @@ edge_ngram_filter = analysis.token_filter(
     max_gram=15
 )
 
+ngram_filter = analysis.token_filter(
+    'ngram_filter',
+    type='ngram',
+    min_gram=3,
+    max_gram=3,
+)
+
 autocomplete = es.analyzer(
     'autocomplete',
     tokenizer='standard',
-    filter=['lowercase', edge_ngram_filter]
+    filter=['standard', 'asciifolding', 'lowercase', ngram_filter, edge_ngram_filter]
 )
 
 
diff --git a/pillar/api/search/queries.py b/pillar/api/search/queries.py
index 0de4e2fd..36d2610b 100644
--- a/pillar/api/search/queries.py
+++ b/pillar/api/search/queries.py
@@ -118,15 +118,20 @@ def _common_user_search(query: str) -> (typing.List[Query], typing.List[Query]):
     if not query:
         return [], []
 
-    should = [
-        Q('match', username=query),
-        Q('match', full_name=query),
-        Q('match', email=query),
-        {'term': {'username_exact': {'value': query, 'boost': 50}}},
-    ]
+    should = []
 
     if '@' in query:
-        should.append(Q('term', email_exact=query))
+        should.append({'term': {'email_exact': {'value': query, 'boost': 50}}})
+        email_boost = 25
+    else:
+        email_boost = 1
+
+    should.extend([
+        Q('match', username=query),
+        Q('match', full_name=query),
+        {'match': {'email': {'query': query, 'boost': email_boost}}},
+        {'term': {'username_exact': {'value': query, 'boost': 50}}},
+    ])
 
     return [], should