From 2ff68c4b3e59cf1b7a8f0efdb9d046986f076f19 Mon Sep 17 00:00:00 2001 From: Oleg Komarov Date: Fri, 31 May 2024 15:50:56 +0200 Subject: [PATCH 1/9] use postgres SearchQuery and SearchVector for search --- blender_extensions/settings.py | 1 + extensions/views/public.py | 28 ++++++++++++++++++---------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/blender_extensions/settings.py b/blender_extensions/settings.py index 61abe392..70f2e8b6 100644 --- a/blender_extensions/settings.py +++ b/blender_extensions/settings.py @@ -75,6 +75,7 @@ INSTALLED_APPS = [ 'django.contrib.staticfiles', 'django.contrib.flatpages', 'django.contrib.humanize', + 'django.contrib.postgres', 'actstream', ] diff --git a/extensions/views/public.py b/extensions/views/public.py index 22e5abd2..336255ee 100644 --- a/extensions/views/public.py +++ b/extensions/views/public.py @@ -1,6 +1,8 @@ import logging +from django.conf import settings from django.contrib.auth import get_user_model +from django.contrib.postgres.search import SearchQuery, SearchVector from django.db.models import Q from django.shortcuts import get_object_or_404, redirect from django.views.generic.list import ListView @@ -89,16 +91,22 @@ class SearchView(ListedExtensionsView): _type = self._get_type_id_by_slug() queryset = queryset.filter(type=_type) if 'q' in self.request.GET: - qs = self.request.GET['q'].split() - search_query = Q() - for token in qs: - search_query &= ( - Q(slug__icontains=token) - | Q(name__icontains=token) - | Q(description__icontains=token) - | Q(latest_version__tags__name__icontains=token) - ) - queryset = queryset.filter(search_query).distinct() + # using DEBUG as a shortcut for checking if we run on postgres vs sqlite + if settings.DEBUG: + qs = self.request.GET['q'].split() + search_query = Q() + for token in qs: + search_query &= ( + Q(slug__icontains=token) + | Q(name__icontains=token) + | Q(description__icontains=token) + | Q(latest_version__tags__name__icontains=token) + ) + queryset = queryset.filter(search_query).distinct() + else: + query = SearchQuery(self.request.GET['q'], search_type='websearch') + vector = SearchVector('slug', 'name', 'description', 'latest_version__tags__name') + queryset = queryset.annotate(search=vector).filter(search=query).distinct() return queryset.prefetch_related( 'authors', 'latest_version__file', -- 2.30.2 From 867bdd90503303fcfecc17ce0bd2e7a949498197 Mon Sep 17 00:00:00 2001 From: Oleg Komarov Date: Fri, 31 May 2024 15:59:28 +0200 Subject: [PATCH 2/9] use rank --- extensions/views/public.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/extensions/views/public.py b/extensions/views/public.py index 336255ee..4e338432 100644 --- a/extensions/views/public.py +++ b/extensions/views/public.py @@ -2,7 +2,7 @@ import logging from django.conf import settings from django.contrib.auth import get_user_model -from django.contrib.postgres.search import SearchQuery, SearchVector +from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector from django.db.models import Q from django.shortcuts import get_object_or_404, redirect from django.views.generic.list import ListView @@ -105,8 +105,15 @@ class SearchView(ListedExtensionsView): queryset = queryset.filter(search_query).distinct() else: query = SearchQuery(self.request.GET['q'], search_type='websearch') - vector = SearchVector('slug', 'name', 'description', 'latest_version__tags__name') - queryset = queryset.annotate(search=vector).filter(search=query).distinct() + vector = ( + SearchVector('name', weight='A') + + SearchVector('description', weight='B') + + SearchVector('latest_version__tags__name', weight='C') + ) + rank = SearchRank(vector, query) + queryset = ( + queryset.annotate(rank=rank).filter(rank__gte=0.3).distinct().order_by('-rank') + ) return queryset.prefetch_related( 'authors', 'latest_version__file', -- 2.30.2 From 16e82fde289588f1097c868327f0a4340e712678 Mon Sep 17 00:00:00 2001 From: Oleg Komarov Date: Mon, 3 Jun 2024 18:45:47 +0200 Subject: [PATCH 3/9] fts + trgm fallback --- extensions/views/public.py | 119 ++++++++++++++++++++++--------------- 1 file changed, 72 insertions(+), 47 deletions(-) diff --git a/extensions/views/public.py b/extensions/views/public.py index 4e338432..3e796f7d 100644 --- a/extensions/views/public.py +++ b/extensions/views/public.py @@ -2,7 +2,7 @@ import logging from django.conf import settings from django.contrib.auth import get_user_model -from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector +from django.db import connection from django.db.models import Q from django.shortcuts import get_object_or_404, redirect from django.views.generic.list import ListView @@ -24,7 +24,15 @@ log = logging.getLogger(__name__) class ListedExtensionsView(ListView): model = Extension - queryset = Extension.objects.listed + queryset = Extension.objects.listed.prefetch_related( + 'authors', + 'latest_version__file', + 'latest_version__tags', + 'preview_set', + 'preview_set__file', + 'ratings', + 'team', + ) context_object_name = 'extensions' @@ -34,19 +42,7 @@ class HomeView(ListedExtensionsView): def get_context_data(self, **kwargs): context = super().get_context_data(**kwargs) - q = ( - super() - .get_queryset() - .prefetch_related( - 'authors', - 'latest_version__file', - 'latest_version__tags', - 'preview_set', - 'preview_set__file', - 'ratings', - 'team', - ) - ) + q = super().get_queryset() context['addons'] = q.filter(type=EXTENSION_TYPE_CHOICES.BPY)[:8] context['themes'] = q.filter(type=EXTENSION_TYPE_CHOICES.THEME)[:8] return context @@ -90,39 +86,68 @@ class SearchView(ListedExtensionsView): if self.kwargs.get('type_slug'): _type = self._get_type_id_by_slug() queryset = queryset.filter(type=_type) - if 'q' in self.request.GET: - # using DEBUG as a shortcut for checking if we run on postgres vs sqlite - if settings.DEBUG: - qs = self.request.GET['q'].split() - search_query = Q() - for token in qs: - search_query &= ( - Q(slug__icontains=token) - | Q(name__icontains=token) - | Q(description__icontains=token) - | Q(latest_version__tags__name__icontains=token) - ) - queryset = queryset.filter(search_query).distinct() - else: - query = SearchQuery(self.request.GET['q'], search_type='websearch') - vector = ( - SearchVector('name', weight='A') - + SearchVector('description', weight='B') - + SearchVector('latest_version__tags__name', weight='C') + + search_query = self.request.GET.get('q') + if not search_query: + return queryset + + # WARNING: full-text search support only on postgres + # using DEBUG as a shortcut for checking if we run on postgres vs sqlite + if settings.DEBUG and 0: + filter = Q() + for token in search_query.split(): + filter &= ( + Q(slug__icontains=token) + | Q(name__icontains=token) + | Q(description__icontains=token) + | Q(latest_version__tags__name__icontains=token) ) - rank = SearchRank(vector, query) - queryset = ( - queryset.annotate(rank=rank).filter(rank__gte=0.3).distinct().order_by('-rank') - ) - return queryset.prefetch_related( - 'authors', - 'latest_version__file', - 'latest_version__tags', - 'preview_set', - 'preview_set__file', - 'ratings', - 'team', - ) + queryset = queryset.filter(filter).distinct() + else: + queryset = self.postgres_fts(queryset, search_query) + return queryset + + def postgres_fts(self, queryset, search_query): + """Postgres full text search (fast) and a fuzzy trigram search (slow) as a fallback. + + Searches Extension name and description only. If we need to extend the functionality, + it's better to consider using a different approach, e.g. introduce meilisearch. + + Limits the results size to 32 items (2 pages), assuming that nobody will click through many + pages if we failed to present the vital results on the first page. + + Relies on indexed expressions: + CREATE INDEX extensions_fts ON extensions_extension USING + gin ((to_tsvector('english', name) || ' ' || to_tsvector('english', description))); + CREATE INDEX extensions_trgm_gin ON extensions_extension USING + gin((((name)::text || ' '::text) || description) gin_trgm_ops); + + """ + with connection.cursor() as cursor: + sql = """ + select id + from extensions_extension + where ( + (to_tsvector('english', name) || ' ' || to_tsvector('english', description)) + @@ websearch_to_tsquery('english', %(query)s) + ) and is_listed + limit 32""" + cursor.execute(sql, {'query': search_query}) + pks = [row[0] for row in cursor.fetchall()] + if not pks: + # fallback to fuzzy trigram search + sql = """ + select id + from extensions_extension + where ((name || ' ' || description) %%> %(query)s) + and is_listed + order by %(query)s <<<-> (name || ' ' || description) + limit 32""" + cursor.execute(sql, {'query': search_query}) + pks = [row[0] for row in cursor.fetchall()] + # pks are ordered by ranking, keep that order + # this approach is fine under the assumption that the list is small + return sorted(queryset.filter(pk__in=pks).order_by(), key=lambda x: pks.index(x.pk)) def get_context_data(self, **kwargs): context = super().get_context_data(**kwargs) -- 2.30.2 From 4e9e91791f087708d72b1eabf3175cde529575a0 Mon Sep 17 00:00:00 2001 From: Oleg Komarov Date: Mon, 3 Jun 2024 18:57:07 +0200 Subject: [PATCH 4/9] remove condition for local testing --- extensions/views/public.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/views/public.py b/extensions/views/public.py index a09d8c93..dcd47d30 100644 --- a/extensions/views/public.py +++ b/extensions/views/public.py @@ -112,7 +112,7 @@ class SearchView(ListedExtensionsView): # WARNING: full-text search support only on postgres # using DEBUG as a shortcut for checking if we run on postgres vs sqlite - if settings.DEBUG and 0: + if settings.DEBUG: filter = Q() for token in search_query.split(): filter &= ( -- 2.30.2 From 318ba218ea11df454013c8e020186439505e9f16 Mon Sep 17 00:00:00 2001 From: Oleg Komarov Date: Mon, 3 Jun 2024 19:00:39 +0200 Subject: [PATCH 5/9] fix merge --- extensions/views/public.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/extensions/views/public.py b/extensions/views/public.py index dcd47d30..8d05e6b2 100644 --- a/extensions/views/public.py +++ b/extensions/views/public.py @@ -167,8 +167,6 @@ class SearchView(ListedExtensionsView): # pks are ordered by ranking, keep that order # this approach is fine under the assumption that the list is small return sorted(queryset.filter(pk__in=pks).order_by(), key=lambda x: pks.index(x.pk)) - queryset = queryset.filter(search_query).distinct() - return queryset def get_context_data(self, **kwargs): context = super().get_context_data(**kwargs) -- 2.30.2 From 090a2a6679e315d2a8907f7ea8d69898a6121099 Mon Sep 17 00:00:00 2001 From: Oleg Komarov Date: Mon, 3 Jun 2024 19:01:08 +0200 Subject: [PATCH 6/9] cleanup --- blender_extensions/settings.py | 1 - 1 file changed, 1 deletion(-) diff --git a/blender_extensions/settings.py b/blender_extensions/settings.py index 70f2e8b6..61abe392 100644 --- a/blender_extensions/settings.py +++ b/blender_extensions/settings.py @@ -75,7 +75,6 @@ INSTALLED_APPS = [ 'django.contrib.staticfiles', 'django.contrib.flatpages', 'django.contrib.humanize', - 'django.contrib.postgres', 'actstream', ] -- 2.30.2 From 890c96986b1d0dd2267ab790a4cd0311cba85b51 Mon Sep 17 00:00:00 2001 From: Oleg Komarov Date: Mon, 3 Jun 2024 19:12:20 +0200 Subject: [PATCH 7/9] add ts_rank --- extensions/views/public.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/extensions/views/public.py b/extensions/views/public.py index 8d05e6b2..6e4d4551 100644 --- a/extensions/views/public.py +++ b/extensions/views/public.py @@ -129,8 +129,9 @@ class SearchView(ListedExtensionsView): def postgres_fts(self, queryset, search_query): """Postgres full text search (fast) and a fuzzy trigram search (slow) as a fallback. - Searches Extension name and description only. If we need to extend the functionality, - it's better to consider using a different approach, e.g. introduce meilisearch. + Searches Extension name and description only, ranking name matches higher. + If we need to extend the functionality, it's better to consider using a different approach, + e.g. introduce meilisearch. Limits the results size to 32 items (2 pages), assuming that nobody will click through many pages if we failed to present the vital results on the first page. @@ -150,6 +151,10 @@ class SearchView(ListedExtensionsView): (to_tsvector('english', name) || ' ' || to_tsvector('english', description)) @@ websearch_to_tsquery('english', %(query)s) ) and is_listed + order by ts_rank( + to_tsvector('english', name), + websearch_to_tsquery('english', %(query)s) + ) desc limit 32""" cursor.execute(sql, {'query': search_query}) pks = [row[0] for row in cursor.fetchall()] -- 2.30.2 From ac52988ea7c2631f19a54d3861bd0669f850eaea Mon Sep 17 00:00:00 2001 From: Oleg Komarov Date: Mon, 3 Jun 2024 19:36:44 +0200 Subject: [PATCH 8/9] add migration --- .../0033_extensions_fts_20240603_1918.py | 40 +++++++++++++++++++ extensions/views/public.py | 8 ++-- 2 files changed, 43 insertions(+), 5 deletions(-) create mode 100644 extensions/migrations/0033_extensions_fts_20240603_1918.py diff --git a/extensions/migrations/0033_extensions_fts_20240603_1918.py b/extensions/migrations/0033_extensions_fts_20240603_1918.py new file mode 100644 index 00000000..c8efe5fc --- /dev/null +++ b/extensions/migrations/0033_extensions_fts_20240603_1918.py @@ -0,0 +1,40 @@ +# Generated by Django 4.2.11 on 2024-06-03 17:18 + +from django.db import migrations + + +def create_indexes(apps, schema_editor): + if schema_editor.connection.vendor != 'postgresql': + return + with schema_editor.connection.cursor() as cursor: + cursor.execute( + """ + CREATE INDEX extensions_fts ON extensions_extension USING + gin ((to_tsvector('english', name) || ' ' || to_tsvector('english', description))) + """ + ) + cursor.execute( + """ + CREATE INDEX extensions_trgm_gin ON extensions_extension USING + gin((((name)::text || ' '::text) || description) gin_trgm_ops); + """ + ) + + +def delete_indexes(apps, schema_editor): + if schema_editor.connection.vendor != 'postgresql': + return + with schema_editor.connection.cursor() as cursor: + cursor.execute('drop index extensions_fts') + cursor.execute('drop index extensions_trgm_gin') + + +class Migration(migrations.Migration): + + dependencies = [ + ('extensions', '0032_extension_extensions__is_list_765936_idx_and_more'), + ] + + operations = [ + migrations.RunPython(create_indexes, delete_indexes) + ] diff --git a/extensions/views/public.py b/extensions/views/public.py index 6e4d4551..767d71d2 100644 --- a/extensions/views/public.py +++ b/extensions/views/public.py @@ -1,7 +1,6 @@ from collections import OrderedDict import logging -from django.conf import settings from django.contrib.auth import get_user_model from django.db import connection from django.db.models import Count, Q @@ -111,8 +110,9 @@ class SearchView(ListedExtensionsView): return queryset # WARNING: full-text search support only on postgres - # using DEBUG as a shortcut for checking if we run on postgres vs sqlite - if settings.DEBUG: + if connection.vendor == 'postgresql': + queryset = self.postgres_fts(queryset, search_query) + else: filter = Q() for token in search_query.split(): filter &= ( @@ -122,8 +122,6 @@ class SearchView(ListedExtensionsView): | Q(latest_version__tags__name__icontains=token) ) queryset = queryset.filter(filter).distinct() - else: - queryset = self.postgres_fts(queryset, search_query) return queryset def postgres_fts(self, queryset, search_query): -- 2.30.2 From d58bc451588be7c22a0bff3d9b652c6c7bbeecf1 Mon Sep 17 00:00:00 2001 From: Oleg Komarov Date: Mon, 3 Jun 2024 20:03:08 +0200 Subject: [PATCH 9/9] missing part of migration --- extensions/migrations/0033_extensions_fts_20240603_1918.py | 1 + 1 file changed, 1 insertion(+) diff --git a/extensions/migrations/0033_extensions_fts_20240603_1918.py b/extensions/migrations/0033_extensions_fts_20240603_1918.py index c8efe5fc..68856912 100644 --- a/extensions/migrations/0033_extensions_fts_20240603_1918.py +++ b/extensions/migrations/0033_extensions_fts_20240603_1918.py @@ -13,6 +13,7 @@ def create_indexes(apps, schema_editor): gin ((to_tsvector('english', name) || ' ' || to_tsvector('english', description))) """ ) + cursor.execute('create extension if not exists pg_trgm;') cursor.execute( """ CREATE INDEX extensions_trgm_gin ON extensions_extension USING -- 2.30.2