2024-06-03 20:07:23 +02:00
2 changed files with 114 additions and 31 deletions
--- a/extensions/migrations/0033_extensions_fts_20240603_1918.py
+++ b/extensions/migrations/0033_extensions_fts_20240603_1918.py
@ -0,0 +1,41 @@
 # Generated by Django 4.2.11 on 2024-06-03 17:18
 from django.db import migrations
 def create_indexes(apps, schema_editor):
    """Create the search indexes on ``extensions_extension`` (PostgreSQL only).

    Forward step of the migration. Does nothing on any other database vendor.
    On PostgreSQL it issues three statements, in this order:

    1. a GIN index ``extensions_fts`` over the combined ``to_tsvector`` of
       ``name`` and ``description``;
    2. ``create extension if not exists pg_trgm``;
    3. a GIN index ``extensions_trgm_gin`` (``gin_trgm_ops``) over
       ``name || ' ' || description``.

    The index statements use ``IF NOT EXISTS``, like the extension statement,
    so the step can be re-run without failing on an already existing index.
    The indexed expressions themselves are unchanged: the search view's raw SQL
    states that it relies on them, so keep both places in sync.

    :param apps: historical app registry supplied by ``RunPython`` (unused).
    :param schema_editor: schema editor whose ``connection`` runs the SQL.
    """
    if schema_editor.connection.vendor != 'postgresql':
        return
    with schema_editor.connection.cursor() as cursor:
        cursor.execute(
            """
            CREATE INDEX IF NOT EXISTS extensions_fts ON extensions_extension USING
            gin ((to_tsvector('english', name) || ' ' || to_tsvector('english', description)))
            """
        )
        # gin_trgm_ops used below is provided by the pg_trgm extension
        cursor.execute('create extension if not exists pg_trgm;')
        cursor.execute(
            """
            CREATE INDEX IF NOT EXISTS extensions_trgm_gin ON extensions_extension USING
            gin((((name)::text || ' '::text) || description) gin_trgm_ops);
            """
        )
 def delete_indexes(apps, schema_editor):
    """Drop the search indexes created by the forward step (PostgreSQL only).

    Reverse step of the migration. Does nothing on any other database vendor.
    Uses ``drop index if exists`` so that reversing does not fail when an index
    is already gone. The ``pg_trgm`` extension itself is not dropped here.

    :param apps: historical app registry supplied by ``RunPython`` (unused).
    :param schema_editor: schema editor whose ``connection`` runs the SQL.
    """
    if schema_editor.connection.vendor != 'postgresql':
        return
    with schema_editor.connection.cursor() as cursor:
        cursor.execute('drop index if exists extensions_fts')
        cursor.execute('drop index if exists extensions_trgm_gin')
 class Migration(migrations.Migration):
    # Runs create_indexes when applied and delete_indexes when reversed.

    dependencies = [
        ('extensions', '0032_extension_extensions__is_list_765936_idx_and_more'),
    ]

    operations = [
        migrations.RunPython(code=create_indexes, reverse_code=delete_indexes),
    ]
--- a/extensions/views/public.py
+++ b/extensions/views/public.py
@ -2,6 +2,7 @@ from collections import OrderedDict
 import logging
 from django.contrib.auth import get_user_model
 from django.db import connection
 from django.db.models import Count, Q
 from django.shortcuts import get_object_or_404, redirect
 from django.utils.translation import gettext_lazy as _
@ -24,7 +25,15 @@ log = logging.getLogger(__name__)
 class ListedExtensionsView(ListView):
    model = Extension
-    queryset = Extension.objects.listed
+    queryset = Extension.objects.listed.prefetch_related(
        'authors',
        'latest_version__file',
        'latest_version__tags',
        'preview_set',
        'preview_set__file',
        'ratings',
        'team',
    )
    context_object_name = 'extensions'
@ -34,20 +43,7 @@ class HomeView(ListedExtensionsView):
    def get_context_data(self, **kwargs):
        context = super().get_context_data(**kwargs)
-        q = (
+        q = super().get_queryset().order_by('-average_score')
            super()
            .get_queryset()
            .prefetch_related(
                'authors',
                'latest_version__file',
                'latest_version__tags',
                'preview_set',
                'preview_set__file',
                'ratings',
                'team',
            )
            .order_by('-average_score')
        )
        context['addons'] = q.filter(type=EXTENSION_TYPE_CHOICES.BPY)[:8]
        context['themes'] = q.filter(type=EXTENSION_TYPE_CHOICES.THEME)[:8]
        return context
@ -94,7 +90,7 @@ class SearchView(ListedExtensionsView):
        return sort_by
    def get_queryset(self):
-        queryset = super().get_queryset()
+        queryset = super().get_queryset().order_by(self._get_sort_by())
        if self.kwargs.get('tag_slug'):
            queryset = queryset.filter(
                latest_version__tags__slug=self.kwargs['tag_slug']
@ -108,26 +104,72 @@ class SearchView(ListedExtensionsView):
        if self.kwargs.get('type_slug'):
            _type = self._get_type_id_by_slug()
            queryset = queryset.filter(type=_type)
-        if 'q' in self.request.GET:
+
-            qs = self.request.GET['q'].split()
+        search_query = self.request.GET.get('q')
-            search_query = Q()
+        if not search_query:
-            for token in qs:
+            return queryset
-                search_query &= (
+
        # WARNING: full-text search support only on postgres
        if connection.vendor == 'postgresql':
            queryset = self.postgres_fts(queryset, search_query)
        else:
            filter = Q()
            for token in search_query.split():
                filter &= (
                    Q(slug__icontains=token)
                    | Q(name__icontains=token)
                    | Q(description__icontains=token)
                    | Q(latest_version__tags__name__icontains=token)
                )
-            queryset = queryset.filter(search_query).distinct()
+            queryset = queryset.filter(filter).distinct()
-        return queryset.prefetch_related(
+        return queryset
-            'authors',
+
-            'latest_version__file',
+    def postgres_fts(self, queryset, search_query):
-            'latest_version__tags',
+        """Postgres full text search (fast) and a fuzzy trigram search (slow) as a fallback.
-            'preview_set',
+
-            'preview_set__file',
+        Searches Extension name and description only, ranking name matches higher.
-            'ratings',
+        If we need to extend the functionality, it's better to consider using a different approach,
-            'team',
+        e.g. introduce meilisearch.
-        ).order_by(self._get_sort_by())
+
        Limits the results size to 32 items (2 pages), assuming that nobody will click through many
        pages if we failed to present the vital results on the first page.
        Relies on indexed expressions:
            CREATE INDEX extensions_fts ON extensions_extension USING
            gin ((to_tsvector('english', name) || ' ' || to_tsvector('english', description)));
            CREATE INDEX extensions_trgm_gin ON extensions_extension USING
            gin((((name)::text || ' '::text) || description) gin_trgm_ops);
        """
        with connection.cursor() as cursor:
            # Pass 1: full-text match against name + description; rows are ranked by ts_rank
            # computed over the name only. The WHERE expression is the one quoted in the
            # docstring for the extensions_fts index.
            sql = """
                select id
                from extensions_extension
                where (
                    (to_tsvector('english', name) || ' ' || to_tsvector('english', description))
                    @@ websearch_to_tsquery('english', %(query)s)
                ) and is_listed
                order by ts_rank(
                    to_tsvector('english', name),
                    websearch_to_tsquery('english', %(query)s)
                ) desc
                limit 32"""
            # %(query)s occurs twice in the statement; both are bound from the single 'query' key
            cursor.execute(sql, {'query': search_query})
            pks = [row[0] for row in cursor.fetchall()]
            if not pks:
                # fallback to fuzzy trigram search
                # Pass 2 runs only when pass 1 returned no ids.
                # `%%>` below stands for the `%>` operator: because params are passed to
                # execute(), a literal percent sign has to be doubled in the SQL text.
                sql = """
                    select id
                    from extensions_extension
                    where ((name || ' ' || description) %%> %(query)s)
                    and is_listed
                    order by %(query)s <<<-> (name || ' ' || description)
                    limit 32"""
                cursor.execute(sql, {'query': search_query})
                pks = [row[0] for row in cursor.fetchall()]
            # pks are ordered by ranking, keep that order
            # this approach is fine under the assumption that the list is small
            # NOTE: the raw SQL above only checks is_listed; any other filters already applied to
            # `queryset` take effect here, after the 32-row limit, so fewer rows may come back.
            # `.order_by()` with no arguments clears the existing ordering (sorting is done in
            # Python instead), and the return value is a plain list, not a QuerySet.
            return sorted(queryset.filter(pk__in=pks).order_by(), key=lambda x: pks.index(x.pk))
    def get_context_data(self, **kwargs):
        context = super().get_context_data(**kwargs)