Full-text search using postgresql #162
41
extensions/migrations/0033_extensions_fts_20240603_1918.py
Normal file
41
extensions/migrations/0033_extensions_fts_20240603_1918.py
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
# Generated by Django 4.2.11 on 2024-06-03 17:18
|
||||||
|
|
||||||
|
from django.db import migrations
|
||||||
|
|
||||||
|
|
||||||
|
def create_indexes(apps, schema_editor):
    """Create the PostgreSQL indexes used for searching extensions.

    Forward step of this migration, called by ``migrations.RunPython``.
    Does nothing when the database vendor is not ``postgresql``.

    On PostgreSQL it executes, in this order:

    1. ``extensions_fts``: a GIN index over the concatenated ``english``
       tsvectors of ``name`` and ``description``.
    2. ``create extension if not exists pg_trgm``.
    3. ``extensions_trgm_gin``: a GIN index with ``gin_trgm_ops`` over
       ``name || ' ' || description``.

    Both indexes are created with ``IF NOT EXISTS`` so the migration also
    succeeds on a database where an index with that name already exists
    (for example, one created by hand before this migration was applied).

    Args:
        apps: historical app registry passed by ``RunPython``; unused.
        schema_editor: schema editor whose ``connection`` is inspected for
            the vendor and used to run the statements.
    """
    if schema_editor.connection.vendor != 'postgresql':
        return
    with schema_editor.connection.cursor() as cursor:
        cursor.execute(
            """
            CREATE INDEX IF NOT EXISTS extensions_fts ON extensions_extension USING
            gin ((to_tsvector('english', name) || ' ' || to_tsvector('english', description)))
            """
        )
        # The gin_trgm_ops operator class used below is provided by pg_trgm,
        # so the extension has to be in place before the trigram index.
        cursor.execute('create extension if not exists pg_trgm;')
        cursor.execute(
            """
            CREATE INDEX IF NOT EXISTS extensions_trgm_gin ON extensions_extension USING
            gin((((name)::text || ' '::text) || description) gin_trgm_ops);
            """
        )
|
||||||
|
|
||||||
|
|
||||||
|
def delete_indexes(apps, schema_editor):
    """Drop the PostgreSQL search indexes created by this migration.

    Reverse step of this migration, called by ``migrations.RunPython``.
    Does nothing when the database vendor is not ``postgresql``.

    Uses ``drop index if exists`` so that reversing the migration does not
    fail when an index is already gone. The ``pg_trgm`` extension itself is
    left installed.

    Args:
        apps: historical app registry passed by ``RunPython``; unused.
        schema_editor: schema editor whose ``connection`` is inspected for
            the vendor and used to run the statements.
    """
    if schema_editor.connection.vendor != 'postgresql':
        return
    with schema_editor.connection.cursor() as cursor:
        cursor.execute('drop index if exists extensions_fts')
        cursor.execute('drop index if exists extensions_trgm_gin')
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Add (and, on reverse, remove) the search indexes on extensions_extension."""

    # Must run after the previous migration of the same app.
    dependencies = [
        ('extensions', '0032_extension_extensions__is_list_765936_idx_and_more'),
    ]

    # The index DDL lives in plain functions: create_indexes is the forward
    # step and delete_indexes is the reverse step.
    operations = [
        migrations.RunPython(code=create_indexes, reverse_code=delete_indexes),
    ]
|
@ -2,6 +2,7 @@ from collections import OrderedDict
|
|||||||
import logging
|
import logging
|
||||||
|
|
||||||
from django.contrib.auth import get_user_model
|
from django.contrib.auth import get_user_model
|
||||||
|
from django.db import connection
|
||||||
from django.db.models import Count, Q
|
from django.db.models import Count, Q
|
||||||
from django.shortcuts import get_object_or_404, redirect
|
from django.shortcuts import get_object_or_404, redirect
|
||||||
from django.utils.translation import gettext_lazy as _
|
from django.utils.translation import gettext_lazy as _
|
||||||
@ -24,7 +25,15 @@ log = logging.getLogger(__name__)
|
|||||||
|
|
||||||
class ListedExtensionsView(ListView):
|
class ListedExtensionsView(ListView):
|
||||||
model = Extension
|
model = Extension
|
||||||
queryset = Extension.objects.listed
|
queryset = Extension.objects.listed.prefetch_related(
|
||||||
|
'authors',
|
||||||
|
'latest_version__file',
|
||||||
|
'latest_version__tags',
|
||||||
|
'preview_set',
|
||||||
|
'preview_set__file',
|
||||||
|
'ratings',
|
||||||
|
'team',
|
||||||
|
)
|
||||||
context_object_name = 'extensions'
|
context_object_name = 'extensions'
|
||||||
|
|
||||||
|
|
||||||
@ -34,20 +43,7 @@ class HomeView(ListedExtensionsView):
|
|||||||
|
|
||||||
def get_context_data(self, **kwargs):
|
def get_context_data(self, **kwargs):
|
||||||
context = super().get_context_data(**kwargs)
|
context = super().get_context_data(**kwargs)
|
||||||
q = (
|
q = super().get_queryset().order_by('-average_score')
|
||||||
super()
|
|
||||||
.get_queryset()
|
|
||||||
.prefetch_related(
|
|
||||||
'authors',
|
|
||||||
'latest_version__file',
|
|
||||||
'latest_version__tags',
|
|
||||||
'preview_set',
|
|
||||||
'preview_set__file',
|
|
||||||
'ratings',
|
|
||||||
'team',
|
|
||||||
)
|
|
||||||
.order_by('-average_score')
|
|
||||||
)
|
|
||||||
context['addons'] = q.filter(type=EXTENSION_TYPE_CHOICES.BPY)[:8]
|
context['addons'] = q.filter(type=EXTENSION_TYPE_CHOICES.BPY)[:8]
|
||||||
context['themes'] = q.filter(type=EXTENSION_TYPE_CHOICES.THEME)[:8]
|
context['themes'] = q.filter(type=EXTENSION_TYPE_CHOICES.THEME)[:8]
|
||||||
return context
|
return context
|
||||||
@ -94,7 +90,7 @@ class SearchView(ListedExtensionsView):
|
|||||||
return sort_by
|
return sort_by
|
||||||
|
|
||||||
def get_queryset(self):
|
def get_queryset(self):
|
||||||
queryset = super().get_queryset()
|
queryset = super().get_queryset().order_by(self._get_sort_by())
|
||||||
if self.kwargs.get('tag_slug'):
|
if self.kwargs.get('tag_slug'):
|
||||||
queryset = queryset.filter(
|
queryset = queryset.filter(
|
||||||
latest_version__tags__slug=self.kwargs['tag_slug']
|
latest_version__tags__slug=self.kwargs['tag_slug']
|
||||||
@ -108,26 +104,72 @@ class SearchView(ListedExtensionsView):
|
|||||||
if self.kwargs.get('type_slug'):
|
if self.kwargs.get('type_slug'):
|
||||||
_type = self._get_type_id_by_slug()
|
_type = self._get_type_id_by_slug()
|
||||||
queryset = queryset.filter(type=_type)
|
queryset = queryset.filter(type=_type)
|
||||||
if 'q' in self.request.GET:
|
|
||||||
qs = self.request.GET['q'].split()
|
search_query = self.request.GET.get('q')
|
||||||
search_query = Q()
|
if not search_query:
|
||||||
for token in qs:
|
return queryset
|
||||||
search_query &= (
|
|
||||||
|
# WARNING: full-text search support only on postgres
|
||||||
|
if connection.vendor == 'postgresql':
|
||||||
|
queryset = self.postgres_fts(queryset, search_query)
|
||||||
|
else:
|
||||||
|
filter = Q()
|
||||||
|
for token in search_query.split():
|
||||||
|
filter &= (
|
||||||
Q(slug__icontains=token)
|
Q(slug__icontains=token)
|
||||||
| Q(name__icontains=token)
|
| Q(name__icontains=token)
|
||||||
| Q(description__icontains=token)
|
| Q(description__icontains=token)
|
||||||
| Q(latest_version__tags__name__icontains=token)
|
| Q(latest_version__tags__name__icontains=token)
|
||||||
)
|
)
|
||||||
queryset = queryset.filter(search_query).distinct()
|
queryset = queryset.filter(filter).distinct()
|
||||||
return queryset.prefetch_related(
|
return queryset
|
||||||
'authors',
|
|
||||||
'latest_version__file',
|
def postgres_fts(self, queryset, search_query):
    """Postgres full text search (fast) and a fuzzy trigram search (slow) as a fallback.

    Searches Extension name and description only, ranking name matches higher.
    If we need to extend the functionality, it's better to consider using a different approach,
    e.g. introduce meilisearch.

    Limits the results size to 32 items (2 pages), assuming that nobody will click through many
    pages if we failed to present the vital results on the first page.

    Relies on indexed expressions:
        CREATE INDEX extensions_fts ON extensions_extension USING
            gin ((to_tsvector('english', name) || ' ' || to_tsvector('english', description)));
        CREATE INDEX extensions_trgm_gin ON extensions_extension USING
            gin((((name)::text || ' '::text) || description) gin_trgm_ops);

    Args:
        queryset: queryset to restrict to the matching extensions; any ordering
            already set on it is cleared, because the search ranking decides the order.
        search_query: raw search text; it is only ever passed to the database as
            a bound query parameter.

    Returns:
        A list (not a QuerySet) of the objects from ``queryset`` whose primary
        key matched, ordered by search ranking, best match first.
    """
    with connection.cursor() as cursor:
        sql = """
            select id
            from extensions_extension
            where (
                (to_tsvector('english', name) || ' ' || to_tsvector('english', description))
                @@ websearch_to_tsquery('english', %(query)s)
            ) and is_listed
            order by ts_rank(
                to_tsvector('english', name),
                websearch_to_tsquery('english', %(query)s)
            ) desc
            limit 32"""
        cursor.execute(sql, {'query': search_query})
        pks = [row[0] for row in cursor.fetchall()]
        if not pks:
            # fallback to fuzzy trigram search
            # ("%%" is a literal "%" here, because the statement takes parameters)
            sql = """
                select id
                from extensions_extension
                where ((name || ' ' || description) %%> %(query)s)
                and is_listed
                order by %(query)s <<<-> (name || ' ' || description)
                limit 32"""
            cursor.execute(sql, {'query': search_query})
            pks = [row[0] for row in cursor.fetchall()]

    # pks are ordered by ranking, keep that order.
    # Build the pk -> position lookup once instead of scanning the pks list
    # again for every row being sorted.
    position_by_pk = {pk: position for position, pk in enumerate(pks)}
    return sorted(
        queryset.filter(pk__in=pks).order_by(),
        key=lambda x: position_by_pk[x.pk],
    )
|
||||||
|
|
||||||
def get_context_data(self, **kwargs):
|
def get_context_data(self, **kwargs):
|
||||||
context = super().get_context_data(**kwargs)
|
context = super().get_context_data(**kwargs)
|
||||||
|
Loading…
Reference in New Issue
Block a user