Full-text search using postgresql #162
@@ -2,7 +2,7 @@ import logging
 
 from django.conf import settings
 from django.contrib.auth import get_user_model
-from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector
+from django.db import connection
 from django.db.models import Q
 from django.shortcuts import get_object_or_404, redirect
 from django.views.generic.list import ListView
@@ -24,7 +24,15 @@ log = logging.getLogger(__name__)
 
 class ListedExtensionsView(ListView):
     model = Extension
-    queryset = Extension.objects.listed
+    queryset = Extension.objects.listed.prefetch_related(
+        'authors',
+        'latest_version__file',
+        'latest_version__tags',
+        'preview_set',
+        'preview_set__file',
+        'ratings',
+        'team',
+    )
     context_object_name = 'extensions'
 
 
@@ -34,19 +42,7 @@ class HomeView(ListedExtensionsView):
 
     def get_context_data(self, **kwargs):
         context = super().get_context_data(**kwargs)
-        q = (
-            super()
-            .get_queryset()
-            .prefetch_related(
-                'authors',
-                'latest_version__file',
-                'latest_version__tags',
-                'preview_set',
-                'preview_set__file',
-                'ratings',
-                'team',
-            )
-        )
+        q = super().get_queryset()
         context['addons'] = q.filter(type=EXTENSION_TYPE_CHOICES.BPY)[:8]
         context['themes'] = q.filter(type=EXTENSION_TYPE_CHOICES.THEME)[:8]
         return context
@@ -90,39 +86,68 @@ class SearchView(ListedExtensionsView):
         if self.kwargs.get('type_slug'):
             _type = self._get_type_id_by_slug()
             queryset = queryset.filter(type=_type)
-        if 'q' in self.request.GET:
-            # using DEBUG as a shortcut for checking if we run on postgres vs sqlite
-            if settings.DEBUG:
-                qs = self.request.GET['q'].split()
-                search_query = Q()
-                for token in qs:
-                    search_query &= (
-                        Q(slug__icontains=token)
-                        | Q(name__icontains=token)
-                        | Q(description__icontains=token)
-                        | Q(latest_version__tags__name__icontains=token)
-                    )
-                queryset = queryset.filter(search_query).distinct()
-            else:
-                query = SearchQuery(self.request.GET['q'], search_type='websearch')
-                vector = (
-                    SearchVector('name', weight='A')
-                    + SearchVector('description', weight='B')
-                    + SearchVector('latest_version__tags__name', weight='C')
-                )
-                rank = SearchRank(vector, query)
-                queryset = (
-                    queryset.annotate(rank=rank).filter(rank__gte=0.3).distinct().order_by('-rank')
-                )
-        return queryset.prefetch_related(
-            'authors',
-            'latest_version__file',
-            'latest_version__tags',
-            'preview_set',
-            'preview_set__file',
-            'ratings',
-            'team',
-        )
+        search_query = self.request.GET.get('q')
+        if not search_query:
+            return queryset
+
+        # WARNING: full-text search support only on postgres
+        # using DEBUG as a shortcut for checking if we run on postgres vs sqlite
+        if settings.DEBUG:
+            filter = Q()
+            for token in search_query.split():
+                filter &= (
+                    Q(slug__icontains=token)
+                    | Q(name__icontains=token)
+                    | Q(description__icontains=token)
+                    | Q(latest_version__tags__name__icontains=token)
+                )
+            queryset = queryset.filter(filter).distinct()
+        else:
+            queryset = self.postgres_fts(queryset, search_query)
+        return queryset
+
+    def postgres_fts(self, queryset, search_query):
+        """Postgres full text search (fast) and a fuzzy trigram search (slow) as a fallback.
+
+        Searches Extension name and description only. If we need to extend the functionality,
+        it's better to consider using a different approach, e.g. introduce meilisearch.
+
+        Limits the result size to 32 items (2 pages), assuming that nobody will click through many
+        pages if we fail to present the vital results on the first page.
+
+        Relies on indexed expressions:
+            CREATE INDEX extensions_fts ON extensions_extension USING
+                gin ((to_tsvector('english', name) || ' ' || to_tsvector('english', description)));
+            CREATE INDEX extensions_trgm_gin ON extensions_extension USING
+                gin((((name)::text || ' '::text) || description) gin_trgm_ops);
+
+        """
+        with connection.cursor() as cursor:
+            sql = """
+                select id
+                from extensions_extension
+                where (
+                    (to_tsvector('english', name) || ' ' || to_tsvector('english', description))
+                    @@ websearch_to_tsquery('english', %(query)s)
+                ) and is_listed
+                limit 32"""
+            cursor.execute(sql, {'query': search_query})
+            pks = [row[0] for row in cursor.fetchall()]
+            if not pks:
+                # fallback to fuzzy trigram search
+                sql = """
+                    select id
+                    from extensions_extension
+                    where ((name || ' ' || description) %%> %(query)s)
+                    and is_listed
+                    order by %(query)s <<<-> (name || ' ' || description)
+                    limit 32"""
+                cursor.execute(sql, {'query': search_query})
+                pks = [row[0] for row in cursor.fetchall()]
+        # pks are ordered by ranking, keep that order
+        # this approach is fine under the assumption that the list is small
+        return sorted(queryset.filter(pk__in=pks).order_by(), key=lambda x: pks.index(x.pk))
 
     def get_context_data(self, **kwargs):
         context = super().get_context_data(**kwargs)
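Note for reviewers: the raw SQL in `postgres_fts()` assumes the two indexes quoted in the docstring already exist, but this file's diff doesn't show where they are created. A minimal migration sketch, assuming the app label is `extensions` and using a placeholder dependency and file name (none of which are part of this PR), could look like this; `TrigramExtension` installs `pg_trgm`, which `gin_trgm_ops` requires:

```python
# Hypothetical migration, e.g. extensions/migrations/00XX_search_indexes.py (placeholder name).
from django.contrib.postgres.operations import TrigramExtension
from django.db import migrations


class Migration(migrations.Migration):

    dependencies = [
        ('extensions', '0001_initial'),  # placeholder: point at the app's latest migration
    ]

    operations = [
        # gin_trgm_ops is provided by the pg_trgm extension
        TrigramExtension(),
        migrations.RunSQL(
            sql="""
                CREATE INDEX extensions_fts ON extensions_extension USING
                    gin ((to_tsvector('english', name) || ' ' || to_tsvector('english', description)));
                CREATE INDEX extensions_trgm_gin ON extensions_extension USING
                    gin((((name)::text || ' '::text) || description) gin_trgm_ops);
            """,
            reverse_sql="""
                DROP INDEX IF EXISTS extensions_fts;
                DROP INDEX IF EXISTS extensions_trgm_gin;
            """,
        ),
    ]
```

The indexed expressions are kept identical to the ones queried in `postgres_fts()`, so the planner can match them.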
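To double-check that the full-text query is actually served by the `extensions_fts` index rather than a sequential scan, something like the following could be run from `./manage.py shell`. This is only a sketch that reuses the same SQL as `postgres_fts()`; the sample search term is arbitrary:

```python
from django.db import connection


def explain_fts(query: str) -> str:
    """Return the query plan for the full-text query used by SearchView.postgres_fts()."""
    sql = """
        explain analyze
        select id
        from extensions_extension
        where (
            (to_tsvector('english', name) || ' ' || to_tsvector('english', description))
            @@ websearch_to_tsquery('english', %(query)s)
        ) and is_listed
        limit 32"""
    with connection.cursor() as cursor:
        cursor.execute(sql, {'query': query})
        # EXPLAIN returns one text column per plan line
        return '\n'.join(row[0] for row in cursor.fetchall())


print(explain_fts('geometry nodes'))
```

A plan that mentions a Bitmap Index Scan on `extensions_fts` typically means the GIN index is being used; a plain Seq Scan would suggest the indexed expression and the query expression have drifted apart.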