Quick-Search: Added Quick-search in the topbar

Changed how and what we store in elastic to unify it with how we store
things in mongodb so we can have more generic javascript code
to render the data.

Elastic changes:
  Added:
  Node.project.url

  Altered to store id instead of url
  Node.picture

  Made Post searchable

./manage.py elastic reset_index
./manage.py elastic reindex

Thanks to Pablo and Sybren
This commit is contained in:
2018-11-22 15:31:53 +01:00
parent a897e201ba
commit 6ae9a5ddeb
53 changed files with 1954 additions and 623 deletions

View File

@@ -81,6 +81,7 @@ class Node(es.DocType):
fields={
'id': es.Keyword(),
'name': es.Keyword(),
'url': es.Keyword(),
}
)
@@ -153,18 +154,21 @@ def create_doc_from_node_data(node_to_index: dict) -> typing.Optional[Node]:
doc.objectID = str(node_to_index['objectID'])
doc.node_type = node_to_index['node_type']
doc.name = node_to_index['name']
doc.description = node_to_index.get('description')
doc.user.id = str(node_to_index['user']['_id'])
doc.user.name = node_to_index['user']['full_name']
doc.project.id = str(node_to_index['project']['_id'])
doc.project.name = node_to_index['project']['name']
doc.project.url = node_to_index['project']['url']
if node_to_index['node_type'] == 'asset':
doc.media = node_to_index['media']
doc.picture = node_to_index.get('picture')
doc.picture = str(node_to_index.get('picture'))
doc.tags = node_to_index.get('tags')
doc.license_notes = node_to_index.get('license_notes')
doc.is_free = node_to_index.get('is_free')
doc.created_at = node_to_index['created']
doc.updated_at = node_to_index['updated']

View File

@@ -3,16 +3,18 @@ import logging
import typing
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, Q
from elasticsearch_dsl import Search, Q, MultiSearch
from elasticsearch_dsl.query import Query
from pillar import current_app
log = logging.getLogger(__name__)
NODE_AGG_TERMS = ['node_type', 'media', 'tags', 'is_free']
BOOLEAN_TERMS = ['is_free']
NODE_AGG_TERMS = ['node_type', 'media', 'tags', *BOOLEAN_TERMS]
USER_AGG_TERMS = ['roles', ]
ITEMS_PER_PAGE = 10
USER_SOURCE_INCLUDE = ['full_name', 'objectID', 'username']
# Will be set in setup_app()
client: Elasticsearch = None
@@ -27,26 +29,25 @@ def add_aggs_to_search(search, agg_terms):
search.aggs.bucket(term, 'terms', field=term)
def make_must(must: list, terms: dict) -> list:
def make_filter(must: list, terms: dict) -> list:
""" Given term parameters append must queries to the must list """
for field, value in terms.items():
if value:
must.append({'match': {field: value}})
if value not in (None, ''):
must.append({'term': {field: value}})
return must
def nested_bool(must: list, should: list, terms: dict, *, index_alias: str) -> Search:
def nested_bool(filters: list, should: list, terms: dict, *, index_alias: str) -> Search:
"""
Create a nested bool, where the aggregation selection is a must.
:param index_alias: 'USER' or 'NODE', see ELASTIC_INDICES config.
"""
must = make_must(must, terms)
filters = make_filter(filters, terms)
bool_query = Q('bool', should=should)
must.append(bool_query)
bool_query = Q('bool', must=must)
bool_query = Q('bool', must=bool_query, filter=filters)
index = current_app.config['ELASTIC_INDICES'][index_alias]
search = Search(using=client, index=index)
@@ -55,12 +56,34 @@ def nested_bool(must: list, should: list, terms: dict, *, index_alias: str) -> S
return search
def do_multi_node_search(queries: typing.List[dict]) -> typing.List[dict]:
"""
Given user query input and term refinements
search for public published nodes
"""
search = create_multi_node_search(queries)
return _execute_multi(search)
def do_node_search(query: str, terms: dict, page: int, project_id: str='') -> dict:
"""
Given user query input and term refinements
search for public published nodes
"""
search = create_node_search(query, terms, page, project_id)
return _execute(search)
def create_multi_node_search(queries: typing.List[dict]) -> MultiSearch:
search = MultiSearch(using=client)
for q in queries:
search = search.add(create_node_search(**q))
return search
def create_node_search(query: str, terms: dict, page: int, project_id: str='') -> Search:
terms = _transform_terms(terms)
should = [
Q('match', name=query),
@@ -71,52 +94,30 @@ def do_node_search(query: str, terms: dict, page: int, project_id: str='') -> di
Q('term', media=query),
Q('term', tags=query),
]
must = []
filters = []
if project_id:
must.append({'term': {'project.id': project_id}})
filters.append({'term': {'project.id': project_id}})
if not query:
should = []
search = nested_bool(must, should, terms, index_alias='NODE')
search = nested_bool(filters, should, terms, index_alias='NODE')
if not query:
search = search.sort('-created_at')
add_aggs_to_search(search, NODE_AGG_TERMS)
search = paginate(search, page)
if log.isEnabledFor(logging.DEBUG):
log.debug(json.dumps(search.to_dict(), indent=4))
response = search.execute()
if log.isEnabledFor(logging.DEBUG):
log.debug(json.dumps(response.to_dict(), indent=4))
return response.to_dict()
return search
def do_user_search(query: str, terms: dict, page: int) -> dict:
""" return user objects represented in elasicsearch result dict"""
must, should = _common_user_search(query)
search = nested_bool(must, should, terms, index_alias='USER')
add_aggs_to_search(search, USER_AGG_TERMS)
search = paginate(search, page)
if log.isEnabledFor(logging.DEBUG):
log.debug(json.dumps(search.to_dict(), indent=4))
response = search.execute()
if log.isEnabledFor(logging.DEBUG):
log.debug(json.dumps(response.to_dict(), indent=4))
return response.to_dict()
search = create_user_search(query, terms, page)
return _execute(search)
def _common_user_search(query: str) -> (typing.List[Query], typing.List[Query]):
"""Construct (must,shoud) for regular + admin user search."""
"""Construct (filter,should) for regular + admin user search."""
if not query:
return [], []
@@ -144,8 +145,31 @@ def do_user_search_admin(query: str, terms: dict, page: int) -> dict:
search all user fields and provide aggregation information
"""
must, should = _common_user_search(query)
search = create_user_admin_search(query, terms, page)
return _execute(search)
def _execute(search: Search) -> dict:
if log.isEnabledFor(logging.DEBUG):
log.debug(json.dumps(search.to_dict(), indent=4))
resp = search.execute()
if log.isEnabledFor(logging.DEBUG):
log.debug(json.dumps(resp.to_dict(), indent=4))
return resp.to_dict()
def _execute_multi(search: typing.List[Search]) -> typing.List[dict]:
if log.isEnabledFor(logging.DEBUG):
log.debug(json.dumps(search.to_dict(), indent=4))
resp = search.execute()
if log.isEnabledFor(logging.DEBUG):
log.debug(json.dumps(resp.to_dict(), indent=4))
return [r.to_dict() for r in resp]
def create_user_admin_search(query: str, terms: dict, page: int) -> Search:
terms = _transform_terms(terms)
filters, should = _common_user_search(query)
if query:
# We most likely got and id field. we should find it.
if len(query) == len('563aca02c379cf0005e8e17d'):
@@ -155,26 +179,34 @@ def do_user_search_admin(query: str, terms: dict, page: int) -> dict:
'boost': 100, # how much more it counts for the score
}
}})
search = nested_bool(must, should, terms, index_alias='USER')
search = nested_bool(filters, should, terms, index_alias='USER')
add_aggs_to_search(search, USER_AGG_TERMS)
search = paginate(search, page)
return search
if log.isEnabledFor(logging.DEBUG):
log.debug(json.dumps(search.to_dict(), indent=4))
response = search.execute()
if log.isEnabledFor(logging.DEBUG):
log.debug(json.dumps(response.to_dict(), indent=4))
return response.to_dict()
def create_user_search(query: str, terms: dict, page: int) -> Search:
search = create_user_admin_search(query, terms, page)
return search.source(include=USER_SOURCE_INCLUDE)
def paginate(search: Search, page_idx: int) -> Search:
return search[page_idx * ITEMS_PER_PAGE:(page_idx + 1) * ITEMS_PER_PAGE]
def _transform_terms(terms: dict) -> dict:
"""
Ugly hack! Elastic uses 1/0 for boolean values in its aggregate response,
but expects true/false in queries.
"""
transformed = terms.copy()
for t in BOOLEAN_TERMS:
orig = transformed.get(t)
if orig in ('1', '0'):
transformed[t] = bool(int(orig))
return transformed
def setup_app(app):
global client

View File

@@ -18,7 +18,7 @@ TERMS = [
]
def _term_filters() -> dict:
def _term_filters(args) -> dict:
"""
Check if frontent wants to filter stuff
on specific fields AKA facets
@@ -26,35 +26,53 @@ def _term_filters() -> dict:
return mapping with term field name
and provided user term value
"""
return {term: request.args.get(term, '') for term in TERMS}
return {term: args.get(term, '') for term in TERMS}
def _page_index() -> int:
def _page_index(page) -> int:
"""Return the page index from the query string."""
try:
page_idx = int(request.args.get('page') or '0')
page_idx = int(page)
except TypeError:
log.info('invalid page number %r received', request.args.get('page'))
raise wz_exceptions.BadRequest()
return page_idx
@blueprint_search.route('/')
@blueprint_search.route('/', methods=['GET'])
def search_nodes():
searchword = request.args.get('q', '')
project_id = request.args.get('project', '')
terms = _term_filters()
page_idx = _page_index()
terms = _term_filters(request.args)
page_idx = _page_index(request.args.get('page', 0))
result = queries.do_node_search(searchword, terms, page_idx, project_id)
return jsonify(result)
@blueprint_search.route('/multisearch', methods=['GET'])
def multi_search_nodes():
import json
if len(request.args) != 1:
log.info(f'Expected 1 argument, received {len(request.args)}')
json_obj = json.loads([a for a in request.args][0])
q = []
for row in json_obj:
q.append({
'query': row.get('q', ''),
'project_id': row.get('project', ''),
'terms': _term_filters(row),
'page': _page_index(row.get('page', 0))
})
result = queries.do_multi_node_search(q)
return jsonify(result)
@blueprint_search.route('/user')
def search_user():
searchword = request.args.get('q', '')
terms = _term_filters()
page_idx = _page_index()
terms = _term_filters(request.args)
page_idx = _page_index(request.args.get('page', 0))
# result is the raw elasticseach output.
# we need to filter fields in case of user objects.
@@ -65,27 +83,6 @@ def search_user():
resp.status_code = 500
return resp
# filter sensitive stuff
# we only need. objectID, full_name, username
hits = result.get('hits', {})
new_hits = []
for hit in hits.get('hits'):
source = hit['_source']
single_hit = {
'_source': {
'objectID': source.get('objectID'),
'username': source.get('username'),
'full_name': source.get('full_name'),
}
}
new_hits.append(single_hit)
# replace search result with safe subset
result['hits']['hits'] = new_hits
return jsonify(result)
@@ -97,8 +94,8 @@ def search_user_admin():
"""
searchword = request.args.get('q', '')
terms = _term_filters()
page_idx = _page_index()
terms = _term_filters(request.args)
page_idx = _page_index(_page_index(request.args.get('page', 0)))
try:
result = queries.do_user_search_admin(searchword, terms, page_idx)

View File

@@ -11,7 +11,6 @@ from flask import Blueprint, current_app, request, url_for
import pillar
from pillar import shortcodes
from pillar.api.utils import jsonify, pretty_duration, str2id
from pillar.web.utils import pretty_date
blueprint = Blueprint('timeline', __name__)
@@ -209,7 +208,6 @@ class TimeLineBuilder:
@classmethod
def node_prettyfy(cls, node: dict)-> dict:
node['pretty_created'] = pretty_date(node['_created'])
duration_seconds = node['properties'].get('duration_seconds')
if duration_seconds is not None:
node['properties']['duration'] = pretty_duration(duration_seconds)

View File

@@ -1,4 +1,6 @@
import logging
import bleach
from bson import ObjectId
from pillar import current_app
@@ -10,7 +12,7 @@ from pillar.api.search import algolia_indexing
log = logging.getLogger(__name__)
INDEX_ALLOWED_NODE_TYPES = {'asset', 'texture', 'group', 'hdri'}
INDEX_ALLOWED_NODE_TYPES = {'asset', 'texture', 'group', 'hdri', 'post'}
SEARCH_BACKENDS = {
@@ -28,34 +30,6 @@ def _get_node_from_id(node_id: str):
return node
def _handle_picture(node: dict, to_index: dict):
"""Add picture URL in-place to the to-be-indexed node."""
picture_id = node.get('picture')
if not picture_id:
return
files_collection = current_app.data.driver.db['files']
lookup = {'_id': ObjectId(picture_id)}
picture = files_collection.find_one(lookup)
for item in picture.get('variations', []):
if item['size'] != 't':
continue
# Not all files have a project...
pid = picture.get('project')
if pid:
link = generate_link(picture['backend'],
item['file_path'],
str(pid),
is_public=True)
else:
link = item['link']
to_index['picture'] = link
break
def prepare_node_data(node_id: str, node: dict=None) -> dict:
"""Given a node id or a node document, return an indexable version of it.
@@ -86,25 +60,30 @@ def prepare_node_data(node_id: str, node: dict=None) -> dict:
users_collection = current_app.data.driver.db['users']
user = users_collection.find_one({'_id': ObjectId(node['user'])})
clean_description = bleach.clean(node.get('_description_html') or '', strip=True)
if not clean_description and node['node_type'] == 'post':
clean_description = bleach.clean(node['properties'].get('_content_html') or '', strip=True)
to_index = {
'objectID': node['_id'],
'name': node['name'],
'project': {
'_id': project['_id'],
'name': project['name']
'name': project['name'],
'url': project['url'],
},
'created': node['_created'],
'updated': node['_updated'],
'node_type': node['node_type'],
'picture': node.get('picture') or '',
'user': {
'_id': user['_id'],
'full_name': user['full_name']
},
'description': node.get('description'),
'description': clean_description or None,
'is_free': False
}
_handle_picture(node, to_index)
# If the node has world permissions, compute the Free permission
if 'world' in node.get('permissions', {}):
if 'GET' in node['permissions']['world']: