Quick-Search: Added Quick-search in the topbar

Changed how and what we store in elastic to unify it with how we store things in mongodb, so we can have more generic javascript code to render the data.

Elastic changes:
- Added: Node.project.url
- Altered to store id instead of url: Node.picture
- Made Post searchable

./manage.py elastic reset_index
./manage.py elastic reindex

Thanks to Pablo and Sybren
2018-11-22 15:31:53 +01:00
parent a897e201ba
commit 6ae9a5ddeb
53 changed files with 1954 additions and 623 deletions
--- a/pillar/api/search/documents.py
+++ b/pillar/api/search/documents.py
@@ -81,6 +81,7 @@ class Node(es.DocType):
        fields={
            'id': es.Keyword(),
            'name': es.Keyword(),
+            'url': es.Keyword(),
        }
    )

@@ -153,18 +154,21 @@ def create_doc_from_node_data(node_to_index: dict) -> typing.Optional[Node]:
    doc.objectID = str(node_to_index['objectID'])
    doc.node_type = node_to_index['node_type']
    doc.name = node_to_index['name']
+    doc.description = node_to_index.get('description')
    doc.user.id = str(node_to_index['user']['_id'])
    doc.user.name = node_to_index['user']['full_name']
    doc.project.id = str(node_to_index['project']['_id'])
    doc.project.name = node_to_index['project']['name']
+    doc.project.url = node_to_index['project']['url']

    if node_to_index['node_type'] == 'asset':
        doc.media = node_to_index['media']

-    doc.picture = node_to_index.get('picture')
+    doc.picture = str(node_to_index.get('picture'))

    doc.tags = node_to_index.get('tags')
    doc.license_notes = node_to_index.get('license_notes')
+    doc.is_free = node_to_index.get('is_free')

    doc.created_at = node_to_index['created']
    doc.updated_at = node_to_index['updated']
--- a/pillar/api/search/queries.py
+++ b/pillar/api/search/queries.py
@@ -3,16 +3,18 @@ import logging
 import typing

 from elasticsearch import Elasticsearch
-from elasticsearch_dsl import Search, Q
+from elasticsearch_dsl import Search, Q, MultiSearch
 from elasticsearch_dsl.query import Query

 from pillar import current_app

 log = logging.getLogger(__name__)

-NODE_AGG_TERMS = ['node_type', 'media', 'tags', 'is_free']
+BOOLEAN_TERMS = ['is_free']
+NODE_AGG_TERMS = ['node_type', 'media', 'tags', *BOOLEAN_TERMS]
 USER_AGG_TERMS = ['roles', ]
 ITEMS_PER_PAGE = 10
+USER_SOURCE_INCLUDE = ['full_name', 'objectID', 'username']

 # Will be set in setup_app()
 client: Elasticsearch = None
@@ -27,26 +29,25 @@ def add_aggs_to_search(search, agg_terms):
        search.aggs.bucket(term, 'terms', field=term)


-def make_must(must: list, terms: dict) -> list:
+def make_filter(must: list, terms: dict) -> list:
    """ Given term parameters append must queries to the must list """

    for field, value in terms.items():
-        if value:
-            must.append({'match': {field: value}})
+        if value not in (None, ''):
+            must.append({'term': {field: value}})

    return must


-def nested_bool(must: list, should: list, terms: dict, *, index_alias: str) -> Search:
+def nested_bool(filters: list, should: list, terms: dict, *, index_alias: str) -> Search:
    """
    Create a nested bool, where the aggregation selection is a must.

    :param index_alias: 'USER' or 'NODE', see ELASTIC_INDICES config.
    """
-    must = make_must(must, terms)
+    filters = make_filter(filters, terms)
    bool_query = Q('bool', should=should)
-    must.append(bool_query)
-    bool_query = Q('bool', must=must)
+    bool_query = Q('bool', must=bool_query, filter=filters)

    index = current_app.config['ELASTIC_INDICES'][index_alias]
    search = Search(using=client, index=index)
@@ -55,12 +56,34 @@ def nested_bool(must: list, should: list, terms: dict, *, index_alias: str) -> S
    return search


+def do_multi_node_search(queries: typing.List[dict]) -> typing.List[dict]:
+    """
+    Given user query input and term refinements
+    search for public published nodes
+    """
+    search = create_multi_node_search(queries)
+    return _execute_multi(search)
+
+
 def do_node_search(query: str, terms: dict, page: int, project_id: str='') -> dict:
    """
    Given user query input and term refinements
    search for public published nodes
    """
+    search = create_node_search(query, terms, page, project_id)
+    return _execute(search)

+
+def create_multi_node_search(queries: typing.List[dict]) -> MultiSearch:
+    search = MultiSearch(using=client)
+    for q in queries:
+        search = search.add(create_node_search(**q))
+
+    return search
+
+
+def create_node_search(query: str, terms: dict, page: int, project_id: str='') -> Search:
+    terms = _transform_terms(terms)
    should = [
        Q('match', name=query),

@@ -71,52 +94,30 @@ def do_node_search(query: str, terms: dict, page: int, project_id: str='') -> di
        Q('term', media=query),
        Q('term', tags=query),
    ]
-
-    must = []
+    filters = []
    if project_id:
-        must.append({'term': {'project.id': project_id}})
-
+        filters.append({'term': {'project.id': project_id}})
    if not query:
        should = []
-
-    search = nested_bool(must, should, terms, index_alias='NODE')
+    search = nested_bool(filters, should, terms, index_alias='NODE')
    if not query:
        search = search.sort('-created_at')
    add_aggs_to_search(search, NODE_AGG_TERMS)
    search = paginate(search, page)
-
    if log.isEnabledFor(logging.DEBUG):
        log.debug(json.dumps(search.to_dict(), indent=4))
-
-    response = search.execute()
-
-    if log.isEnabledFor(logging.DEBUG):
-        log.debug(json.dumps(response.to_dict(), indent=4))
-
-    return response.to_dict()
+    return search


 def do_user_search(query: str, terms: dict, page: int) -> dict:
    """ return user objects represented in elasicsearch result dict"""

-    must, should = _common_user_search(query)
-    search = nested_bool(must, should, terms, index_alias='USER')
-    add_aggs_to_search(search, USER_AGG_TERMS)
-    search = paginate(search, page)
-
-    if log.isEnabledFor(logging.DEBUG):
-        log.debug(json.dumps(search.to_dict(), indent=4))
-
-    response = search.execute()
-
-    if log.isEnabledFor(logging.DEBUG):
-        log.debug(json.dumps(response.to_dict(), indent=4))
-
-    return response.to_dict()
+    search = create_user_search(query, terms, page)
+    return _execute(search)


 def _common_user_search(query: str) -> (typing.List[Query], typing.List[Query]):
-    """Construct (must,shoud) for regular + admin user search."""
+    """Construct (filter,should) for regular + admin user search."""
    if not query:
        return [], []

@@ -144,8 +145,31 @@ def do_user_search_admin(query: str, terms: dict, page: int) -> dict:
    search all user fields and provide aggregation information
    """

-    must, should = _common_user_search(query)
+    search = create_user_admin_search(query, terms, page)
+    return _execute(search)

+
+def _execute(search: Search) -> dict:
+    if log.isEnabledFor(logging.DEBUG):
+        log.debug(json.dumps(search.to_dict(), indent=4))
+    resp = search.execute()
+    if log.isEnabledFor(logging.DEBUG):
+        log.debug(json.dumps(resp.to_dict(), indent=4))
+    return resp.to_dict()
+
+
+def _execute_multi(search: typing.List[Search]) -> typing.List[dict]:
+    if log.isEnabledFor(logging.DEBUG):
+        log.debug(json.dumps(search.to_dict(), indent=4))
+    resp = search.execute()
+    if log.isEnabledFor(logging.DEBUG):
+        log.debug(json.dumps(resp.to_dict(), indent=4))
+    return [r.to_dict() for r in resp]
+
+
+def create_user_admin_search(query: str, terms: dict, page: int) -> Search:
+    terms = _transform_terms(terms)
+    filters, should = _common_user_search(query)
    if query:
        # We most likely got and id field. we should find it.
        if len(query) == len('563aca02c379cf0005e8e17d'):
@@ -155,26 +179,34 @@ def do_user_search_admin(query: str, terms: dict, page: int) -> dict:
                    'boost': 100,  # how much more it counts for the score
                }
            }})
-
-    search = nested_bool(must, should, terms, index_alias='USER')
+    search = nested_bool(filters, should, terms, index_alias='USER')
    add_aggs_to_search(search, USER_AGG_TERMS)
    search = paginate(search, page)
+    return search

-    if log.isEnabledFor(logging.DEBUG):
-        log.debug(json.dumps(search.to_dict(), indent=4))

-    response = search.execute()
-
-    if log.isEnabledFor(logging.DEBUG):
-        log.debug(json.dumps(response.to_dict(), indent=4))
-
-    return response.to_dict()
+def create_user_search(query: str, terms: dict, page: int) -> Search:
+    search = create_user_admin_search(query, terms, page)
+    return search.source(include=USER_SOURCE_INCLUDE)


 def paginate(search: Search, page_idx: int) -> Search:
    return search[page_idx * ITEMS_PER_PAGE:(page_idx + 1) * ITEMS_PER_PAGE]


+def _transform_terms(terms: dict) -> dict:
+    """
+    Ugly hack! Elastic uses 1/0 for boolean values in its aggregate response,
+    but expects true/false in queries.
+    """
+    transformed = terms.copy()
+    for t in BOOLEAN_TERMS:
+        orig = transformed.get(t)
+        if orig in ('1', '0'):
+            transformed[t] = bool(int(orig))
+    return transformed
+
+
 def setup_app(app):
    global client

--- a/pillar/api/search/routes.py
+++ b/pillar/api/search/routes.py
@@ -18,7 +18,7 @@ TERMS = [
 ]


-def _term_filters() -> dict:
+def _term_filters(args) -> dict:
    """
    Check if frontent wants to filter stuff
    on specific fields AKA facets
@@ -26,35 +26,53 @@ def _term_filters() -> dict:
    return mapping with term field name
    and provided user term value
    """
-    return {term: request.args.get(term, '') for term in TERMS}
+    return {term: args.get(term, '') for term in TERMS}


-def _page_index() -> int:
+def _page_index(page) -> int:
    """Return the page index from the query string."""
    try:
-        page_idx = int(request.args.get('page') or '0')
+        page_idx = int(page)
    except TypeError:
        log.info('invalid page number %r received', request.args.get('page'))
        raise wz_exceptions.BadRequest()
    return page_idx


-@blueprint_search.route('/')
+@blueprint_search.route('/', methods=['GET'])
 def search_nodes():
    searchword = request.args.get('q', '')
    project_id = request.args.get('project', '')
-    terms = _term_filters()
-    page_idx = _page_index()
+    terms = _term_filters(request.args)
+    page_idx = _page_index(request.args.get('page', 0))

    result = queries.do_node_search(searchword, terms, page_idx, project_id)
    return jsonify(result)

+@blueprint_search.route('/multisearch', methods=['GET'])
+def multi_search_nodes():
+    import json
+    if len(request.args) != 1:
+        log.info(f'Expected 1 argument, received {len(request.args)}')
+
+    json_obj = json.loads([a for a in request.args][0])
+    q = []
+    for row in json_obj:
+        q.append({
+            'query': row.get('q', ''),
+            'project_id': row.get('project', ''),
+            'terms': _term_filters(row),
+            'page': _page_index(row.get('page', 0))
+        })
+
+    result = queries.do_multi_node_search(q)
+    return jsonify(result)

@blueprint_search.route('/user')
 def search_user():
    searchword = request.args.get('q', '')
-    terms = _term_filters()
-    page_idx = _page_index()
+    terms = _term_filters(request.args)
+    page_idx = _page_index(request.args.get('page', 0))
    # result is the raw elasticseach output.
    # we need to filter fields in case of user objects.

@@ -65,27 +83,6 @@ def search_user():
        resp.status_code = 500
        return resp

-    # filter sensitive stuff
-    # we only need. objectID, full_name, username
-    hits = result.get('hits', {})
-
-    new_hits = []
-
-    for hit in hits.get('hits'):
-        source = hit['_source']
-        single_hit = {
-            '_source': {
-                'objectID': source.get('objectID'),
-                'username': source.get('username'),
-                'full_name': source.get('full_name'),
-            }
-        }
-
-        new_hits.append(single_hit)
-
-    # replace search result with safe subset
-    result['hits']['hits'] = new_hits
-
    return jsonify(result)


@@ -97,8 +94,8 @@ def search_user_admin():
    """

    searchword = request.args.get('q', '')
-    terms = _term_filters()
-    page_idx = _page_index()
+    terms = _term_filters(request.args)
+    page_idx = _page_index(_page_index(request.args.get('page', 0)))

    try:
        result = queries.do_user_search_admin(searchword, terms, page_idx)
--- a/pillar/api/timeline.py
+++ b/pillar/api/timeline.py
@@ -11,7 +11,6 @@ from flask import Blueprint, current_app, request, url_for
 import pillar
 from pillar import shortcodes
 from pillar.api.utils import jsonify, pretty_duration, str2id
-from pillar.web.utils import pretty_date

 blueprint = Blueprint('timeline', __name__)

@@ -209,7 +208,6 @@ class TimeLineBuilder:

    @classmethod
    def node_prettyfy(cls, node: dict)-> dict:
-        node['pretty_created'] = pretty_date(node['_created'])
        duration_seconds = node['properties'].get('duration_seconds')
        if duration_seconds is not None:
            node['properties']['duration'] = pretty_duration(duration_seconds)
--- a/pillar/celery/search_index_tasks.py
+++ b/pillar/celery/search_index_tasks.py
@@ -1,4 +1,6 @@
 import logging
+
+import bleach
 from bson import ObjectId

 from pillar import current_app
@@ -10,7 +12,7 @@ from pillar.api.search import algolia_indexing
 log = logging.getLogger(__name__)


-INDEX_ALLOWED_NODE_TYPES = {'asset', 'texture', 'group', 'hdri'}
+INDEX_ALLOWED_NODE_TYPES = {'asset', 'texture', 'group', 'hdri', 'post'}


 SEARCH_BACKENDS = {
@@ -28,34 +30,6 @@ def _get_node_from_id(node_id: str):
    return node


-def _handle_picture(node: dict, to_index: dict):
-    """Add picture URL in-place to the to-be-indexed node."""
-
-    picture_id = node.get('picture')
-    if not picture_id:
-        return
-
-    files_collection = current_app.data.driver.db['files']
-    lookup = {'_id': ObjectId(picture_id)}
-    picture = files_collection.find_one(lookup)
-
-    for item in picture.get('variations', []):
-        if item['size'] != 't':
-            continue
-
-        # Not all files have a project...
-        pid = picture.get('project')
-        if pid:
-            link = generate_link(picture['backend'],
-                                 item['file_path'],
-                                 str(pid),
-                                 is_public=True)
-        else:
-            link = item['link']
-        to_index['picture'] = link
-        break
-
-
 def prepare_node_data(node_id: str, node: dict=None) -> dict:
    """Given a node id or a node document, return an indexable version of it.

@@ -86,25 +60,30 @@ def prepare_node_data(node_id: str, node: dict=None) -> dict:
    users_collection = current_app.data.driver.db['users']
    user = users_collection.find_one({'_id': ObjectId(node['user'])})

+    clean_description = bleach.clean(node.get('_description_html') or '', strip=True)
+    if not clean_description and node['node_type'] == 'post':
+        clean_description = bleach.clean(node['properties'].get('_content_html') or '', strip=True)
+
    to_index = {
        'objectID': node['_id'],
        'name': node['name'],
        'project': {
            '_id': project['_id'],
-            'name': project['name']
+            'name': project['name'],
+            'url': project['url'],
        },
        'created': node['_created'],
        'updated': node['_updated'],
        'node_type': node['node_type'],
+        'picture': node.get('picture') or '',
        'user': {
            '_id': user['_id'],
            'full_name': user['full_name']
        },
-        'description': node.get('description'),
+        'description': clean_description or None,
+        'is_free': False
    }

-    _handle_picture(node, to_index)
-
    # If the node has world permissions, compute the Free permission
    if 'world' in node.get('permissions', {}):
        if 'GET' in node['permissions']['world']: