diff --git a/pillar/api/search/__init__.py b/pillar/api/search/__init__.py
index dbe228ff..ff44f749 100644
--- a/pillar/api/search/__init__.py
+++ b/pillar/api/search/__init__.py
@@ -1,13 +1,7 @@
 import logging
 
-#import bson
-#from flask import current_app
-
 from .routes import blueprint_search
 
-log = logging.getLogger(__name__)
-
-
 def setup_app(app, url_prefix: str =None):
     app.register_api_blueprint(
         blueprint_search, url_prefix=url_prefix)
diff --git a/pillar/api/search/algolia_indexing.py b/pillar/api/search/algolia_indexing.py
index 378b2908..e43393fe 100644
--- a/pillar/api/search/algolia_indexing.py
+++ b/pillar/api/search/algolia_indexing.py
@@ -30,8 +30,9 @@ def index_node_save(node_to_index: dict):
 
 
 def index_node_delete(delete_id: str):
-
+    """Delete a node from the Algolia index by its ID."""
     from pillar.api.utils import algolia
+
     try:
         algolia.index_node_delete(delete_id)
     except AlgoliaException as ex:
diff --git a/pillar/api/search/documents.py b/pillar/api/search/documents.py
index 649a50fe..7ec43454 100644
--- a/pillar/api/search/documents.py
+++ b/pillar/api/search/documents.py
@@ -1,10 +1,20 @@
+"""
+Define the Elasticsearch document mapping.
+
+The Elasticsearch integration consists of two parts:
+
+Part 1: Define the documents, in which you
+specify which fields will be indexed.
+Part 2: Build the Elasticsearch JSON queries.
+
+Both of these parts are equally important for
+a search API that returns relevant results.
+"""
 import logging
 
 import elasticsearch_dsl as es
 from elasticsearch_dsl import analysis
 
-# from pillar import current_app
-# define elasticsearch document mapping.
 
 log = logging.getLogger(__name__)
 
@@ -100,7 +110,14 @@ class Node(es.DocType):
         index = 'nodes'
 
 
-def create_doc_from_user_data(user_to_index):
+def create_doc_from_user_data(user_to_index: dict) -> User:
+    """
+    Create an Elasticsearch User document from a user dict.
+
+    The user object is identical between search backends;
+    see pillar/celery/search_index_tasks.py.
+    This function returns a proper Elasticsearch document.
+    """
     if not user_to_index:
         return
 
@@ -108,8 +125,8 @@
     doc_id = str(user_to_index.get('objectID', ''))
 
     if not doc_id:
-        log.error('ID missing %s', user_to_index)
-        return
+        log.error('User ID is missing: %s', user_to_index)
+        raise KeyError('objectID missing')
 
     doc = User(_id=doc_id)
     doc.objectID = str(user_to_index['objectID'])
@@ -121,7 +138,14 @@
     return doc
 
 
-def create_doc_from_node_data(node_to_index):
+def create_doc_from_node_data(node_to_index: dict) -> Node:
+    """
+    Create an Elasticsearch Node document from a node dict.
+
+    The node object is identical between search backends;
+    see pillar/celery/search_index_tasks.py.
+    This function returns a proper Elasticsearch document.
+    """
     if not node_to_index:
         return
diff --git a/pillar/api/search/elastic_indexing.py b/pillar/api/search/elastic_indexing.py
index 50f37da2..db8f88cf 100644
--- a/pillar/api/search/elastic_indexing.py
+++ b/pillar/api/search/elastic_indexing.py
@@ -6,6 +6,7 @@
 from elasticsearch.exceptions import NotFoundError
 from pillar import current_app
 
 from . import documents
+log = logging.getLogger(__name__)
 
 elk_hosts = current_app.config['ELASTIC_SEARCH_HOSTS']
@@ -14,13 +15,10 @@ connections.create_connection(
     sniff_on_start=True,
     timeout=20)
 
-log = logging.getLogger(__name__)
-
 
 def push_updated_user(user_to_index: dict):
     """
-    Push an update to the Elastic index when
-    a user item is updated.
+    Push an update to the Elastic index when a user item is updated.
""" if not user_to_index: return @@ -30,11 +28,14 @@ def push_updated_user(user_to_index: dict): if not doc: return - log.debug('UPDATE USER %s', doc._id) + log.debug('index update user elasticsearch %s', doc._id) doc.save() def index_node_save(node_to_index: dict): + """ + Push an update to the Elastic index when a node item is saved. + """ if not node_to_index: return @@ -44,13 +45,16 @@ def index_node_save(node_to_index: dict): if not doc: return - log.debug('CREATED ELK NODE DOC %s', doc._id) + log.debug('index created node elasticsearch %s', doc._id) doc.save() def index_node_delete(delete_id: str): + """ + Delete node document from Elastic index useing a node id + """ - log.debug('NODE DELETE INDEXING %s', delete_id) + log.debug('index node delete %s', delete_id) try: doc = documents.Node.get(id=delete_id) diff --git a/pillar/api/search/index.py b/pillar/api/search/index.py index fab826a6..d05645c8 100644 --- a/pillar/api/search/index.py +++ b/pillar/api/search/index.py @@ -1,10 +1,5 @@ import logging -# import time - -# from elasticsearch import helpers -# import elasticsearch - -# from elasticsearch.client import IndicesClient +from typing import List from elasticsearch.exceptions import NotFoundError from elasticsearch_dsl.connections import connections @@ -18,17 +13,14 @@ log = logging.getLogger(__name__) class ResetIndexTask(object): - """ - Clear and build index / mapping - """ + """ Clear and build index / mapping """ index_key = '' """Key into the ELASTIC_INDICES dict in the app config.""" - doc_types = [] + doc_types: List[type] = [] name = 'remove index' def __init__(self): - if not self.index_key: raise ValueError("No index specified") @@ -48,10 +40,10 @@ class ResetIndexTask(object): try: idx.delete(ignore=404) log.info("Deleted index %s", index) - except AttributeError: - log.warning("Could not delete index '%s', ignoring", index) except NotFoundError: log.warning("Could not delete index '%s', ignoring", index) + else: + log.warning("Could not delete index '%s', ignoring", index) # create doc types for dt in self.doc_types: diff --git a/pillar/api/search/queries.py b/pillar/api/search/queries.py index a65641dc..0e223c79 100644 --- a/pillar/api/search/queries.py +++ b/pillar/api/search/queries.py @@ -1,7 +1,7 @@ -import logging import json from elasticsearch import Elasticsearch from elasticsearch_dsl import Search, Q +import logging from pillar import current_app @@ -23,13 +23,9 @@ def add_aggs_to_search(search, agg_terms): def make_must(must: list, terms: dict) -> list: - """ - Given some term parameters - we must match those - """ + """ Given term parameters append must queries to the must list """ for field, value in terms.items(): - if value: must.append({'match': {field: value}}) @@ -54,8 +50,10 @@ def nested_bool(must: list, should: list, terms: dict) -> Search: def do_search(query: str, terms: dict) -> dict: """ - Given user input search for node/stuff + Given user query input and term refinements + search for public published nodes """ + should = [ Q('match', name=query), @@ -77,21 +75,19 @@ def do_search(query: str, terms: dict) -> dict: search = nested_bool(must, should, terms) add_aggs_to_search(search, node_agg_terms) - if current_app.config['DEBUG']: + if log.isEnabledFor(logging.DEBUG): print(json.dumps(search.to_dict(), indent=4)) response = search.execute() - if current_app.config['DEBUG']: + if log.isEnabledFor(logging.DEBUG): print(json.dumps(response.to_dict(), indent=4)) return response.to_dict() def do_user_search(query: str, terms: dict) -> dict: - 
""" - return user objects - """ + """ return user objects represented in elasicsearch result dict""" should = [ Q('match', username=query), Q('match', full_name=query), @@ -101,7 +97,7 @@ def do_user_search(query: str, terms: dict) -> dict: Q('term', _type='user') ] - # We got an id field. we MUST find it. + # We most likely got and id field. we MUST find it. if len(query) == len('563aca02c379cf0005e8e17d'): must.append(Q('term', _id=query)) @@ -111,12 +107,12 @@ def do_user_search(query: str, terms: dict) -> dict: search = nested_bool(must, should, terms) add_aggs_to_search(search, user_agg_terms) - if current_app.config['DEBUG']: + if log.isEnabledFor(logging.DEBUG): print(json.dumps(search.to_dict(), indent=4)) response = search.execute() - if current_app.config['DEBUG']: + if log.isEnabledFor(logging.DEBUG): print(json.dumps(response.to_dict(), indent=4)) return response.to_dict() @@ -124,7 +120,8 @@ def do_user_search(query: str, terms: dict) -> dict: def do_user_search_admin(query: str) -> dict: """ - return users with all fields and aggregations + return users seach result dict object + search all user fields and provide aggregation information """ should = [ Q('match', username=query), @@ -135,12 +132,12 @@ def do_user_search_admin(query: str) -> dict: search = Search(using=client) search.query = bool_query - if current_app.config['DEBUG']: + if log.isEnabledFor(logging.DEBUG): log.debug(json.dumps(search.to_dict(), indent=4)) response = search.execute() - if current_app.config['DEBUG']: + if log.isEnabledFor(logging.DEBUG): log.debug(json.dumps(response.to_dict(), indent=4)) return response.to_dict() diff --git a/pillar/api/search/routes.py b/pillar/api/search/routes.py index a3f8a9e3..5b62a1f8 100644 --- a/pillar/api/search/routes.py +++ b/pillar/api/search/routes.py @@ -11,37 +11,28 @@ log = logging.getLogger(__name__) blueprint_search = Blueprint('elksearch', __name__) +terms = [ + 'node_type', 'media', + 'tags', 'is_free', 'projectname', + 'roles', +] + def _valid_search() -> str: - """ - Returns search parameters, raising error when missing. 
- """ - - searchword = request.args.get('q', '') - # if not searchword: - # raise wz_exceptions.BadRequest( - # 'You are forgetting a "?q=whatareyoulookingfor"') - return searchword + """ Returns search parameters """ + query = request.args.get('q', '') + return query def _term_filters() -> dict: """ Check if frontent wants to filter stuff on specific fields AKA facets + + return mapping with term field name + and provided user term value """ - - terms = [ - 'node_type', 'media', - 'tags', 'is_free', 'projectname', - 'roles', - ] - - parsed_terms = {} - - for term in terms: - parsed_terms[term] = request.args.get(term, '') - - return parsed_terms + return {term: request.args.get(term, '') for term in terms} @blueprint_search.route('/') @@ -54,12 +45,9 @@ def search_nodes(): @blueprint_search.route('/user') def search_user(): - searchword = _valid_search() - terms = _term_filters() data = queries.do_user_search(searchword, terms) - return jsonify(data) diff --git a/pillar/api/utils/algolia.py b/pillar/api/utils/algolia.py index 169be666..d63af622 100644 --- a/pillar/api/utils/algolia.py +++ b/pillar/api/utils/algolia.py @@ -21,7 +21,6 @@ def index_user_save(to_index_user: dict): @skip_when_testing def index_node_save(node_to_index): - if not current_app.algolia_index_nodes: return current_app.algolia_index_nodes.save_object(node_to_index) diff --git a/pillar/celery/search_index_tasks.py b/pillar/celery/search_index_tasks.py index 2c045e56..743eca08 100644 --- a/pillar/celery/search_index_tasks.py +++ b/pillar/celery/search_index_tasks.py @@ -1,10 +1,9 @@ import logging from bson import ObjectId + from pillar import current_app from pillar.api.file_storage import generate_link - from pillar.api.search import elastic_indexing - from pillar.api.search import algolia_indexing @@ -21,10 +20,7 @@ SEARCH_BACKENDS = { def _get_node_from_id(node_id: str): - """ - """ node_oid = ObjectId(node_id) - log.info('Retrieving node %s', node_oid) nodes_coll = current_app.db('nodes') node = nodes_coll.find_one({'_id': node_oid}) @@ -56,15 +52,19 @@ def _handle_picture(node: dict, to_index: dict): is_public=True) -def prepare_node_data(node_id: str, node=None) -> dict: +def prepare_node_data(node_id: str, node: dict=None) -> dict: """ - Given node build data object with fields to index + Given node by id or actual node build data object with fields to index """ + + if node_id and node: + raise ValueError("do not provide node and node_id together") + if node_id: node = _get_node_from_id(node_id) if node is None: - log.warning('Unable to find node %s, not updating Algolia.', node_id) + log.warning('Unable to find node %s, not updating.', node_id) return if node['node_type'] not in INDEX_ALLOWED_NODE_TYPES: @@ -95,8 +95,7 @@ def prepare_node_data(node_id: str, node=None) -> dict: }, } - if 'description' in node and node['description']: - to_index['description'] = node['description'] + to_index['description'] = node.get('description') _handle_picture(node, to_index) @@ -140,7 +139,7 @@ def prepare_user_data(user_id: str, user=None) -> dict: # Strip unneeded roles index_roles = user_roles.intersection(current_app.user_roles_indexable) - log.debug('Pushed user %r to Search index', user['_id']) + log.debug('Push user %r to Search index', user['_id']) user_to_index = { 'objectID': user['_id'], @@ -181,7 +180,6 @@ def node_delete(node_id: str): # Deleting a node takes nothing more than the ID anyway. # No need to fetch anything from Mongo. 
     delete_id = ObjectId(node_id)
-    algolia_indexing.index_node_delete(delete_id)
 
     for searchoption in current_app.config['SEARCH_BACKENDS']:
         searchmodule = SEARCH_BACKENDS[searchoption]
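
For context, a minimal sketch of how the refactored queries.py assembles its bool query with elasticsearch_dsl, mirroring make_must() and nested_bool() in the patch above. This is not part of the patch; the locally reachable client, the query string and the term values are assumptions for illustration only.

# Reviewer sketch, not part of the patch: building the search bool query.
import json

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, Q

client = Elasticsearch()                      # real code reads ELASTIC_SEARCH_HOSTS
query = 'spring'                              # made-up user input (?q=spring)
terms = {'node_type': 'asset', 'media': ''}   # made-up output of _term_filters()

# "should" scores matches on name/description, "must" restricts the doc type,
# and every non-empty term filter becomes an additional "must" match.
should = [Q('match', name=query), Q('match', description=query)]
must = [Q('term', _type='node')]
for field, value in terms.items():
    if value:
        must.append(Q('match', **{field: value}))

search = Search(using=client)
search.query = Q('bool', must=must, should=should)

print(json.dumps(search.to_dict(), indent=4))  # inspect the generated query JSON
# response = search.execute()                  # would actually hit the cluster
# print(response.to_dict())

In the routes, _term_filters() builds that terms dict from the query string, so a request such as GET <search prefix>/?q=spring&node_type=asset (the prefix depends on the url_prefix passed to setup_app()) reaches do_search() with terms={'node_type': 'asset', ...}, and only the non-empty values end up as extra must clauses.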