From 47a1db07dcfecf624df479bf7f96a6668b5ae0cf Mon Sep 17 00:00:00 2001 From: Stephan Preeker Date: Wed, 25 Oct 2017 17:09:10 +0200 Subject: [PATCH 1/7] T53161 start working on elastic.. --- pillar/__init__.py | 2 +- pillar/api/nodes/__init__.py | 14 ++++---- pillar/api/users/hooks.py | 17 ++++++--- pillar/api/utils/algolia.py | 54 +++++++++++++++++++---------- pillar/celery/algolia_tasks.py | 16 ++++----- pillar/celery/search_index_tasks.py | 30 ++++++++++++++++ src/scripts/algolia_search.js | 5 +++ 7 files changed, 98 insertions(+), 40 deletions(-) create mode 100644 pillar/celery/search_index_tasks.py diff --git a/pillar/__init__.py b/pillar/__init__.py index ba79fa48..646c5a83 100644 --- a/pillar/__init__.py +++ b/pillar/__init__.py @@ -466,7 +466,7 @@ class PillarServer(Eve): # Pillar-defined Celery task modules: celery_task_modules = [ 'pillar.celery.tasks', - 'pillar.celery.algolia_tasks', + 'pillar.celery.search_index_tasks', 'pillar.celery.file_link_tasks', ] diff --git a/pillar/api/nodes/__init__.py b/pillar/api/nodes/__init__.py index 27437ce6..1b00fa8b 100644 --- a/pillar/api/nodes/__init__.py +++ b/pillar/api/nodes/__init__.py @@ -167,7 +167,8 @@ def create_short_code(node) -> str: def short_link_info(short_code): """Returns the short link info in a dict.""" - short_link = urllib.parse.urljoin(current_app.config['SHORT_LINK_BASE_URL'], short_code) + short_link = urllib.parse.urljoin( + current_app.config['SHORT_LINK_BASE_URL'], short_code) return { 'short_code': short_code, @@ -185,7 +186,7 @@ def after_replacing_node(item, original): project is private, prevent public indexing. """ - from pillar.celery import algolia_tasks + from pillar.celery import search_index_tasks as index projects_collection = current_app.data.driver.db['projects'] project = projects_collection.find_one({'_id': item['project']}) @@ -195,10 +196,11 @@ def after_replacing_node(item, original): status = item['properties'].get('status', 'unpublished') node_id = str(item['_id']) + if status == 'published': - algolia_tasks.algolia_index_node_save.delay(node_id) + index.node_save.delay(node_id) else: - algolia_tasks.algolia_index_node_delete.delay(node_id) + index.node_delete.delay(node_id) def before_inserting_nodes(items): @@ -372,8 +374,8 @@ def before_deleting_node(node: dict): def after_deleting_node(item): - from pillar.celery import algolia_tasks - algolia_tasks.algolia_index_node_delete.delay(str(item['_id'])) + from pillar.celery import search_index_tasks as index + index.node_delete.delay(str(item['_id'])) only_for_comments = only_for_node_type_decorator('comment') diff --git a/pillar/api/users/hooks.py b/pillar/api/users/hooks.py index 46e4ef9a..7bdf75d4 100644 --- a/pillar/api/users/hooks.py +++ b/pillar/api/users/hooks.py @@ -61,19 +61,26 @@ def before_replacing_user(request, lookup): # Regular users should always have an email address if 'service' not in put_data.get('roles', ()): if not put_data.get('email'): - raise wz_exceptions.UnprocessableEntity('email field must be given') + raise wz_exceptions.UnprocessableEntity( + 'email field must be given') def push_updated_user_to_algolia(user, original): - """Push an update to the Algolia index when a user item is updated""" + """ + Push an update to the Algolia index when a user + item is updated + """ - from pillar.celery import algolia_tasks + from pillar.celery import search_index_tasks as index - algolia_tasks.push_updated_user_to_algolia.delay(str(user['_id'])) + index.updated_user.delay(str(user['_id'])) def send_blinker_signal_roles_changed(user, original): - """Sends a Blinker signal that the user roles were changed, so others can respond.""" + """ + Sends a Blinker signal that the user roles were + changed, so others can respond. + """ current_roles = set(user.get('roles', [])) original_roles = set(original.get('roles', [])) diff --git a/pillar/api/utils/algolia.py b/pillar/api/utils/algolia.py index 7ade679f..f5e8abcc 100644 --- a/pillar/api/utils/algolia.py +++ b/pillar/api/utils/algolia.py @@ -35,7 +35,31 @@ def algolia_index_user_save(user): 'email': user['email'] }) - log.debug('Pushed user %r to Algolia index %r', user['_id'], index_users.index_name) + log.debug( + 'Pushed user %r to Algolia index %r', + user['_id'], index_users.index_name) + + +def _handle_picture(node, doc): + """ + add picture fields to be indexed + """ + + if 'picture' in node and node['picture']: + files_collection = current_app.data.driver.db['files'] + lookup = {'_id': ObjectId(node['picture'])} + picture = files_collection.find_one(lookup) + + img_variation_t = next( + (item for item in picture['variations'] + if item['size'] == 't'), None) + + if img_variation_t: + doc['picture'] = generate_link( + picture['backend'], + img_variation_t['file_path'], + project_id=str(picture['project']), + is_public=True) @skip_when_testing @@ -54,7 +78,7 @@ def algolia_index_node_save(node): users_collection = current_app.data.driver.db['users'] user = users_collection.find_one({'_id': ObjectId(node['user'])}) - node_ob = { + doc = { 'objectID': node['_id'], 'name': node['name'], 'project': { @@ -69,35 +93,27 @@ def algolia_index_node_save(node): 'full_name': user['full_name'] }, } + if 'description' in node and node['description']: - node_ob['description'] = node['description'] - if 'picture' in node and node['picture']: - files_collection = current_app.data.driver.db['files'] - lookup = {'_id': ObjectId(node['picture'])} - picture = files_collection.find_one(lookup) - if picture['backend'] == 'gcs': - variation_t = next((item for item in picture['variations'] \ - if item['size'] == 't'), None) - if variation_t: - node_ob['picture'] = generate_link(picture['backend'], - variation_t['file_path'], - project_id=str(picture['project']), - is_public=True) + doc['description'] = node['description'] + + _handle_picture(node, doc) + # If the node has world permissions, compute the Free permission if 'permissions' in node and 'world' in node['permissions']: if 'GET' in node['permissions']['world']: - node_ob['is_free'] = True + doc['is_free'] = True # Append the media key if the node is of node_type 'asset' if node['node_type'] == 'asset': - node_ob['media'] = node['properties']['content_type'] + doc['media'] = node['properties']['content_type'] # Add extra properties for prop in ('tags', 'license_notes'): if prop in node['properties']: - node_ob[prop] = node['properties'][prop] + doc[prop] = node['properties'][prop] - current_app.algolia_index_nodes.save_object(node_ob) + current_app.algolia_index_nodes.save_object(doc) @skip_when_testing diff --git a/pillar/celery/algolia_tasks.py b/pillar/celery/algolia_tasks.py index 0bc61173..5218e8f0 100644 --- a/pillar/celery/algolia_tasks.py +++ b/pillar/celery/algolia_tasks.py @@ -8,7 +8,6 @@ from pillar import current_app log = logging.getLogger(__name__) -@current_app.celery.task(ignore_result=True) def push_updated_user_to_algolia(user_id: str): """Push an update to the Algolia index when a user item is updated""" @@ -25,12 +24,11 @@ def push_updated_user_to_algolia(user_id: str): try: algolia_index_user_save(user) except AlgoliaException as ex: - log.warning('Unable to push user info to Algolia for user "%s", id=%s; %s', + log.warning('Unable to push user info to Algolia for user "%s", id=%s; %s', # noqa user.get('username'), user_id, ex) -@current_app.celery.task(ignore_result=True) -def algolia_index_node_save(node_id: str): +def index_node_save(node_id: str): from pillar.api.utils.algolia import algolia_index_node_save node_oid = bson.ObjectId(node_id) @@ -46,17 +44,17 @@ def algolia_index_node_save(node_id: str): try: algolia_index_node_save(node) except AlgoliaException as ex: - log.warning('Unable to push node info to Algolia for node %s; %s', node_id, ex) + log.warning('Unable to push node info to Algolia for node %s; %s', node_id, ex) # noqa -@current_app.celery.task(ignore_result=True) -def algolia_index_node_delete(node_id: str): +def index_node_delete(node_id: str): from pillar.api.utils.algolia import algolia_index_node_delete - # Deleting a node takes nothing more than the ID anyway. No need to fetch anything from Mongo. + # Deleting a node takes nothing more than the ID anyway. + # No need to fetch anything from Mongo. fake_node = {'_id': bson.ObjectId(node_id)} try: algolia_index_node_delete(fake_node) except AlgoliaException as ex: - log.warning('Unable to delete node info to Algolia for node %s; %s', node_id, ex) + log.warning('Unable to delete node info to Algolia for node %s; %s', node_id, ex) # noqa diff --git a/pillar/celery/search_index_tasks.py b/pillar/celery/search_index_tasks.py new file mode 100644 index 00000000..769e331f --- /dev/null +++ b/pillar/celery/search_index_tasks.py @@ -0,0 +1,30 @@ +import logging + +from . import algolia_tasks + +from pillar import current_app + +log = logging.getLogger(__name__) + + +# TODO make index backend conditional on settings. +# now uses angolia, but should use elastic + + +@current_app.celery.task(ignore_result=True) +def updated_user(user_id: str): + """Push an update to the index when a user item is updated""" + + algolia_tasks.push_updated_user_to_algolia(user_id) + + +@current_app.celery.task(ignore_result=True) +def node_save(node_id: str): + + algolia_tasks.index_node_save(node_id) + + +@current_app.celery.task(ignore_result=True) +def node_delete(node_id: str): + + algolia_tasks.index_node_delete(node_id) diff --git a/src/scripts/algolia_search.js b/src/scripts/algolia_search.js index 9e1ff119..d1b445c6 100644 --- a/src/scripts/algolia_search.js +++ b/src/scripts/algolia_search.js @@ -21,6 +21,7 @@ $(document).ready(function() { var sliderTemplate = Hogan.compile($('#slider-template').text()); var paginationTemplate = Hogan.compile($('#pagination-template').text()); + // replace with something elasticy! // Client initialization var algolia = algoliasearch(APPLICATION_ID, SEARCH_ONLY_API_KEY); @@ -36,6 +37,7 @@ $(document).ready(function() { }) }; + // replace with something elastici! // Setup the search helper var helper = algoliasearchHelper(algolia, INDEX_NAME, params); @@ -43,6 +45,7 @@ $(document).ready(function() { var result = $.grep(FACET_CONFIG, function(e) { return e.hidden && e.hidden == true; }); + for (var i = 0; i < result.length; i++) { var f = result[i]; helper.addFacetRefinement(f.name, f.value); @@ -60,9 +63,11 @@ $(document).ready(function() { helper.on('change', function(state) { setURLParams(state); }); + helper.on('error', function(error) { console.log(error); }); + helper.on('result', function(content, state) { renderStats(content); renderHits(content); From b6af919fa9229d37e138baf4d209cf2ea3a669cc Mon Sep 17 00:00:00 2001 From: Stephan Preeker Date: Fri, 3 Nov 2017 16:40:02 +0100 Subject: [PATCH 2/7] T53161 proces feedback sybren, replace angolia with search --- pillar/api/users/__init__.py | 10 ++-- pillar/api/users/hooks.py | 8 ++-- pillar/api/utils/algolia.py | 46 ++++++++++--------- .../{algolia_tasks.py => algolia_indexing.py} | 3 +- pillar/celery/search_index_tasks.py | 31 +++++++++---- pillar/config.py | 1 + src/scripts/algolia_search.js | 2 + 7 files changed, 61 insertions(+), 40 deletions(-) rename pillar/celery/{algolia_tasks.py => algolia_indexing.py} (97%) diff --git a/pillar/api/users/__init__.py b/pillar/api/users/__init__.py index 664f78c0..20329c3a 100644 --- a/pillar/api/users/__init__.py +++ b/pillar/api/users/__init__.py @@ -31,7 +31,7 @@ def add_user_to_group(user_id: bson.ObjectId, group_id: bson.ObjectId): def user_group_action(user_id: bson.ObjectId, group_id: bson.ObjectId, action: str): """Performs a group action (add/remove). - + :param user_id: the user's ObjectID. :param group_id: the group's ObjectID. :param action: either '$pull' to remove from a group, or '$addToSet' to add to a group. @@ -54,9 +54,9 @@ def user_group_action(user_id: bson.ObjectId, group_id: bson.ObjectId, action: s f'user not found.') -def _update_algolia_user_changed_role(sender, user: dict): +def _update_search_user_changed_role(sender, user: dict): log.debug('Sending updated user %s to Algolia due to role change', user['_id']) - hooks.push_updated_user_to_algolia(user, original=None) + hooks.push_updated_user_to_search(user, original=None) def setup_app(app, api_prefix): @@ -66,7 +66,7 @@ def setup_app(app, api_prefix): app.on_post_GET_users += hooks.post_GET_user app.on_pre_PUT_users += hooks.check_put_access app.on_pre_PUT_users += hooks.before_replacing_user - app.on_replaced_users += hooks.push_updated_user_to_algolia + app.on_replaced_users += hooks.push_updated_user_to_search app.on_replaced_users += hooks.send_blinker_signal_roles_changed app.on_fetched_item_users += hooks.after_fetching_user app.on_fetched_resource_users += hooks.after_fetching_user_resource @@ -76,4 +76,4 @@ def setup_app(app, api_prefix): app.register_api_blueprint(blueprint_api, url_prefix=api_prefix) - service.signal_user_changed_role.connect(_update_algolia_user_changed_role) + service.signal_user_changed_role.connect(_update_search_user_changed_role) diff --git a/pillar/api/users/hooks.py b/pillar/api/users/hooks.py index 7bdf75d4..9d8033a2 100644 --- a/pillar/api/users/hooks.py +++ b/pillar/api/users/hooks.py @@ -65,15 +65,15 @@ def before_replacing_user(request, lookup): 'email field must be given') -def push_updated_user_to_algolia(user, original): +def push_updated_user_to_search(user, original): """ - Push an update to the Algolia index when a user + Push an update to the Search index when a user item is updated """ - from pillar.celery import search_index_tasks as index + from pillar.celery import search_index_tasks as searchindex - index.updated_user.delay(str(user['_id'])) + searchindex.updated_user.delay(str(user['_id'])) def send_blinker_signal_roles_changed(user, original): diff --git a/pillar/api/utils/algolia.py b/pillar/api/utils/algolia.py index f5e8abcc..c1daafda 100644 --- a/pillar/api/utils/algolia.py +++ b/pillar/api/utils/algolia.py @@ -40,26 +40,28 @@ def algolia_index_user_save(user): user['_id'], index_users.index_name) -def _handle_picture(node, doc): +def _handle_picture(node: dict, to_index: dict): """ add picture fields to be indexed """ - if 'picture' in node and node['picture']: - files_collection = current_app.data.driver.db['files'] - lookup = {'_id': ObjectId(node['picture'])} - picture = files_collection.find_one(lookup) + if not node.get('picture'): + return - img_variation_t = next( - (item for item in picture['variations'] - if item['size'] == 't'), None) + files_collection = current_app.data.driver.db['files'] + lookup = {'_id': ObjectId(node['picture'])} + picture = files_collection.find_one(lookup) - if img_variation_t: - doc['picture'] = generate_link( - picture['backend'], - img_variation_t['file_path'], - project_id=str(picture['project']), - is_public=True) + img_variation_t = next( + (item for item in picture['variations'] + if item['size'] == 't'), None) + + if img_variation_t: + to_index['picture'] = generate_link( + picture['backend'], + img_variation_t['file_path'], + project_id=str(picture['project']), + is_public=True) @skip_when_testing @@ -78,7 +80,7 @@ def algolia_index_node_save(node): users_collection = current_app.data.driver.db['users'] user = users_collection.find_one({'_id': ObjectId(node['user'])}) - doc = { + to_index = { 'objectID': node['_id'], 'name': node['name'], 'project': { @@ -95,25 +97,25 @@ def algolia_index_node_save(node): } if 'description' in node and node['description']: - doc['description'] = node['description'] + to_index['description'] = node['description'] - _handle_picture(node, doc) + _handle_picture(node, to_index) # If the node has world permissions, compute the Free permission - if 'permissions' in node and 'world' in node['permissions']: + if 'world' in node.get('permissions', {}): if 'GET' in node['permissions']['world']: - doc['is_free'] = True + to_index['is_free'] = True # Append the media key if the node is of node_type 'asset' if node['node_type'] == 'asset': - doc['media'] = node['properties']['content_type'] + to_index['media'] = node['properties']['content_type'] # Add extra properties for prop in ('tags', 'license_notes'): if prop in node['properties']: - doc[prop] = node['properties'][prop] + to_index[prop] = node['properties'][prop] - current_app.algolia_index_nodes.save_object(doc) + current_app.algolia_index_nodes.save_object(to_index) @skip_when_testing diff --git a/pillar/celery/algolia_tasks.py b/pillar/celery/algolia_indexing.py similarity index 97% rename from pillar/celery/algolia_tasks.py rename to pillar/celery/algolia_indexing.py index 5218e8f0..d947983d 100644 --- a/pillar/celery/algolia_tasks.py +++ b/pillar/celery/algolia_indexing.py @@ -8,7 +8,7 @@ from pillar import current_app log = logging.getLogger(__name__) -def push_updated_user_to_algolia(user_id: str): +def push_updated_user(user_id: str): """Push an update to the Algolia index when a user item is updated""" from pillar.api.utils.algolia import algolia_index_user_save @@ -48,6 +48,7 @@ def index_node_save(node_id: str): def index_node_delete(node_id: str): + from pillar.api.utils.algolia import algolia_index_node_delete # Deleting a node takes nothing more than the ID anyway. diff --git a/pillar/celery/search_index_tasks.py b/pillar/celery/search_index_tasks.py index 769e331f..ce17c69a 100644 --- a/pillar/celery/search_index_tasks.py +++ b/pillar/celery/search_index_tasks.py @@ -1,30 +1,45 @@ import logging - -from . import algolia_tasks - from pillar import current_app +from . import algolia_indexing +# from . import elastic_indexing + + log = logging.getLogger(__name__) +# TODO(stephan) make index backend conditional on settings. -# TODO make index backend conditional on settings. -# now uses angolia, but should use elastic +SEARCH_BACKENDS = { + 'algolia': algolia_indexing, + 'elastic': None, # elastic_indexing +} @current_app.celery.task(ignore_result=True) def updated_user(user_id: str): """Push an update to the index when a user item is updated""" - algolia_tasks.push_updated_user_to_algolia(user_id) + algolia_indexing.push_updated_user(user_id) @current_app.celery.task(ignore_result=True) def node_save(node_id: str): - algolia_tasks.index_node_save(node_id) + algolia_indexing.index_node_save(node_id) @current_app.celery.task(ignore_result=True) def node_delete(node_id: str): - algolia_tasks.index_node_delete(node_id) + algolia_indexing.index_node_delete(node_id) + + + +def build_doc_to_index_from(node: dict): + """ + Given node build an to_index document + """ + pass + + + diff --git a/pillar/config.py b/pillar/config.py index 526ae4a6..89bee431 100644 --- a/pillar/config.py +++ b/pillar/config.py @@ -75,6 +75,7 @@ ALGOLIA_INDEX_NODES = 'dev_Nodes' SEARCH_BACKEND = 'algolia' # algolia, elastic + ZENCODER_API_KEY = '-SECRET-' ZENCODER_NOTIFICATIONS_SECRET = '-SECRET-' ZENCODER_NOTIFICATIONS_URL = 'http://zencoderfetcher/' diff --git a/src/scripts/algolia_search.js b/src/scripts/algolia_search.js index d1b445c6..be2982c1 100644 --- a/src/scripts/algolia_search.js +++ b/src/scripts/algolia_search.js @@ -2,6 +2,8 @@ $(document).ready(function() { /******************** * INITIALIZATION + * + * TODO (stephan) * *******************/ var HITS_PER_PAGE = 25; From 1fe88819f43d5efb57c14b0c097d0b95cbccfa69 Mon Sep 17 00:00:00 2001 From: Stephan Preeker Date: Fri, 3 Nov 2017 18:18:12 +0100 Subject: [PATCH 3/7] T53161 WIP create elasticsearch app / doc / stuff --- pillar/__init__.py | 2 +- pillar/api/search/__init__.py | 0 pillar/api/search/algolia_indexing.py | 38 ++++++ pillar/api/search/documents.py | 66 ++++++++++ pillar/api/search/elastic_indexing.py | 25 ++++ pillar/api/utils/algolia.py | 105 ++-------------- pillar/celery/algolia_indexing.py | 55 +++------ pillar/celery/search_index_tasks.py | 170 +++++++++++++++++++++++--- pillar/config.py | 2 +- 9 files changed, 306 insertions(+), 157 deletions(-) create mode 100644 pillar/api/search/__init__.py create mode 100644 pillar/api/search/algolia_indexing.py create mode 100644 pillar/api/search/documents.py create mode 100644 pillar/api/search/elastic_indexing.py diff --git a/pillar/__init__.py b/pillar/__init__.py index 646c5a83..ec39f845 100644 --- a/pillar/__init__.py +++ b/pillar/__init__.py @@ -241,7 +241,7 @@ class PillarServer(Eve): def _config_algolia(self): # Algolia search - if self.config['SEARCH_BACKEND'] != 'algolia': + if 'algolia' not in self.config['SEARCH_BACKENDS']: return from algoliasearch import algoliasearch diff --git a/pillar/api/search/__init__.py b/pillar/api/search/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pillar/api/search/algolia_indexing.py b/pillar/api/search/algolia_indexing.py new file mode 100644 index 00000000..378b2908 --- /dev/null +++ b/pillar/api/search/algolia_indexing.py @@ -0,0 +1,38 @@ +import logging + +from algoliasearch.helpers import AlgoliaException + +log = logging.getLogger(__name__) + + +def push_updated_user(user_to_index: dict): + """Push an update to the Algolia index when a user item is updated""" + + from pillar.api.utils.algolia import index_user_save + + try: + index_user_save(user_to_index) + except AlgoliaException as ex: + log.warning( + 'Unable to push user info to Algolia for user "%s", id=%s; %s', # noqa + user_to_index.get('username'), + user_to_index.get('objectID'), ex) + + +def index_node_save(node_to_index: dict): + from pillar.api.utils import algolia + + try: + algolia.index_node_save(node_to_index) + except AlgoliaException as ex: + log.warning( + 'Unable to push node info to Algolia for node %s; %s', node_to_index, ex) # noqa + + +def index_node_delete(delete_id: str): + + from pillar.api.utils import algolia + try: + algolia.index_node_delete(delete_id) + except AlgoliaException as ex: + log.warning('Unable to delete node info to Algolia for node %s; %s', delete_id, ex) # noqa diff --git a/pillar/api/search/documents.py b/pillar/api/search/documents.py new file mode 100644 index 00000000..4578d1c9 --- /dev/null +++ b/pillar/api/search/documents.py @@ -0,0 +1,66 @@ +import logging + +import elasticsearch_dsl as es +from elasticsearch_dsl import analysis +# from pillar import current_app + +# define elasticsearch document mapping. + + +log = logging.getLogger(__name__) + + +edge_ngram_filter = analysis.token_filter( + 'edge_ngram_filter', + type='edge_ngram', + min_gram=1, + max_gram=15 +) + + +autocomplete = es.analyzer( + 'autocomplete', + tokenizer='standard', + filter=['lowercase', edge_ngram_filter] +) + + +class User(es.DocType): + """ + Elastic document describing user + """ + + name = es.String( + fielddata=True, + analyzer=autocomplete, + ) + + +class Node(es.DocType): + """ + Elastic document describing user + """ + + node_type = es.Keyword() + + x_code = es.String( + multi=True, + fielddata=True, + analyzer=autocomplete, + ) + + +def create_doc_from_user_data(user_to_index): + doc_id = user_to_index['objectID'] + doc = User(_id=doc_id) + return doc + + +def create_doc_from_node_data(node_to_index): + + # node stuff + doc_id = node_to_index['objectID'] + doc = Node(_id=doc_id) + doc.node_type = node_to_index['node_type'] + + return doc diff --git a/pillar/api/search/elastic_indexing.py b/pillar/api/search/elastic_indexing.py new file mode 100644 index 00000000..dce8f58d --- /dev/null +++ b/pillar/api/search/elastic_indexing.py @@ -0,0 +1,25 @@ +import logging + +log = logging.getLogger(__name__) + + +def push_updated_user(user_to_index: dict): + """Push an update to the Algolia index when a user item is updated""" + + log.warning( + 'WIP USER ELK INDEXING %s %s', + user_to_index.get('username'), + user_to_index.get('objectID')) + + +def index_node_save(node_to_index: dict): + + log.warning( + 'WIP USER NODE INDEXING %s', + node_to_index.get('objectID')) + + +def index_node_delete(delete_id: str): + + log.warning( + 'WIP NODE DELETE INDEXING %s', delete_id) diff --git a/pillar/api/utils/algolia.py b/pillar/api/utils/algolia.py index c1daafda..eabc4af1 100644 --- a/pillar/api/utils/algolia.py +++ b/pillar/api/utils/algolia.py @@ -3,123 +3,32 @@ import logging from bson import ObjectId from pillar import current_app -from pillar.api.file_storage import generate_link from . import skip_when_testing log = logging.getLogger(__name__) -INDEX_ALLOWED_NODE_TYPES = {'asset', 'texture', 'group', 'hdri'} - @skip_when_testing -def algolia_index_user_save(user): +def index_user_save(to_index_user: dict): index_users = current_app.algolia_index_users if not index_users: log.debug('No Algolia index defined, so nothing to do.') return - user_roles = set(user.get('roles', ())) - if 'service' in user_roles: - return - - # Strip unneeded roles - index_roles = user_roles.intersection(current_app.user_roles_indexable) - # Create or update Algolia index for the user - index_users.save_object({ - 'objectID': user['_id'], - 'full_name': user['full_name'], - 'username': user['username'], - 'roles': list(index_roles), - 'groups': user['groups'], - 'email': user['email'] - }) - - log.debug( - 'Pushed user %r to Algolia index %r', - user['_id'], index_users.index_name) - - -def _handle_picture(node: dict, to_index: dict): - """ - add picture fields to be indexed - """ - - if not node.get('picture'): - return - - files_collection = current_app.data.driver.db['files'] - lookup = {'_id': ObjectId(node['picture'])} - picture = files_collection.find_one(lookup) - - img_variation_t = next( - (item for item in picture['variations'] - if item['size'] == 't'), None) - - if img_variation_t: - to_index['picture'] = generate_link( - picture['backend'], - img_variation_t['file_path'], - project_id=str(picture['project']), - is_public=True) + index_users.save_object() @skip_when_testing -def algolia_index_node_save(node): +def index_node_save(node_to_index): + if not current_app.algolia_index_nodes: return - if node['node_type'] not in INDEX_ALLOWED_NODE_TYPES: - return - # If a nodes does not have status published, do not index - if node['properties'].get('status') != 'published': - return - - projects_collection = current_app.data.driver.db['projects'] - project = projects_collection.find_one({'_id': ObjectId(node['project'])}) - - users_collection = current_app.data.driver.db['users'] - user = users_collection.find_one({'_id': ObjectId(node['user'])}) - - to_index = { - 'objectID': node['_id'], - 'name': node['name'], - 'project': { - '_id': project['_id'], - 'name': project['name'] - }, - 'created': node['_created'], - 'updated': node['_updated'], - 'node_type': node['node_type'], - 'user': { - '_id': user['_id'], - 'full_name': user['full_name'] - }, - } - - if 'description' in node and node['description']: - to_index['description'] = node['description'] - - _handle_picture(node, to_index) - - # If the node has world permissions, compute the Free permission - if 'world' in node.get('permissions', {}): - if 'GET' in node['permissions']['world']: - to_index['is_free'] = True - - # Append the media key if the node is of node_type 'asset' - if node['node_type'] == 'asset': - to_index['media'] = node['properties']['content_type'] - - # Add extra properties - for prop in ('tags', 'license_notes'): - if prop in node['properties']: - to_index[prop] = node['properties'][prop] - - current_app.algolia_index_nodes.save_object(to_index) + current_app.algolia_index_nodes.save_object(node_to_index) @skip_when_testing -def algolia_index_node_delete(node): +def index_node_delete(delete_id): if current_app.algolia_index_nodes is None: return - current_app.algolia_index_nodes.delete_object(node['_id']) + current_app.algolia_index_nodes.delete_object(delete_id) diff --git a/pillar/celery/algolia_indexing.py b/pillar/celery/algolia_indexing.py index d947983d..378b2908 100644 --- a/pillar/celery/algolia_indexing.py +++ b/pillar/celery/algolia_indexing.py @@ -1,61 +1,38 @@ import logging from algoliasearch.helpers import AlgoliaException -import bson - -from pillar import current_app log = logging.getLogger(__name__) -def push_updated_user(user_id: str): +def push_updated_user(user_to_index: dict): """Push an update to the Algolia index when a user item is updated""" - from pillar.api.utils.algolia import algolia_index_user_save - - user_oid = bson.ObjectId(user_id) - log.info('Retrieving user %s', user_oid) - users_coll = current_app.db('users') - user = users_coll.find_one({'_id': user_oid}) - if user is None: - log.warning('Unable to find user %s, not updating Algolia.', user_oid) - return + from pillar.api.utils.algolia import index_user_save try: - algolia_index_user_save(user) + index_user_save(user_to_index) except AlgoliaException as ex: - log.warning('Unable to push user info to Algolia for user "%s", id=%s; %s', # noqa - user.get('username'), user_id, ex) + log.warning( + 'Unable to push user info to Algolia for user "%s", id=%s; %s', # noqa + user_to_index.get('username'), + user_to_index.get('objectID'), ex) -def index_node_save(node_id: str): - from pillar.api.utils.algolia import algolia_index_node_save - - node_oid = bson.ObjectId(node_id) - log.info('Retrieving node %s', node_oid) - - nodes_coll = current_app.db('nodes') - node = nodes_coll.find_one({'_id': node_oid}) - - if node is None: - log.warning('Unable to find node %s, not updating Algolia.', node_id) - return +def index_node_save(node_to_index: dict): + from pillar.api.utils import algolia try: - algolia_index_node_save(node) + algolia.index_node_save(node_to_index) except AlgoliaException as ex: - log.warning('Unable to push node info to Algolia for node %s; %s', node_id, ex) # noqa + log.warning( + 'Unable to push node info to Algolia for node %s; %s', node_to_index, ex) # noqa -def index_node_delete(node_id: str): - - from pillar.api.utils.algolia import algolia_index_node_delete - - # Deleting a node takes nothing more than the ID anyway. - # No need to fetch anything from Mongo. - fake_node = {'_id': bson.ObjectId(node_id)} +def index_node_delete(delete_id: str): + from pillar.api.utils import algolia try: - algolia_index_node_delete(fake_node) + algolia.index_node_delete(delete_id) except AlgoliaException as ex: - log.warning('Unable to delete node info to Algolia for node %s; %s', node_id, ex) # noqa + log.warning('Unable to delete node info to Algolia for node %s; %s', delete_id, ex) # noqa diff --git a/pillar/celery/search_index_tasks.py b/pillar/celery/search_index_tasks.py index ce17c69a..e9bf2412 100644 --- a/pillar/celery/search_index_tasks.py +++ b/pillar/celery/search_index_tasks.py @@ -1,45 +1,179 @@ import logging +from bson import ObjectId from pillar import current_app +from pillar.api.file_storage import generate_link -from . import algolia_indexing -# from . import elastic_indexing +# TODO WIP (stephan) make index backend conditional on settings. +from pillar.api.search import elastic_indexing + +from pillar.api.search import algolia_indexing log = logging.getLogger(__name__) -# TODO(stephan) make index backend conditional on settings. + +INDEX_ALLOWED_NODE_TYPES = {'asset', 'texture', 'group', 'hdri'} + SEARCH_BACKENDS = { 'algolia': algolia_indexing, - 'elastic': None, # elastic_indexing + 'elastic': elastic_indexing } +def _get_node_from_id(node_id: str): + """ + """ + node_oid = ObjectId(node_id) + log.info('Retrieving node %s', node_oid) + + nodes_coll = current_app.db('nodes') + node = nodes_coll.find_one({'_id': node_oid}) + + return node + + +def _handle_picture(node: dict, to_index: dict): + """ + add picture fields to be indexed + """ + + if not node.get('picture'): + return + + files_collection = current_app.data.driver.db['files'] + lookup = {'_id': ObjectId(node['picture'])} + picture = files_collection.find_one(lookup) + + img_variation_t = next( + (item for item in picture['variations'] + if item['size'] == 't'), None) + + if img_variation_t: + to_index['picture'] = generate_link( + picture['backend'], + img_variation_t['file_path'], + project_id=str(picture['project']), + is_public=True) + + +def prepare_node_data(node_id: str): + """ + Given node build data object with fields to index + """ + node = _get_node_from_id(node_id) + + if node is None: + log.warning('Unable to find node %s, not updating Algolia.', node_id) + return + + if node['node_type'] not in INDEX_ALLOWED_NODE_TYPES: + return + # If a nodes does not have status published, do not index + if node['properties'].get('status') != 'published': + return + + projects_collection = current_app.data.driver.db['projects'] + project = projects_collection.find_one({'_id': ObjectId(node['project'])}) + + users_collection = current_app.data.driver.db['users'] + user = users_collection.find_one({'_id': ObjectId(node['user'])}) + + to_index = { + 'objectID': node['_id'], + 'name': node['name'], + 'project': { + '_id': project['_id'], + 'name': project['name'] + }, + 'created': node['_created'], + 'updated': node['_updated'], + 'node_type': node['node_type'], + 'user': { + '_id': user['_id'], + 'full_name': user['full_name'] + }, + } + + if 'description' in node and node['description']: + to_index['description'] = node['description'] + + _handle_picture(node, to_index) + + # If the node has world permissions, compute the Free permission + if 'world' in node.get('permissions', {}): + if 'GET' in node['permissions']['world']: + to_index['is_free'] = True + + # Append the media key if the node is of node_type 'asset' + if node['node_type'] == 'asset': + to_index['media'] = node['properties']['content_type'] + + # Add extra properties + for prop in ('tags', 'license_notes'): + if prop in node['properties']: + to_index[prop] = node['properties'][prop] + + return to_index + + +def prepare_user_data(user_id: str): + """ + Prepare data to index for user node + """ + + user_oid = ObjectId(user_id) + log.info('Retrieving user %s', user_oid) + users_coll = current_app.db('users') + user = users_coll.find_one({'_id': user_oid}) + if user is None: + log.warning('Unable to find user %s, not updating Algolia.', user_oid) + return + + user_roles = set(user.get('roles', ())) + if 'service' in user_roles: + return + + # Strip unneeded roles + index_roles = user_roles.intersection(current_app.user_roles_indexable) + + log.debug('Pushed user %r to Search index', user['_id']) + + user_to_index = { + 'objectID': user['_id'], + 'full_name': user['full_name'], + 'username': user['username'], + 'roles': list(index_roles), + 'groups': user['groups'], + 'email': user['email'] + } + + return user_to_index + + @current_app.celery.task(ignore_result=True) def updated_user(user_id: str): """Push an update to the index when a user item is updated""" - algolia_indexing.push_updated_user(user_id) + user_to_index = prepare_user_data(user_id) + + for searchoption in current_app.config['SEARCH_BACKENDS']: + for searchmodule in SEARCH_BACKENDS[searchoption]: + searchmodule.push_updated_user(user_to_index) @current_app.celery.task(ignore_result=True) def node_save(node_id: str): - algolia_indexing.index_node_save(node_id) + to_index = prepare_node_data(node_id) + + algolia_indexing.index_node_save(to_index) @current_app.celery.task(ignore_result=True) def node_delete(node_id: str): - algolia_indexing.index_node_delete(node_id) - - - -def build_doc_to_index_from(node: dict): - """ - Given node build an to_index document - """ - pass - - - + # Deleting a node takes nothing more than the ID anyway. + # No need to fetch anything from Mongo. + delete_id = ObjectId(node_id) + algolia_indexing.index_node_delete(delete_id) diff --git a/pillar/config.py b/pillar/config.py index 89bee431..deb60871 100644 --- a/pillar/config.py +++ b/pillar/config.py @@ -73,7 +73,7 @@ ALGOLIA_API_KEY = '-SECRET-' ALGOLIA_INDEX_USERS = 'dev_Users' ALGOLIA_INDEX_NODES = 'dev_Nodes' -SEARCH_BACKEND = 'algolia' # algolia, elastic +SEARCH_BACKENDS = ['algolia', 'elastic'] ZENCODER_API_KEY = '-SECRET-' From a8849ec823d5200d360d45b45e0b2bba6ad8575c Mon Sep 17 00:00:00 2001 From: Stephan Preeker Date: Fri, 10 Nov 2017 16:05:12 +0100 Subject: [PATCH 4/7] T53161 elasticsearch can index nodes now. cli command. NOTE config changes!! --- pillar/__init__.py | 1 + pillar/api/__init__.py | 2 + pillar/api/search/__init__.py | 13 +++++ pillar/api/search/documents.py | 59 ++++++++++++++++++++--- pillar/api/search/elastic_indexing.py | 30 ++++++++++-- pillar/api/search/index.py | 69 +++++++++++++++++++++++++++ pillar/api/search/queries.py | 42 ++++++++++++++++ pillar/api/search/routes.py | 33 +++++++++++++ pillar/api/utils/algolia.py | 2 +- pillar/celery/search_index_tasks.py | 20 +++++--- pillar/cli/__init__.py | 5 +- pillar/cli/elastic.py | 42 ++++++++++++++++ pillar/config.py | 6 +++ 13 files changed, 304 insertions(+), 20 deletions(-) create mode 100644 pillar/api/search/index.py create mode 100644 pillar/api/search/queries.py create mode 100644 pillar/api/search/routes.py create mode 100644 pillar/cli/elastic.py diff --git a/pillar/__init__.py b/pillar/__init__.py index ec39f845..18b18497 100644 --- a/pillar/__init__.py +++ b/pillar/__init__.py @@ -693,6 +693,7 @@ class PillarServer(Eve): api.setup_app(self) web.setup_app(self) + authentication.setup_app(self) for ext in self.pillar_extensions.values(): diff --git a/pillar/api/__init__.py b/pillar/api/__init__.py index d9172b47..bf513675 100644 --- a/pillar/api/__init__.py +++ b/pillar/api/__init__.py @@ -2,9 +2,11 @@ def setup_app(app): from . import encoding, blender_id, projects, local_auth, file_storage from . import users, nodes, latest, blender_cloud, service, activities from . import organizations + from . import search encoding.setup_app(app, url_prefix='/encoding') blender_id.setup_app(app, url_prefix='/blender_id') + search.setup_app(app, url_prefix='/newsearch') projects.setup_app(app, api_prefix='/p') local_auth.setup_app(app, url_prefix='/auth') file_storage.setup_app(app, url_prefix='/storage') diff --git a/pillar/api/search/__init__.py b/pillar/api/search/__init__.py index e69de29b..dbe228ff 100644 --- a/pillar/api/search/__init__.py +++ b/pillar/api/search/__init__.py @@ -0,0 +1,13 @@ +import logging + +#import bson +#from flask import current_app + +from .routes import blueprint_search + +log = logging.getLogger(__name__) + + +def setup_app(app, url_prefix: str =None): + app.register_api_blueprint( + blueprint_search, url_prefix=url_prefix) diff --git a/pillar/api/search/documents.py b/pillar/api/search/documents.py index 4578d1c9..2638479f 100644 --- a/pillar/api/search/documents.py +++ b/pillar/api/search/documents.py @@ -26,15 +26,16 @@ autocomplete = es.analyzer( class User(es.DocType): - """ - Elastic document describing user - """ + """Elastic document describing user.""" name = es.String( fielddata=True, analyzer=autocomplete, ) + class Meta: + index = 'users' + class Node(es.DocType): """ @@ -43,12 +44,39 @@ class Node(es.DocType): node_type = es.Keyword() - x_code = es.String( - multi=True, + objectID = es.Keyword() + + name = es.String( fielddata=True, - analyzer=autocomplete, + analyzer=autocomplete ) + user_id = es.Keyword() + user_name = es.String( + fielddata=True, + analyzer=autocomplete + ) + + description = es.String() + + is_free = es.Boolean() + + project_id = es.Keyword() + project_name = es.String() + + media = es.Keyword() + + picture_url = es.Keyword() + + tags = es.Keyword(multi=True) + license_notes = es.String() + + created_at = es.Date() + updated_at = es.Date() + + class Meta: + index = 'nodes' + def create_doc_from_user_data(user_to_index): doc_id = user_to_index['objectID'] @@ -59,8 +87,25 @@ def create_doc_from_user_data(user_to_index): def create_doc_from_node_data(node_to_index): # node stuff - doc_id = node_to_index['objectID'] + doc_id = str(node_to_index['objectID']) doc = Node(_id=doc_id) + doc.node_type = node_to_index['node_type'] + doc.name = node_to_index['name'] + doc.user_id = str(node_to_index['user']['_id']) + doc.user_name = node_to_index['user']['full_name'] + doc.project_id = str(node_to_index['project']['_id']) + doc.project_name = node_to_index['project']['name'] + + if node_to_index['node_type'] == 'asset': + doc.media = node_to_index['media'] + + doc.picture_url = node_to_index.get('picture') + + doc.tags = node_to_index.get('tags') + doc.license_notes = node_to_index.get('license_notes') + + doc.created_at = node_to_index['created'] + doc.updated_at = node_to_index['updated'] return doc diff --git a/pillar/api/search/elastic_indexing.py b/pillar/api/search/elastic_indexing.py index dce8f58d..a76f0fa3 100644 --- a/pillar/api/search/elastic_indexing.py +++ b/pillar/api/search/elastic_indexing.py @@ -1,10 +1,25 @@ import logging +from pillar import current_app +from elasticsearch_dsl.connections import connections + +from . import documents + + +elk_hosts = current_app.config['ELASTIC_SEARCH_HOSTS'] + +connections.create_connection( + hosts=elk_hosts, + sniff_on_start=True, + timeout=20) log = logging.getLogger(__name__) def push_updated_user(user_to_index: dict): - """Push an update to the Algolia index when a user item is updated""" + """ + Push an update to the Elastic index when + a user item is updated. + """ log.warning( 'WIP USER ELK INDEXING %s %s', @@ -15,11 +30,18 @@ def push_updated_user(user_to_index: dict): def index_node_save(node_to_index: dict): log.warning( - 'WIP USER NODE INDEXING %s', + 'ELK NODE INDEXING %s', node_to_index.get('objectID')) + log.warning(node_to_index) + + doc = documents.create_doc_from_node_data(node_to_index) + + log.warning('CREATED ELK DOC') + doc.save() + def index_node_delete(delete_id: str): - log.warning( - 'WIP NODE DELETE INDEXING %s', delete_id) + log.warning('NODE DELETE INDEXING %s', delete_id) + documents.Node(id=delete_id).delete() diff --git a/pillar/api/search/index.py b/pillar/api/search/index.py new file mode 100644 index 00000000..d052f580 --- /dev/null +++ b/pillar/api/search/index.py @@ -0,0 +1,69 @@ +import logging +# import time + +# from elasticsearch import helpers +# import elasticsearch + +# from elasticsearch.client import IndicesClient + +from elasticsearch.exceptions import NotFoundError +from elasticsearch_dsl.connections import connections +import elasticsearch_dsl as es + +from pillar import current_app + +from . import documents + +log = logging.getLogger(__name__) + + +class ResetIndexTask(object): + """ + Clear and build index / mapping + """ + index = '' + doc_types = [] + name = 'remove index' + + def __init__(self): + + if not self.index: + raise ValueError("No index specified") + + if not self.doc_types: + raise ValueError("No doc_types specified") + + connections.create_connection( + hosts=current_app.config['ELASTIC_SEARCH_HOSTS'], + # sniff_on_start=True, + retry_on_timeout=True, + ) + + def execute(self): + + idx = es.Index(self.index) + + try: + idx.delete(ignore=404) + log.info("Deleted index %s", self.index) + except AttributeError: + log.warning("Could not delete index '%s', ignoring", self.index) + except NotFoundError: + log.warning("Could not delete index '%s', ignoring", self.index) + + # create doc types + for dt in self.doc_types: + idx.doc_type(dt) + + # create index + idx.create() + + +class ResetNodeIndex(ResetIndexTask): + index = current_app.config['ELASTIC_INDICES']['NODE'] + doc_types = [documents.Node] + + +def reset_node_index(): + resettask = ResetNodeIndex() + resettask.execute() diff --git a/pillar/api/search/queries.py b/pillar/api/search/queries.py new file mode 100644 index 00000000..9aaa7bbf --- /dev/null +++ b/pillar/api/search/queries.py @@ -0,0 +1,42 @@ +import logging +import json +from elasticsearch import Elasticsearch +from elasticsearch_dsl import Search, Q +from elasticsearch_dsl.connections import connections + +from pillar import current_app + +#elk_hosts = current_app.config['ELASTIC_SEARCH_HOSTS'] +# +#connections.create_connection( +# hosts=elk_hosts, +# sniff_on_start=True, +# timeout=20) +# +client = Elasticsearch() + +log = logging.getLogger(__name__) + + +def do_search(query: str) -> dict: + """ + Given user input search for node/stuff + """ + should = [ + Q('match', name=query), + Q('match', user_name=query), + Q('match', project_name=query), + Q('match', description=query), + Q('term', media=query), + Q('term', tags=query), + ] + bool_query = Q('bool', should=should) + search = Search(using=client) + search.query = bool_query + + if current_app.config['DEBUG']: + log.debug(json.dumps(search.to_dict(), indent=4)) + + response = search.execute() + + return response.to_dict() diff --git a/pillar/api/search/routes.py b/pillar/api/search/routes.py new file mode 100644 index 00000000..77f1a7e7 --- /dev/null +++ b/pillar/api/search/routes.py @@ -0,0 +1,33 @@ +import json +import logging + +from bson import ObjectId +from flask import Blueprint, request, current_app, make_response, url_for +from flask import Response +from werkzeug import exceptions as wz_exceptions + +from pillar.api.utils import authorization, jsonify, str2id +from pillar.api.utils import mongo +from pillar.api.utils.authorization import require_login, check_permissions +from pillar.auth import current_user + + +log = logging.getLogger(__name__) + +blueprint_search = Blueprint('elksearch', __name__) + +from . import queries + +#@authorization.require_login(require_cap='subscriber') +@blueprint_search.route('/', methods=['GET']) +def search_nodes(): + + searchword = request.args.get('q', '') + + if not searchword: + return 'You are forgetting a "?q=whatareyoulookingfor"' + + data = queries.do_search(searchword) + + resp = Response(json.dumps(data), mimetype='application/json') + return resp diff --git a/pillar/api/utils/algolia.py b/pillar/api/utils/algolia.py index eabc4af1..169be666 100644 --- a/pillar/api/utils/algolia.py +++ b/pillar/api/utils/algolia.py @@ -16,7 +16,7 @@ def index_user_save(to_index_user: dict): return # Create or update Algolia index for the user - index_users.save_object() + index_users.save_object(to_index_user) @skip_when_testing diff --git a/pillar/celery/search_index_tasks.py b/pillar/celery/search_index_tasks.py index e9bf2412..e5cac649 100644 --- a/pillar/celery/search_index_tasks.py +++ b/pillar/celery/search_index_tasks.py @@ -3,7 +3,6 @@ from bson import ObjectId from pillar import current_app from pillar.api.file_storage import generate_link -# TODO WIP (stephan) make index backend conditional on settings. from pillar.api.search import elastic_indexing from pillar.api.search import algolia_indexing @@ -57,11 +56,12 @@ def _handle_picture(node: dict, to_index: dict): is_public=True) -def prepare_node_data(node_id: str): +def prepare_node_data(node_id: str, node=None) -> dict: """ Given node build data object with fields to index """ - node = _get_node_from_id(node_id) + if node_id: + node = _get_node_from_id(node_id) if node is None: log.warning('Unable to find node %s, not updating Algolia.', node_id) @@ -117,7 +117,7 @@ def prepare_node_data(node_id: str): return to_index -def prepare_user_data(user_id: str): +def prepare_user_data(user_id: str) -> dict: """ Prepare data to index for user node """ @@ -158,8 +158,8 @@ def updated_user(user_id: str): user_to_index = prepare_user_data(user_id) for searchoption in current_app.config['SEARCH_BACKENDS']: - for searchmodule in SEARCH_BACKENDS[searchoption]: - searchmodule.push_updated_user(user_to_index) + searchmodule = SEARCH_BACKENDS[searchoption] + searchmodule.push_updated_user(user_to_index) @current_app.celery.task(ignore_result=True) @@ -167,7 +167,9 @@ def node_save(node_id: str): to_index = prepare_node_data(node_id) - algolia_indexing.index_node_save(to_index) + for searchoption in current_app.config['SEARCH_BACKENDS']: + searchmodule = SEARCH_BACKENDS[searchoption] + searchmodule.index_node_save(to_index) @current_app.celery.task(ignore_result=True) @@ -177,3 +179,7 @@ def node_delete(node_id: str): # No need to fetch anything from Mongo. delete_id = ObjectId(node_id) algolia_indexing.index_node_delete(delete_id) + + for searchoption in current_app.config['SEARCH_BACKENDS']: + searchmodule = SEARCH_BACKENDS[searchoption] + searchmodule.index_node_delete(delete_id) diff --git a/pillar/cli/__init__.py b/pillar/cli/__init__.py index 9ff08f7a..1fec040d 100644 --- a/pillar/cli/__init__.py +++ b/pillar/cli/__init__.py @@ -12,6 +12,8 @@ from pillar.cli.celery import manager_celery from pillar.cli.maintenance import manager_maintenance from pillar.cli.operations import manager_operations from pillar.cli.setup import manager_setup +from pillar.cli.elastic import manager_elk + from pillar.cli import translations log = logging.getLogger(__name__) @@ -20,4 +22,5 @@ manager = Manager(current_app) manager.add_command('celery', manager_celery) manager.add_command("maintenance", manager_maintenance) manager.add_command("setup", manager_setup) -manager.add_command("operations", manager_operations) \ No newline at end of file +manager.add_command("operations", manager_operations) +manager.add_command("elastic", manager_elk) diff --git a/pillar/cli/elastic.py b/pillar/cli/elastic.py new file mode 100644 index 00000000..dcadc55f --- /dev/null +++ b/pillar/cli/elastic.py @@ -0,0 +1,42 @@ +import logging + +from flask_script import Manager + +from pillar import current_app + +log = logging.getLogger(__name__) + +manager_elk = Manager( + current_app, usage="Elastic utilities, like reset_index()") + + +@manager_elk.command +def reset_index(elk_index): + """ + Destroy and recreate elastic indices + + node, user ... + """ + #real_current_app = current_app._get_current_object()._get_current_object() + + with current_app.app_context(): + from pillar.api.search import index + if elk_index == 'nodes': + index.reset_node_index() + + +@manager_elk.command +def reindex_nodes(): + + db = current_app.db() + nodes_coll = db['nodes'] + node_count = nodes_coll.count() + + log.debug('Reindexing %d in Elastic', node_count) + + from pillar.celery.search_index_tasks import prepare_node_data + from pillar.api.search import elastic_indexing + + for node in nodes_coll.find(): + to_index = prepare_node_data('', node=node) + elastic_indexing.index_node_save(to_index) diff --git a/pillar/config.py b/pillar/config.py index deb60871..42b19fb1 100644 --- a/pillar/config.py +++ b/pillar/config.py @@ -75,6 +75,12 @@ ALGOLIA_INDEX_NODES = 'dev_Nodes' SEARCH_BACKENDS = ['algolia', 'elastic'] +ELASTIC_SEARCH_HOSTS = ['elasticsearch'] +ELASTIC_INDICES = { + 'NODE': 'nodes', + 'USER': 'users', +} + ZENCODER_API_KEY = '-SECRET-' ZENCODER_NOTIFICATIONS_SECRET = '-SECRET-' From 5a9a2c426878ea0e65a53c68406f3f1b23c6e08f Mon Sep 17 00:00:00 2001 From: Stephan Preeker Date: Fri, 10 Nov 2017 19:09:14 +0100 Subject: [PATCH 5/7] T53161 WIP javascript search WIP WIP --- src/scripts/tutti/4_search.js | 36 +++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/src/scripts/tutti/4_search.js b/src/scripts/tutti/4_search.js index c705fa1a..9c53603c 100644 --- a/src/scripts/tutti/4_search.js +++ b/src/scripts/tutti/4_search.js @@ -7,16 +7,44 @@ $(document).ready(function() { if (typeof algoliaIndex === 'undefined') return; + var elasticSearch = function() { + + console.log('yay'); + + return function findMatches(q, cb, async){ + let newhits = []; + // do a query + console.log(q); + $.getJSON("/api/newsearch?q=" + q, function( data ) { + console.log(data.hits.hits); + let hits = data.hits.hits; + newhits = hits.map(function(hit){ + console.log(hit._source); + return hit._source; + }); + console.log(newhits); + cb(newhits); + console.log(cb); + async(); + }); + //api/newsearch?q=test%20dji + // return the matches.. + //cb(matches); + }; + + }; var searchInput = $('#cloud-search'); var tu = searchInput.typeahead({hint: true}, { - source: algoliaIndex.ttAdapter(), + //source: algoliaIndex.ttAdapter(), + source: elasticSearch(), displayKey: 'name', limit: 10, minLength: 0, templates: { suggestion: function(hit) { - + console.log('hit2'); + console.log(hit); var hitMedia = (hit.media ? ' · '+hit.media+'' : ''); var hitFree = (hit.is_free ? '
free
' : ''); var hitPicture; @@ -76,6 +104,7 @@ $(document).ready(function() { }); searchInput.keyup(function(e) { + console.log('upupup'); if ( $('.tt-dataset').is(':empty') ){ if(e.keyCode == 13){ window.location.href = '/search#q='+ $("#cloud-search").val() + '&page=1'; @@ -84,6 +113,9 @@ $(document).ready(function() { }); searchInput.bind('typeahead:render', function(event, suggestions, async, dataset) { + console.log('woot'); + console.log(suggestions); + console.log(dataset); if( suggestions != undefined && $('.tt-all-results').length <= 0){ $('.tt-dataset').append( '' + From 235a88d613d7e3e7f962dd477dc2e2603e590b06 Mon Sep 17 00:00:00 2001 From: Stephan Preeker Date: Fri, 17 Nov 2017 14:05:24 +0100 Subject: [PATCH 6/7] T53161 Proof of Concept working --- pillar/api/search/documents.py | 33 +++++++++++++-------- pillar/api/search/queries.py | 6 ++-- src/scripts/tutti/4_search.js | 54 +++++++++++++--------------------- 3 files changed, 45 insertions(+), 48 deletions(-) diff --git a/pillar/api/search/documents.py b/pillar/api/search/documents.py index 2638479f..0c3d5249 100644 --- a/pillar/api/search/documents.py +++ b/pillar/api/search/documents.py @@ -51,22 +51,29 @@ class Node(es.DocType): analyzer=autocomplete ) - user_id = es.Keyword() - user_name = es.String( - fielddata=True, - analyzer=autocomplete - ) + user = es.Object({ + 'fields': { + 'id': es.Keyword(), + 'name': es.String( + fielddata=True, + analyzer=autocomplete) + } + }) description = es.String() is_free = es.Boolean() - project_id = es.Keyword() - project_name = es.String() + project = es.Object({ + 'fields': { + 'id': es.Keyword(), + 'name': es.Keyword(), + } + }) media = es.Keyword() - picture_url = es.Keyword() + picture = es.Keyword() tags = es.Keyword(multi=True) license_notes = es.String() @@ -92,15 +99,15 @@ def create_doc_from_node_data(node_to_index): doc.node_type = node_to_index['node_type'] doc.name = node_to_index['name'] - doc.user_id = str(node_to_index['user']['_id']) - doc.user_name = node_to_index['user']['full_name'] - doc.project_id = str(node_to_index['project']['_id']) - doc.project_name = node_to_index['project']['name'] + doc.user.id = str(node_to_index['user']['_id']) + doc.user.name = node_to_index['user']['full_name'] + doc.project.id = str(node_to_index['project']['_id']) + doc.project.name = node_to_index['project']['name'] if node_to_index['node_type'] == 'asset': doc.media = node_to_index['media'] - doc.picture_url = node_to_index.get('picture') + doc.picture = node_to_index.get('picture') doc.tags = node_to_index.get('tags') doc.license_notes = node_to_index.get('license_notes') diff --git a/pillar/api/search/queries.py b/pillar/api/search/queries.py index 9aaa7bbf..26d7698d 100644 --- a/pillar/api/search/queries.py +++ b/pillar/api/search/queries.py @@ -24,8 +24,10 @@ def do_search(query: str) -> dict: """ should = [ Q('match', name=query), - Q('match', user_name=query), - Q('match', project_name=query), + + {"match": {"project.name": query}}, + {"match": {"user.name": query}}, + Q('match', description=query), Q('term', media=query), Q('term', tags=query), diff --git a/src/scripts/tutti/4_search.js b/src/scripts/tutti/4_search.js index 9c53603c..63ae6834 100644 --- a/src/scripts/tutti/4_search.js +++ b/src/scripts/tutti/4_search.js @@ -5,46 +5,38 @@ $(document).ready(function() { - if (typeof algoliaIndex === 'undefined') return; + var getSearch = function(q, cb, async){ + let newhits = []; + $.getJSON("/api/newsearch?q=" + q, function( data ) { + let hits = data.hits.hits; + newhits = hits.map(function(hit){ + return hit._source; + }); + cb(newhits.slice(0)); + async(newhits.slice(0)); + }); + }; - var elasticSearch = function() { - - console.log('yay'); + var elasticSearch = (function() { return function findMatches(q, cb, async){ - let newhits = []; - // do a query - console.log(q); - $.getJSON("/api/newsearch?q=" + q, function( data ) { - console.log(data.hits.hits); - let hits = data.hits.hits; - newhits = hits.map(function(hit){ - console.log(hit._source); - return hit._source; - }); - console.log(newhits); - cb(newhits); - console.log(cb); - async(); - }); - //api/newsearch?q=test%20dji - // return the matches.. - //cb(matches); + if (!cb) { return; } + getSearch(q, cb, async); }; + }); - }; var searchInput = $('#cloud-search'); var tu = searchInput.typeahead({hint: true}, { //source: algoliaIndex.ttAdapter(), source: elasticSearch(), + //source: elkBlood(), + async: true, displayKey: 'name', limit: 10, minLength: 0, templates: { suggestion: function(hit) { - console.log('hit2'); - console.log(hit); var hitMedia = (hit.media ? ' · '+hit.media+'' : ''); var hitFree = (hit.is_free ? '
free
' : ''); var hitPicture; @@ -55,7 +47,7 @@ $(document).ready(function() { hitPicture = '
'; hitPicture += (hit.media ? '' : ''); hitPicture += '
'; - }; + } var $span = $('').addClass('project').text(hit.project.name); var $searchHitName = $('
' + ''+ ''); - }; + } }); }); From 8b25024f6f73dfd32fcc91496645f47630b6d21a Mon Sep 17 00:00:00 2001 From: Stephan Preeker Date: Fri, 17 Nov 2017 16:05:22 +0100 Subject: [PATCH 7/7] T53161 Proof of Concept working USER search. WIP js. --- pillar/api/search/documents.py | 44 +++++++++++++++++++++-- pillar/api/search/elastic_indexing.py | 6 +++- pillar/api/search/index.py | 15 ++++++++ pillar/api/search/queries.py | 41 ++++++++++++++++++++++ pillar/api/search/routes.py | 50 ++++++++++++++++++++++++--- pillar/celery/search_index_tasks.py | 13 ++++--- pillar/cli/elastic.py | 48 ++++++++++++++++++++++--- src/scripts/tutti/4_search.js | 1 - 8 files changed, 200 insertions(+), 18 deletions(-) diff --git a/pillar/api/search/documents.py b/pillar/api/search/documents.py index 0c3d5249..bb029df3 100644 --- a/pillar/api/search/documents.py +++ b/pillar/api/search/documents.py @@ -28,7 +28,22 @@ autocomplete = es.analyzer( class User(es.DocType): """Elastic document describing user.""" - name = es.String( + objectID = es.Keyword() + + username = es.String( + fielddata=True, + analyzer=autocomplete, + ) + + full_name = es.String( + fielddata=True, + analyzer=autocomplete, + ) + + roles = es.Keyword(multi=True) + groups = es.Keyword(multi=True) + + email = es.String( fielddata=True, analyzer=autocomplete, ) @@ -86,8 +101,14 @@ class Node(es.DocType): def create_doc_from_user_data(user_to_index): - doc_id = user_to_index['objectID'] + doc_id = str(user_to_index['objectID']) doc = User(_id=doc_id) + doc.objectID = str(user_to_index['objectID']) + doc.username = user_to_index['username'] + doc.full_name = user_to_index['full_name'] + doc.roles = list(map(str, user_to_index['roles'])) + doc.groups = list(map(str, user_to_index['groups'])) + doc.email = user_to_index['email'] return doc @@ -95,8 +116,10 @@ def create_doc_from_node_data(node_to_index): # node stuff doc_id = str(node_to_index['objectID']) + doc = Node(_id=doc_id) + doc.objectID = str(node_to_index['objectID']) doc.node_type = node_to_index['node_type'] doc.name = node_to_index['name'] doc.user.id = str(node_to_index['user']['_id']) @@ -116,3 +139,20 @@ def create_doc_from_node_data(node_to_index): doc.updated_at = node_to_index['updated'] return doc + + +def create_doc_from_user(user_to_index: dict) -> User: + """ + Create a user document from user + """ + + doc_id = str(user_to_index['objectID']) + doc = User(_id=doc_id) + doc.objectID = str(user_to_index['objectID']) + doc.full_name = user_to_index['full_name'] + doc.username = user_to_index['username'] + doc.roles = user_to_index['roles'] + doc.groups = user_to_index['groups'] + doc.email = user_to_index['email'] + + return doc diff --git a/pillar/api/search/elastic_indexing.py b/pillar/api/search/elastic_indexing.py index a76f0fa3..1e372087 100644 --- a/pillar/api/search/elastic_indexing.py +++ b/pillar/api/search/elastic_indexing.py @@ -26,6 +26,10 @@ def push_updated_user(user_to_index: dict): user_to_index.get('username'), user_to_index.get('objectID')) + doc = documents.create_doc_from_user_data(user_to_index) + doc.save() + log.warning('CREATED ELK USER DOC') + def index_node_save(node_to_index: dict): @@ -37,7 +41,7 @@ def index_node_save(node_to_index: dict): doc = documents.create_doc_from_node_data(node_to_index) - log.warning('CREATED ELK DOC') + log.warning('CREATED ELK NODE DOC') doc.save() diff --git a/pillar/api/search/index.py b/pillar/api/search/index.py index d052f580..0ecc70d8 100644 --- a/pillar/api/search/index.py +++ b/pillar/api/search/index.py @@ -64,6 +64,21 @@ class ResetNodeIndex(ResetIndexTask): doc_types = [documents.Node] +class ResetUserIndex(ResetIndexTask): + index = current_app.config['ELASTIC_INDICES']['USER'] + doc_types = [documents.User] + + def reset_node_index(): resettask = ResetNodeIndex() resettask.execute() + + +def reset_index(indexnames): + if 'users' in indexnames: + resettask = ResetUserIndex() + resettask.execute() + if 'nodes' in indexnames: + resettask = ResetUserIndex() + resettask.execute() + diff --git a/pillar/api/search/queries.py b/pillar/api/search/queries.py index 26d7698d..2fe038be 100644 --- a/pillar/api/search/queries.py +++ b/pillar/api/search/queries.py @@ -42,3 +42,44 @@ def do_search(query: str) -> dict: response = search.execute() return response.to_dict() + + +def do_user_search(query: str) -> dict: + """ + return user objects + """ + should = [ + Q('match', username=query), + Q('match', full_name=query), + ] + bool_query = Q('bool', should=should) + search = Search(using=client) + search.query = bool_query + + if current_app.config['DEBUG']: + log.debug(json.dumps(search.to_dict(), indent=4)) + + response = search.execute() + + return response.to_dict() + + +def do_user_search_admin(query: str) -> dict: + """ + return users with all fields and aggregations + """ + should = [ + Q('match', username=query), + Q('match', email=query), + Q('match', full_name=query), + ] + bool_query = Q('bool', should=should) + search = Search(using=client) + search.query = bool_query + + if current_app.config['DEBUG']: + log.debug(json.dumps(search.to_dict(), indent=4)) + + response = search.execute() + + return response.to_dict() diff --git a/pillar/api/search/routes.py b/pillar/api/search/routes.py index 77f1a7e7..99a18be8 100644 --- a/pillar/api/search/routes.py +++ b/pillar/api/search/routes.py @@ -18,16 +18,58 @@ blueprint_search = Blueprint('elksearch', __name__) from . import queries -#@authorization.require_login(require_cap='subscriber') -@blueprint_search.route('/', methods=['GET']) -def search_nodes(): + +def _valid_search() -> [str, str]: + """ + Validate search parameters + """ searchword = request.args.get('q', '') if not searchword: - return 'You are forgetting a "?q=whatareyoulookingfor"' + return '', 'You are forgetting a "?q=whatareyoulookingfor"' + + return searchword, '' + + +@blueprint_search.route('/', methods=['GET']) +def search_nodes(): + + searchword, err = _valid_search() + if err: + return err data = queries.do_search(searchword) resp = Response(json.dumps(data), mimetype='application/json') return resp + + +@blueprint_search.route('/user', methods=['GET']) +def search_user(): + + searchword, err = _valid_search() + if err: + return err + + data = queries.do_user_search(searchword) + + resp = Response(json.dumps(data), mimetype='application/json') + return resp + + +@authorization.require_login(require_cap='admin') +@blueprint_search.route('/admin/user', methods=['GET']) +def search_user_admin(): + """ + User search over all fields. + """ + + searchword, err = _valid_search() + if err: + return err + + data = queries.do_user_search_admin(searchword) + + resp = Response(json.dumps(data), mimetype='application/json') + return resp diff --git a/pillar/celery/search_index_tasks.py b/pillar/celery/search_index_tasks.py index e5cac649..2c045e56 100644 --- a/pillar/celery/search_index_tasks.py +++ b/pillar/celery/search_index_tasks.py @@ -117,20 +117,23 @@ def prepare_node_data(node_id: str, node=None) -> dict: return to_index -def prepare_user_data(user_id: str) -> dict: +def prepare_user_data(user_id: str, user=None) -> dict: """ Prepare data to index for user node """ - user_oid = ObjectId(user_id) - log.info('Retrieving user %s', user_oid) - users_coll = current_app.db('users') - user = users_coll.find_one({'_id': user_oid}) + if not user: + user_oid = ObjectId(user_id) + log.info('Retrieving user %s', user_oid) + users_coll = current_app.db('users') + user = users_coll.find_one({'_id': user_oid}) + if user is None: log.warning('Unable to find user %s, not updating Algolia.', user_oid) return user_roles = set(user.get('roles', ())) + if 'service' in user_roles: return diff --git a/pillar/cli/elastic.py b/pillar/cli/elastic.py index dcadc55f..cee03f81 100644 --- a/pillar/cli/elastic.py +++ b/pillar/cli/elastic.py @@ -9,25 +9,49 @@ log = logging.getLogger(__name__) manager_elk = Manager( current_app, usage="Elastic utilities, like reset_index()") +indexes = ['users', 'nodes'] + @manager_elk.command -def reset_index(elk_index): +def reset_index(elk_index=None): """ Destroy and recreate elastic indices node, user ... """ - #real_current_app = current_app._get_current_object()._get_current_object() with current_app.app_context(): from pillar.api.search import index + if not elk_index: + index.reset_index(indexes) + return if elk_index == 'nodes': - index.reset_node_index() + index.reset_index(['node']) + return + if elk_index == 'users': + index.reset_index(['user']) + return -@manager_elk.command -def reindex_nodes(): +def _reindex_users(): + db = current_app.db() + users_coll = db['users'] + user_count = users_coll.count() + log.debug('Reindexing %d in Elastic', user_count) + + from pillar.celery.search_index_tasks import prepare_user_data + from pillar.api.search import elastic_indexing + + for user in users_coll.find(): + to_index = prepare_user_data('', user=user) + if not to_index: + log.debug('missing user..') + continue + elastic_indexing.push_updated_user(to_index) + + +def _reindex_nodes(): db = current_app.db() nodes_coll = db['nodes'] node_count = nodes_coll.count() @@ -40,3 +64,17 @@ def reindex_nodes(): for node in nodes_coll.find(): to_index = prepare_node_data('', node=node) elastic_indexing.index_node_save(to_index) + + +@manager_elk.command +def reindex(indexname=None): + + if not indexname: + log.debug('reindex everything..') + _reindex_nodes() + _reindex_users() + + elif indexname == 'users': + _reindex_users() + elif indexname == 'nodes': + _reindex_nodes() diff --git a/src/scripts/tutti/4_search.js b/src/scripts/tutti/4_search.js index 63ae6834..b73068ff 100644 --- a/src/scripts/tutti/4_search.js +++ b/src/scripts/tutti/4_search.js @@ -30,7 +30,6 @@ $(document).ready(function() { var tu = searchInput.typeahead({hint: true}, { //source: algoliaIndex.ttAdapter(), source: elasticSearch(), - //source: elkBlood(), async: true, displayKey: 'name', limit: 10,