wip D2950

This commit is contained in:
Stephan preeker 2017-12-15 17:33:06 +01:00
parent 8206186426
commit 7c6425ff4d
9 changed files with 86 additions and 89 deletions

View File

@ -1,13 +1,7 @@
import logging import logging
#import bson
#from flask import current_app
from .routes import blueprint_search from .routes import blueprint_search
log = logging.getLogger(__name__)
def setup_app(app, url_prefix: str =None): def setup_app(app, url_prefix: str =None):
app.register_api_blueprint( app.register_api_blueprint(
blueprint_search, url_prefix=url_prefix) blueprint_search, url_prefix=url_prefix)

View File

@ -30,8 +30,9 @@ def index_node_save(node_to_index: dict):
def index_node_delete(delete_id: str): def index_node_delete(delete_id: str):
""" delete node using id"""
from pillar.api.utils import algolia from pillar.api.utils import algolia
try: try:
algolia.index_node_delete(delete_id) algolia.index_node_delete(delete_id)
except AlgoliaException as ex: except AlgoliaException as ex:

View File

@ -1,10 +1,20 @@
"""
Define elasticsearch document mapping.
Elasticsearch consists of two parts.
Part 1: Define the documents in which you
define which fields will be indexed.
Part 2: Building elasticsearch json queries.
BOTH of these parts are equally important to have
a search API that returns relevant results.
"""
import logging import logging
import elasticsearch_dsl as es import elasticsearch_dsl as es
from elasticsearch_dsl import analysis from elasticsearch_dsl import analysis
# from pillar import current_app
# define elasticsearch document mapping.
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -100,7 +110,14 @@ class Node(es.DocType):
index = 'nodes' index = 'nodes'
def create_doc_from_user_data(user_to_index): def create_doc_from_user_data(user_to_index: dict) -> User:
"""
We are indexing a user object which is identical between search backends,
see celery.search_index_task.py
This function returns a proper ElasticSearch document
"""
if not user_to_index: if not user_to_index:
return return
@ -108,8 +125,8 @@ def create_doc_from_user_data(user_to_index):
doc_id = str(user_to_index.get('objectID', '')) doc_id = str(user_to_index.get('objectID', ''))
if not doc_id: if not doc_id:
log.error('ID missing %s', user_to_index) log.error('USER ID is missing %s', user_to_index)
return raise KeyError
doc = User(_id=doc_id) doc = User(_id=doc_id)
doc.objectID = str(user_to_index['objectID']) doc.objectID = str(user_to_index['objectID'])
@ -121,7 +138,14 @@ def create_doc_from_user_data(user_to_index):
return doc return doc
def create_doc_from_node_data(node_to_index): def create_doc_from_node_data(node_to_index: dict) -> Node:
"""
We are indexing a Node object which is identical between search backends,
see celery.search_index_task.py
This function returns a proper ElasticSearch document
"""
if not node_to_index: if not node_to_index:
return return

View File

@ -6,6 +6,7 @@ from elasticsearch.exceptions import NotFoundError
from pillar import current_app from pillar import current_app
from . import documents from . import documents
log = logging.getLogger(__name__)
elk_hosts = current_app.config['ELASTIC_SEARCH_HOSTS'] elk_hosts = current_app.config['ELASTIC_SEARCH_HOSTS']
@ -14,13 +15,10 @@ connections.create_connection(
sniff_on_start=True, sniff_on_start=True,
timeout=20) timeout=20)
log = logging.getLogger(__name__)
def push_updated_user(user_to_index: dict): def push_updated_user(user_to_index: dict):
""" """
Push an update to the Elastic index when Push an update to the Elastic index when a user item is updated.
a user item is updated.
""" """
if not user_to_index: if not user_to_index:
return return
@ -30,11 +28,14 @@ def push_updated_user(user_to_index: dict):
if not doc: if not doc:
return return
log.debug('UPDATE USER %s', doc._id) log.debug('index update user elasticsearch %s', doc._id)
doc.save() doc.save()
def index_node_save(node_to_index: dict): def index_node_save(node_to_index: dict):
"""
Push an update to the Elastic index when a node item is saved.
"""
if not node_to_index: if not node_to_index:
return return
@ -44,13 +45,16 @@ def index_node_save(node_to_index: dict):
if not doc: if not doc:
return return
log.debug('CREATED ELK NODE DOC %s', doc._id) log.debug('index created node elasticsearch %s', doc._id)
doc.save() doc.save()
def index_node_delete(delete_id: str): def index_node_delete(delete_id: str):
"""
Delete node document from Elastic index using a node id
"""
log.debug('NODE DELETE INDEXING %s', delete_id) log.debug('index node delete %s', delete_id)
try: try:
doc = documents.Node.get(id=delete_id) doc = documents.Node.get(id=delete_id)

View File

@ -1,10 +1,5 @@
import logging import logging
# import time from typing import List
# from elasticsearch import helpers
# import elasticsearch
# from elasticsearch.client import IndicesClient
from elasticsearch.exceptions import NotFoundError from elasticsearch.exceptions import NotFoundError
from elasticsearch_dsl.connections import connections from elasticsearch_dsl.connections import connections
@ -18,17 +13,14 @@ log = logging.getLogger(__name__)
class ResetIndexTask(object): class ResetIndexTask(object):
""" """ Clear and build index / mapping """
Clear and build index / mapping
"""
index_key = '' index_key = ''
"""Key into the ELASTIC_INDICES dict in the app config.""" """Key into the ELASTIC_INDICES dict in the app config."""
doc_types = [] doc_types: List[type] = []
name = 'remove index' name = 'remove index'
def __init__(self): def __init__(self):
if not self.index_key: if not self.index_key:
raise ValueError("No index specified") raise ValueError("No index specified")
@ -48,10 +40,10 @@ class ResetIndexTask(object):
try: try:
idx.delete(ignore=404) idx.delete(ignore=404)
log.info("Deleted index %s", index) log.info("Deleted index %s", index)
except AttributeError:
log.warning("Could not delete index '%s', ignoring", index)
except NotFoundError: except NotFoundError:
log.warning("Could not delete index '%s', ignoring", index) log.warning("Could not delete index '%s', ignoring", index)
else:
log.warning("Could not delete index '%s', ignoring", index)
# create doc types # create doc types
for dt in self.doc_types: for dt in self.doc_types:

View File

@ -1,7 +1,7 @@
import logging
import json import json
from elasticsearch import Elasticsearch from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, Q from elasticsearch_dsl import Search, Q
import logging
from pillar import current_app from pillar import current_app
@ -23,13 +23,9 @@ def add_aggs_to_search(search, agg_terms):
def make_must(must: list, terms: dict) -> list: def make_must(must: list, terms: dict) -> list:
""" """ Given term parameters append must queries to the must list """
Given some term parameters
we must match those
"""
for field, value in terms.items(): for field, value in terms.items():
if value: if value:
must.append({'match': {field: value}}) must.append({'match': {field: value}})
@ -54,8 +50,10 @@ def nested_bool(must: list, should: list, terms: dict) -> Search:
def do_search(query: str, terms: dict) -> dict: def do_search(query: str, terms: dict) -> dict:
""" """
Given user input search for node/stuff Given user query input and term refinements
search for public published nodes
""" """
should = [ should = [
Q('match', name=query), Q('match', name=query),
@ -77,21 +75,19 @@ def do_search(query: str, terms: dict) -> dict:
search = nested_bool(must, should, terms) search = nested_bool(must, should, terms)
add_aggs_to_search(search, node_agg_terms) add_aggs_to_search(search, node_agg_terms)
if current_app.config['DEBUG']: if log.isEnabledFor(logging.DEBUG):
print(json.dumps(search.to_dict(), indent=4)) print(json.dumps(search.to_dict(), indent=4))
response = search.execute() response = search.execute()
if current_app.config['DEBUG']: if log.isEnabledFor(logging.DEBUG):
print(json.dumps(response.to_dict(), indent=4)) print(json.dumps(response.to_dict(), indent=4))
return response.to_dict() return response.to_dict()
def do_user_search(query: str, terms: dict) -> dict: def do_user_search(query: str, terms: dict) -> dict:
""" """ return user objects represented in elasticsearch result dict"""
return user objects
"""
should = [ should = [
Q('match', username=query), Q('match', username=query),
Q('match', full_name=query), Q('match', full_name=query),
@ -101,7 +97,7 @@ def do_user_search(query: str, terms: dict) -> dict:
Q('term', _type='user') Q('term', _type='user')
] ]
# We got an id field. we MUST find it. # We most likely got an id field. we MUST find it.
if len(query) == len('563aca02c379cf0005e8e17d'): if len(query) == len('563aca02c379cf0005e8e17d'):
must.append(Q('term', _id=query)) must.append(Q('term', _id=query))
@ -111,12 +107,12 @@ def do_user_search(query: str, terms: dict) -> dict:
search = nested_bool(must, should, terms) search = nested_bool(must, should, terms)
add_aggs_to_search(search, user_agg_terms) add_aggs_to_search(search, user_agg_terms)
if current_app.config['DEBUG']: if log.isEnabledFor(logging.DEBUG):
print(json.dumps(search.to_dict(), indent=4)) print(json.dumps(search.to_dict(), indent=4))
response = search.execute() response = search.execute()
if current_app.config['DEBUG']: if log.isEnabledFor(logging.DEBUG):
print(json.dumps(response.to_dict(), indent=4)) print(json.dumps(response.to_dict(), indent=4))
return response.to_dict() return response.to_dict()
@ -124,7 +120,8 @@ def do_user_search(query: str, terms: dict) -> dict:
def do_user_search_admin(query: str) -> dict: def do_user_search_admin(query: str) -> dict:
""" """
return users with all fields and aggregations return users search result dict object
search all user fields and provide aggregation information
""" """
should = [ should = [
Q('match', username=query), Q('match', username=query),
@ -135,12 +132,12 @@ def do_user_search_admin(query: str) -> dict:
search = Search(using=client) search = Search(using=client)
search.query = bool_query search.query = bool_query
if current_app.config['DEBUG']: if log.isEnabledFor(logging.DEBUG):
log.debug(json.dumps(search.to_dict(), indent=4)) log.debug(json.dumps(search.to_dict(), indent=4))
response = search.execute() response = search.execute()
if current_app.config['DEBUG']: if log.isEnabledFor(logging.DEBUG):
log.debug(json.dumps(response.to_dict(), indent=4)) log.debug(json.dumps(response.to_dict(), indent=4))
return response.to_dict() return response.to_dict()

View File

@ -11,37 +11,28 @@ log = logging.getLogger(__name__)
blueprint_search = Blueprint('elksearch', __name__) blueprint_search = Blueprint('elksearch', __name__)
terms = [
'node_type', 'media',
'tags', 'is_free', 'projectname',
'roles',
]
def _valid_search() -> str: def _valid_search() -> str:
""" """ Returns search parameters """
Returns search parameters, raising error when missing. query = request.args.get('q', '')
""" return query
searchword = request.args.get('q', '')
# if not searchword:
# raise wz_exceptions.BadRequest(
# 'You are forgetting a "?q=whatareyoulookingfor"')
return searchword
def _term_filters() -> dict: def _term_filters() -> dict:
""" """
Check if frontend wants to filter stuff Check if frontend wants to filter stuff
on specific fields AKA facets on specific fields AKA facets
return mapping with term field name
and provided user term value
""" """
return {term: request.args.get(term, '') for term in terms}
terms = [
'node_type', 'media',
'tags', 'is_free', 'projectname',
'roles',
]
parsed_terms = {}
for term in terms:
parsed_terms[term] = request.args.get(term, '')
return parsed_terms
@blueprint_search.route('/') @blueprint_search.route('/')
@ -54,12 +45,9 @@ def search_nodes():
@blueprint_search.route('/user') @blueprint_search.route('/user')
def search_user(): def search_user():
searchword = _valid_search() searchword = _valid_search()
terms = _term_filters() terms = _term_filters()
data = queries.do_user_search(searchword, terms) data = queries.do_user_search(searchword, terms)
return jsonify(data) return jsonify(data)

View File

@ -21,7 +21,6 @@ def index_user_save(to_index_user: dict):
@skip_when_testing @skip_when_testing
def index_node_save(node_to_index): def index_node_save(node_to_index):
if not current_app.algolia_index_nodes: if not current_app.algolia_index_nodes:
return return
current_app.algolia_index_nodes.save_object(node_to_index) current_app.algolia_index_nodes.save_object(node_to_index)

View File

@ -1,10 +1,9 @@
import logging import logging
from bson import ObjectId from bson import ObjectId
from pillar import current_app from pillar import current_app
from pillar.api.file_storage import generate_link from pillar.api.file_storage import generate_link
from pillar.api.search import elastic_indexing from pillar.api.search import elastic_indexing
from pillar.api.search import algolia_indexing from pillar.api.search import algolia_indexing
@ -21,10 +20,7 @@ SEARCH_BACKENDS = {
def _get_node_from_id(node_id: str): def _get_node_from_id(node_id: str):
"""
"""
node_oid = ObjectId(node_id) node_oid = ObjectId(node_id)
log.info('Retrieving node %s', node_oid)
nodes_coll = current_app.db('nodes') nodes_coll = current_app.db('nodes')
node = nodes_coll.find_one({'_id': node_oid}) node = nodes_coll.find_one({'_id': node_oid})
@ -56,15 +52,19 @@ def _handle_picture(node: dict, to_index: dict):
is_public=True) is_public=True)
def prepare_node_data(node_id: str, node=None) -> dict: def prepare_node_data(node_id: str, node: dict=None) -> dict:
""" """
Given node build data object with fields to index Given node by id or actual node build data object with fields to index
""" """
if node_id and node:
raise ValueError("do not provide node and node_id together")
if node_id: if node_id:
node = _get_node_from_id(node_id) node = _get_node_from_id(node_id)
if node is None: if node is None:
log.warning('Unable to find node %s, not updating Algolia.', node_id) log.warning('Unable to find node %s, not updating.', node_id)
return return
if node['node_type'] not in INDEX_ALLOWED_NODE_TYPES: if node['node_type'] not in INDEX_ALLOWED_NODE_TYPES:
@ -95,8 +95,7 @@ def prepare_node_data(node_id: str, node=None) -> dict:
}, },
} }
if 'description' in node and node['description']: to_index['description'] = node.get('description')
to_index['description'] = node['description']
_handle_picture(node, to_index) _handle_picture(node, to_index)
@ -140,7 +139,7 @@ def prepare_user_data(user_id: str, user=None) -> dict:
# Strip unneeded roles # Strip unneeded roles
index_roles = user_roles.intersection(current_app.user_roles_indexable) index_roles = user_roles.intersection(current_app.user_roles_indexable)
log.debug('Pushed user %r to Search index', user['_id']) log.debug('Push user %r to Search index', user['_id'])
user_to_index = { user_to_index = {
'objectID': user['_id'], 'objectID': user['_id'],
@ -181,7 +180,6 @@ def node_delete(node_id: str):
# Deleting a node takes nothing more than the ID anyway. # Deleting a node takes nothing more than the ID anyway.
# No need to fetch anything from Mongo. # No need to fetch anything from Mongo.
delete_id = ObjectId(node_id) delete_id = ObjectId(node_id)
algolia_indexing.index_node_delete(delete_id)
for searchoption in current_app.config['SEARCH_BACKENDS']: for searchoption in current_app.config['SEARCH_BACKENDS']:
searchmodule = SEARCH_BACKENDS[searchoption] searchmodule = SEARCH_BACKENDS[searchoption]