wip D2950

This commit is contained in:
Stephan preeker 2017-12-15 17:33:06 +01:00
parent 8206186426
commit 7c6425ff4d
9 changed files with 86 additions and 89 deletions

View File

@ -1,13 +1,7 @@
import logging
#import bson
#from flask import current_app
from .routes import blueprint_search
log = logging.getLogger(__name__)
def setup_app(app, url_prefix: str =None):
app.register_api_blueprint(
blueprint_search, url_prefix=url_prefix)

View File

@ -30,8 +30,9 @@ def index_node_save(node_to_index: dict):
def index_node_delete(delete_id: str):
""" delete node using id"""
from pillar.api.utils import algolia
try:
algolia.index_node_delete(delete_id)
except AlgoliaException as ex:

View File

@ -1,10 +1,20 @@
"""
Define elasticsearch document mapping.
Elasticsearch consists of two parts.
Part 1: Define the documents in which you
define which fields will be indexed.
Part 2: Building elasticsearch json queries.
BOTH of these parts are equally important to have
a search API that returns relevant results.
"""
import logging
import elasticsearch_dsl as es
from elasticsearch_dsl import analysis
# from pillar import current_app
# define elasticsearch document mapping.
log = logging.getLogger(__name__)
@ -100,7 +110,14 @@ class Node(es.DocType):
index = 'nodes'
def create_doc_from_user_data(user_to_index):
def create_doc_from_user_data(user_to_index: dict) -> User:
"""
We are indexing a user object which is identical between search backends
see celery.search_index_task.py
this function returns a proper Elasticsearch document
"""
if not user_to_index:
return
@ -108,8 +125,8 @@ def create_doc_from_user_data(user_to_index):
doc_id = str(user_to_index.get('objectID', ''))
if not doc_id:
log.error('ID missing %s', user_to_index)
return
log.error('USER ID is missing %s', user_to_index)
raise KeyError
doc = User(_id=doc_id)
doc.objectID = str(user_to_index['objectID'])
@ -121,7 +138,14 @@ def create_doc_from_user_data(user_to_index):
return doc
def create_doc_from_node_data(node_to_index):
def create_doc_from_node_data(node_to_index: dict) -> Node:
"""
We are indexing a Node object which is identical between search backends
see celery.search_index_task.py
this function returns a proper Elasticsearch document
"""
if not node_to_index:
return

View File

@ -6,6 +6,7 @@ from elasticsearch.exceptions import NotFoundError
from pillar import current_app
from . import documents
log = logging.getLogger(__name__)
elk_hosts = current_app.config['ELASTIC_SEARCH_HOSTS']
@ -14,13 +15,10 @@ connections.create_connection(
sniff_on_start=True,
timeout=20)
log = logging.getLogger(__name__)
def push_updated_user(user_to_index: dict):
"""
Push an update to the Elastic index when
a user item is updated.
Push an update to the Elastic index when a user item is updated.
"""
if not user_to_index:
return
@ -30,11 +28,14 @@ def push_updated_user(user_to_index: dict):
if not doc:
return
log.debug('UPDATE USER %s', doc._id)
log.debug('index update user elasticsearch %s', doc._id)
doc.save()
def index_node_save(node_to_index: dict):
"""
Push an update to the Elastic index when a node item is saved.
"""
if not node_to_index:
return
@ -44,13 +45,16 @@ def index_node_save(node_to_index: dict):
if not doc:
return
log.debug('CREATED ELK NODE DOC %s', doc._id)
log.debug('index created node elasticsearch %s', doc._id)
doc.save()
def index_node_delete(delete_id: str):
"""
Delete node document from Elastic index using a node id
"""
log.debug('NODE DELETE INDEXING %s', delete_id)
log.debug('index node delete %s', delete_id)
try:
doc = documents.Node.get(id=delete_id)

View File

@ -1,10 +1,5 @@
import logging
# import time
# from elasticsearch import helpers
# import elasticsearch
# from elasticsearch.client import IndicesClient
from typing import List
from elasticsearch.exceptions import NotFoundError
from elasticsearch_dsl.connections import connections
@ -18,17 +13,14 @@ log = logging.getLogger(__name__)
class ResetIndexTask(object):
"""
Clear and build index / mapping
"""
""" Clear and build index / mapping """
index_key = ''
"""Key into the ELASTIC_INDICES dict in the app config."""
doc_types = []
doc_types: List[type] = []
name = 'remove index'
def __init__(self):
if not self.index_key:
raise ValueError("No index specified")
@ -48,10 +40,10 @@ class ResetIndexTask(object):
try:
idx.delete(ignore=404)
log.info("Deleted index %s", index)
except AttributeError:
log.warning("Could not delete index '%s', ignoring", index)
except NotFoundError:
log.warning("Could not delete index '%s', ignoring", index)
else:
log.warning("Could not delete index '%s', ignoring", index)
# create doc types
for dt in self.doc_types:

View File

@ -1,7 +1,7 @@
import logging
import json
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, Q
import logging
from pillar import current_app
@ -23,13 +23,9 @@ def add_aggs_to_search(search, agg_terms):
def make_must(must: list, terms: dict) -> list:
"""
Given some term parameters
we must match those
"""
""" Given term parameters append must queries to the must list """
for field, value in terms.items():
if value:
must.append({'match': {field: value}})
@ -54,8 +50,10 @@ def nested_bool(must: list, should: list, terms: dict) -> Search:
def do_search(query: str, terms: dict) -> dict:
"""
Given user input search for node/stuff
Given user query input and term refinements
search for public published nodes
"""
should = [
Q('match', name=query),
@ -77,21 +75,19 @@ def do_search(query: str, terms: dict) -> dict:
search = nested_bool(must, should, terms)
add_aggs_to_search(search, node_agg_terms)
if current_app.config['DEBUG']:
if log.isEnabledFor(logging.DEBUG):
print(json.dumps(search.to_dict(), indent=4))
response = search.execute()
if current_app.config['DEBUG']:
if log.isEnabledFor(logging.DEBUG):
print(json.dumps(response.to_dict(), indent=4))
return response.to_dict()
def do_user_search(query: str, terms: dict) -> dict:
"""
return user objects
"""
""" return user objects represented in elasicsearch result dict"""
should = [
Q('match', username=query),
Q('match', full_name=query),
@ -101,7 +97,7 @@ def do_user_search(query: str, terms: dict) -> dict:
Q('term', _type='user')
]
# We got an id field. we MUST find it.
# We most likely got an id field. We MUST find it.
if len(query) == len('563aca02c379cf0005e8e17d'):
must.append(Q('term', _id=query))
@ -111,12 +107,12 @@ def do_user_search(query: str, terms: dict) -> dict:
search = nested_bool(must, should, terms)
add_aggs_to_search(search, user_agg_terms)
if current_app.config['DEBUG']:
if log.isEnabledFor(logging.DEBUG):
print(json.dumps(search.to_dict(), indent=4))
response = search.execute()
if current_app.config['DEBUG']:
if log.isEnabledFor(logging.DEBUG):
print(json.dumps(response.to_dict(), indent=4))
return response.to_dict()
@ -124,7 +120,8 @@ def do_user_search(query: str, terms: dict) -> dict:
def do_user_search_admin(query: str) -> dict:
"""
return users with all fields and aggregations
return users search result dict object
search all user fields and provide aggregation information
"""
should = [
Q('match', username=query),
@ -135,12 +132,12 @@ def do_user_search_admin(query: str) -> dict:
search = Search(using=client)
search.query = bool_query
if current_app.config['DEBUG']:
if log.isEnabledFor(logging.DEBUG):
log.debug(json.dumps(search.to_dict(), indent=4))
response = search.execute()
if current_app.config['DEBUG']:
if log.isEnabledFor(logging.DEBUG):
log.debug(json.dumps(response.to_dict(), indent=4))
return response.to_dict()

View File

@ -11,37 +11,28 @@ log = logging.getLogger(__name__)
blueprint_search = Blueprint('elksearch', __name__)
terms = [
'node_type', 'media',
'tags', 'is_free', 'projectname',
'roles',
]
def _valid_search() -> str:
"""
Returns search parameters, raising error when missing.
"""
searchword = request.args.get('q', '')
# if not searchword:
# raise wz_exceptions.BadRequest(
# 'You are forgetting a "?q=whatareyoulookingfor"')
return searchword
""" Returns search parameters """
query = request.args.get('q', '')
return query
def _term_filters() -> dict:
"""
Check if frontend wants to filter stuff
on specific fields AKA facets
return mapping with term field name
and provided user term value
"""
terms = [
'node_type', 'media',
'tags', 'is_free', 'projectname',
'roles',
]
parsed_terms = {}
for term in terms:
parsed_terms[term] = request.args.get(term, '')
return parsed_terms
return {term: request.args.get(term, '') for term in terms}
@blueprint_search.route('/')
@ -54,12 +45,9 @@ def search_nodes():
@blueprint_search.route('/user')
def search_user():
searchword = _valid_search()
terms = _term_filters()
data = queries.do_user_search(searchword, terms)
return jsonify(data)

View File

@ -21,7 +21,6 @@ def index_user_save(to_index_user: dict):
@skip_when_testing
def index_node_save(node_to_index):
if not current_app.algolia_index_nodes:
return
current_app.algolia_index_nodes.save_object(node_to_index)

View File

@ -1,10 +1,9 @@
import logging
from bson import ObjectId
from pillar import current_app
from pillar.api.file_storage import generate_link
from pillar.api.search import elastic_indexing
from pillar.api.search import algolia_indexing
@ -21,10 +20,7 @@ SEARCH_BACKENDS = {
def _get_node_from_id(node_id: str):
"""
"""
node_oid = ObjectId(node_id)
log.info('Retrieving node %s', node_oid)
nodes_coll = current_app.db('nodes')
node = nodes_coll.find_one({'_id': node_oid})
@ -56,15 +52,19 @@ def _handle_picture(node: dict, to_index: dict):
is_public=True)
def prepare_node_data(node_id: str, node=None) -> dict:
def prepare_node_data(node_id: str, node: dict=None) -> dict:
"""
Given node build data object with fields to index
Given node by id or actual node build data object with fields to index
"""
if node_id and node:
raise ValueError("do not provide node and node_id together")
if node_id:
node = _get_node_from_id(node_id)
if node is None:
log.warning('Unable to find node %s, not updating Algolia.', node_id)
log.warning('Unable to find node %s, not updating.', node_id)
return
if node['node_type'] not in INDEX_ALLOWED_NODE_TYPES:
@ -95,8 +95,7 @@ def prepare_node_data(node_id: str, node=None) -> dict:
},
}
if 'description' in node and node['description']:
to_index['description'] = node['description']
to_index['description'] = node.get('description')
_handle_picture(node, to_index)
@ -140,7 +139,7 @@ def prepare_user_data(user_id: str, user=None) -> dict:
# Strip unneeded roles
index_roles = user_roles.intersection(current_app.user_roles_indexable)
log.debug('Pushed user %r to Search index', user['_id'])
log.debug('Push user %r to Search index', user['_id'])
user_to_index = {
'objectID': user['_id'],
@ -181,7 +180,6 @@ def node_delete(node_id: str):
# Deleting a node takes nothing more than the ID anyway.
# No need to fetch anything from Mongo.
delete_id = ObjectId(node_id)
algolia_indexing.index_node_delete(delete_id)
for searchoption in current_app.config['SEARCH_BACKENDS']:
searchmodule = SEARCH_BACKENDS[searchoption]