From 62954ac1573082249dbe1a748d3b1982703edbbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sybren=20A=2E=20St=C3=BCvel?= Date: Fri, 15 Sep 2017 16:47:40 +0200 Subject: [PATCH] Latest assets/comments: using Mongo aggregation instead of Python code --- pillar/api/latest.py | 118 +++++++++++++++++++++---------------------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/pillar/api/latest.py b/pillar/api/latest.py index c834e713..8cffbb78 100644 --- a/pillar/api/latest.py +++ b/pillar/api/latest.py @@ -1,5 +1,8 @@ +import functools import itertools +import typing +import bson import pymongo from flask import Blueprint, current_app @@ -12,99 +15,95 @@ def keep_fetching(collection, db_filter, projection, sort, py_filter, batch_size=12): """Yields results for which py_filter returns True""" - projection['_deleted'] = 1 + db_filter['_deleted'] = {'$ne': True} curs = collection.find(db_filter, projection).sort(sort) curs.batch_size(batch_size) for doc in curs: - if doc.get('_deleted'): - continue - doc.pop('_deleted', None) if py_filter(doc): yield doc -def latest_nodes(db_filter, projection, py_filter, limit): - nodes = current_app.data.driver.db['nodes'] +def _public_project_ids() -> typing.List[bson.ObjectId]: + """Returns a list of ObjectIDs of public projects. + + Memoized in setup_app(). + """ + + proj_coll = current_app.db('projects') + result = proj_coll.find({'is_private': False}, {'_id': 1}) + return [p['_id'] for p in result] + + +def latest_nodes(db_filter, projection, limit): + """Returns the latest nodes, of a certain type, of public projects. + + Also includes information about the project and the user of each node. + """ proj = { '_created': 1, '_updated': 1, + 'user.full_name': 1, + 'project._id': 1, + 'project.url': 1, + 'project.name': 1, + 'name': 1, + 'node_type': 1, + 'parent': 1, + **projection, } - proj.update(projection) - latest = keep_fetching(nodes, db_filter, proj, - [('_created', pymongo.DESCENDING)], - py_filter, limit) + nodes_coll = current_app.db('nodes') + pipeline = [ + {'$match': {'_deleted': {'$ne': True}}}, + {'$match': db_filter}, + {'$match': {'project': {'$in': _public_project_ids()}}}, + {'$sort': {'_created': pymongo.DESCENDING}}, + {'$limit': limit}, + {'$lookup': {"from": "users", + "localField": "user", + "foreignField": "_id", + "as": "user"}}, + {'$unwind': {'path': "$user"}}, + {'$lookup': {"from": "projects", + "localField": "project", + "foreignField": "_id", + "as": "project"}}, + {'$unwind': {'path': "$project"}}, + {'$project': proj}, + ] - result = list(itertools.islice(latest, limit)) - return result - - -def has_public_project(node_doc): - """Returns True iff the project the node belongs to is public.""" - - project_id = node_doc.get('project') - return is_project_public(project_id) - - -# TODO: cache result, for a limited amt. of time, or for this HTTP request. -def is_project_public(project_id): - """Returns True iff the project is public.""" - - project = current_app.data.driver.db['projects'].find_one(project_id) - if not project: - return False - - return not project.get('is_private') + print('QUERY: db.nodes.aggregate(%r)' % pipeline) + latest = nodes_coll.aggregate(pipeline) + return list(latest) @blueprint.route('/assets') def latest_assets(): latest = latest_nodes({'node_type': 'asset', 'properties.status': 'published'}, - {'name': 1, 'project': 1, 'user': 1, 'node_type': 1, + {'name': 1, 'node_type': 1, 'parent': 1, 'picture': 1, 'properties.status': 1, 'properties.content_type': 1, 'permissions.world': 1}, - has_public_project, 12) - - embed_user(latest) - embed_project(latest) + 12) return jsonify({'_items': latest}) -def embed_user(latest): - users = current_app.data.driver.db['users'] - - for comment in latest: - user_id = comment['user'] - comment['user'] = users.find_one(user_id, { - 'auth': 0, 'groups': 0, 'roles': 0, 'settings': 0, 'email': 0, - '_created': 0, '_updated': 0, '_etag': 0}) - - -def embed_project(latest): - projects = current_app.data.driver.db['projects'] - - for comment in latest: - project_id = comment['project'] - comment['project'] = projects.find_one(project_id, {'_id': 1, 'name': 1, - 'url': 1}) - - @blueprint.route('/comments') def latest_comments(): latest = latest_nodes({'node_type': 'comment', 'properties.status': 'published'}, - {'project': 1, 'parent': 1, 'user': 1, + {'parent': 1, 'properties.content': 1, 'node_type': 1, 'properties.status': 1, 'properties.is_reply': 1}, - has_public_project, 10) + 10) # Embed the comments' parents. + # TODO: move to aggregation pipeline. nodes = current_app.data.driver.db['nodes'] parents = {} for comment in latest: @@ -118,11 +117,12 @@ def latest_comments(): parents[parent_id] = parent comment['parent'] = parent - embed_project(latest) - embed_user(latest) - return jsonify({'_items': latest}) def setup_app(app, url_prefix): + global _public_project_ids + app.register_api_blueprint(blueprint, url_prefix=url_prefix) + cached = app.cache.cached(timeout=3600) + _public_project_ids = cached(_public_project_ids)