Latest assets/comments: using Mongo aggregation instead of Python code

This commit is contained in:
2017-09-15 16:47:40 +02:00
parent 1c70d80b99
commit 62954ac157

View File

@@ -1,5 +1,8 @@
import functools
import itertools import itertools
import typing
import bson
import pymongo import pymongo
from flask import Blueprint, current_app from flask import Blueprint, current_app
@@ -12,99 +15,95 @@ def keep_fetching(collection, db_filter, projection, sort, py_filter,
batch_size=12): batch_size=12):
"""Yields results for which py_filter returns True""" """Yields results for which py_filter returns True"""
projection['_deleted'] = 1 db_filter['_deleted'] = {'$ne': True}
curs = collection.find(db_filter, projection).sort(sort) curs = collection.find(db_filter, projection).sort(sort)
curs.batch_size(batch_size) curs.batch_size(batch_size)
for doc in curs: for doc in curs:
if doc.get('_deleted'):
continue
doc.pop('_deleted', None)
if py_filter(doc): if py_filter(doc):
yield doc yield doc
def latest_nodes(db_filter, projection, py_filter, limit): def _public_project_ids() -> typing.List[bson.ObjectId]:
nodes = current_app.data.driver.db['nodes'] """Returns a list of ObjectIDs of public projects.
Memoized in setup_app().
"""
proj_coll = current_app.db('projects')
result = proj_coll.find({'is_private': False}, {'_id': 1})
return [p['_id'] for p in result]
def latest_nodes(db_filter, projection, limit):
"""Returns the latest nodes, of a certain type, of public projects.
Also includes information about the project and the user of each node.
"""
proj = { proj = {
'_created': 1, '_created': 1,
'_updated': 1, '_updated': 1,
'user.full_name': 1,
'project._id': 1,
'project.url': 1,
'project.name': 1,
'name': 1,
'node_type': 1,
'parent': 1,
**projection,
} }
proj.update(projection)
latest = keep_fetching(nodes, db_filter, proj, nodes_coll = current_app.db('nodes')
[('_created', pymongo.DESCENDING)], pipeline = [
py_filter, limit) {'$match': {'_deleted': {'$ne': True}}},
{'$match': db_filter},
{'$match': {'project': {'$in': _public_project_ids()}}},
{'$sort': {'_created': pymongo.DESCENDING}},
{'$limit': limit},
{'$lookup': {"from": "users",
"localField": "user",
"foreignField": "_id",
"as": "user"}},
{'$unwind': {'path': "$user"}},
{'$lookup': {"from": "projects",
"localField": "project",
"foreignField": "_id",
"as": "project"}},
{'$unwind': {'path': "$project"}},
{'$project': proj},
]
result = list(itertools.islice(latest, limit)) print('QUERY: db.nodes.aggregate(%r)' % pipeline)
return result latest = nodes_coll.aggregate(pipeline)
return list(latest)
def has_public_project(node_doc):
    """Tell whether the project a node document belongs to is public.

    The node's 'project' value (None when the key is absent) is passed to
    is_project_public(), and that function's answer is returned as-is.
    """
    return is_project_public(node_doc.get('project'))
# TODO: cache this answer, either for a limited time or per HTTP request.
def is_project_public(project_id):
    """Tell whether the project with the given ID is public.

    Looks the project up in the 'projects' collection. Returns False when
    no project document is found; otherwise returns True exactly when the
    document's 'is_private' value is missing or falsy.
    """
    projects_coll = current_app.data.driver.db['projects']
    project = projects_coll.find_one(project_id)
    if project:
        return not project.get('is_private')
    return False
@blueprint.route('/assets') @blueprint.route('/assets')
def latest_assets(): def latest_assets():
latest = latest_nodes({'node_type': 'asset', latest = latest_nodes({'node_type': 'asset',
'properties.status': 'published'}, 'properties.status': 'published'},
{'name': 1, 'project': 1, 'user': 1, 'node_type': 1, {'name': 1, 'node_type': 1,
'parent': 1, 'picture': 1, 'properties.status': 1, 'parent': 1, 'picture': 1, 'properties.status': 1,
'properties.content_type': 1, 'properties.content_type': 1,
'permissions.world': 1}, 'permissions.world': 1},
has_public_project, 12) 12)
embed_user(latest)
embed_project(latest)
return jsonify({'_items': latest}) return jsonify({'_items': latest})
def embed_user(latest):
    """Swap each document's 'user' reference for the matching user document.

    Every item in `latest` is modified in place: its 'user' value is used to
    look up one document in the 'users' collection, and the lookup result
    replaces that value. One query is issued per item. Nothing is returned.
    """
    users_coll = current_app.data.driver.db['users']

    # Fields that are excluded from the embedded user document.
    hidden_fields = ('auth', 'groups', 'roles', 'settings', 'email',
                     '_created', '_updated', '_etag')

    for doc in latest:
        # A fresh exclusion projection is built for every lookup.
        exclusion = dict.fromkeys(hidden_fields, 0)
        doc['user'] = users_coll.find_one(doc['user'], exclusion)
def embed_project(latest):
    """Swap each document's 'project' reference for a small project document.

    Every item in `latest` is modified in place: its 'project' value is used
    to look up one document in the 'projects' collection, requesting only
    the '_id', 'name' and 'url' fields, and the lookup result replaces that
    value. One query is issued per item. Nothing is returned.
    """
    projects_coll = current_app.data.driver.db['projects']

    for doc in latest:
        wanted_fields = {'_id': 1, 'name': 1, 'url': 1}
        doc['project'] = projects_coll.find_one(doc['project'], wanted_fields)
@blueprint.route('/comments') @blueprint.route('/comments')
def latest_comments(): def latest_comments():
latest = latest_nodes({'node_type': 'comment', latest = latest_nodes({'node_type': 'comment',
'properties.status': 'published'}, 'properties.status': 'published'},
{'project': 1, 'parent': 1, 'user': 1, {'parent': 1,
'properties.content': 1, 'node_type': 1, 'properties.content': 1, 'node_type': 1,
'properties.status': 1, 'properties.status': 1,
'properties.is_reply': 1}, 'properties.is_reply': 1},
has_public_project, 10) 10)
# Embed the comments' parents. # Embed the comments' parents.
# TODO: move to aggregation pipeline.
nodes = current_app.data.driver.db['nodes'] nodes = current_app.data.driver.db['nodes']
parents = {} parents = {}
for comment in latest: for comment in latest:
@@ -118,11 +117,12 @@ def latest_comments():
parents[parent_id] = parent parents[parent_id] = parent
comment['parent'] = parent comment['parent'] = parent
embed_project(latest)
embed_user(latest)
return jsonify({'_items': latest}) return jsonify({'_items': latest})
def setup_app(app, url_prefix):
    """Register this module's blueprint and memoize _public_project_ids.

    The blueprint is registered on `app` under `url_prefix`. After that the
    module-level _public_project_ids function is rebound to a version
    wrapped by the application's cache decorator.
    """
    global _public_project_ids

    app.register_api_blueprint(blueprint, url_prefix=url_prefix)

    # Rebinding the global makes every later call in this module go through
    # the cache. timeout=3600 is presumably in seconds (one hour) -- confirm
    # against the cache extension in use.
    _public_project_ids = app.cache.cached(timeout=3600)(_public_project_ids)