Latest assets/comments: using Mongo aggregation instead of Python code

This commit is contained in:
Sybren A. Stüvel 2017-09-15 16:47:40 +02:00
parent 1c70d80b99
commit 62954ac157

View File

@ -1,5 +1,8 @@
import functools
import itertools
import typing
import bson
import pymongo
from flask import Blueprint, current_app
@ -12,99 +15,95 @@ def keep_fetching(collection, db_filter, projection, sort, py_filter,
batch_size=12):
"""Yields results for which py_filter returns True"""
projection['_deleted'] = 1
db_filter['_deleted'] = {'$ne': True}
curs = collection.find(db_filter, projection).sort(sort)
curs.batch_size(batch_size)
for doc in curs:
if doc.get('_deleted'):
continue
doc.pop('_deleted', None)
if py_filter(doc):
yield doc
def latest_nodes(db_filter, projection, py_filter, limit):
nodes = current_app.data.driver.db['nodes']
def _public_project_ids() -> typing.List[bson.ObjectId]:
    """Return the ObjectIDs of all public projects.

    Queries the 'projects' collection for documents whose ``is_private``
    field equals False, fetching nothing but their ``_id``.

    Memoized in setup_app().
    """
    only_public = {'is_private': False}
    id_only = {'_id': 1}
    cursor = current_app.db('projects').find(only_public, id_only)
    return [project['_id'] for project in cursor]
def latest_nodes(db_filter, projection, limit):
"""Returns the latest nodes, of a certain type, of public projects.
Also includes information about the project and the user of each node.
"""
proj = {
'_created': 1,
'_updated': 1,
'user.full_name': 1,
'project._id': 1,
'project.url': 1,
'project.name': 1,
'name': 1,
'node_type': 1,
'parent': 1,
**projection,
}
proj.update(projection)
latest = keep_fetching(nodes, db_filter, proj,
[('_created', pymongo.DESCENDING)],
py_filter, limit)
nodes_coll = current_app.db('nodes')
pipeline = [
{'$match': {'_deleted': {'$ne': True}}},
{'$match': db_filter},
{'$match': {'project': {'$in': _public_project_ids()}}},
{'$sort': {'_created': pymongo.DESCENDING}},
{'$limit': limit},
{'$lookup': {"from": "users",
"localField": "user",
"foreignField": "_id",
"as": "user"}},
{'$unwind': {'path': "$user"}},
{'$lookup': {"from": "projects",
"localField": "project",
"foreignField": "_id",
"as": "project"}},
{'$unwind': {'path': "$project"}},
{'$project': proj},
]
result = list(itertools.islice(latest, limit))
return result
def has_public_project(node_doc):
    """Tell whether the project the node belongs to is public.

    The project ID is read from the node's 'project' key (None when the key
    is absent) and handed to is_project_public().
    """
    return is_project_public(node_doc.get('project'))
# TODO: cache result, for a limited amt. of time, or for this HTTP request.
def is_project_public(project_id):
    """Returns True iff the project is public.

    :param project_id: value identifying the project, passed to find_one()
        on the 'projects' collection; may be None.
    :returns: False when project_id is None or when no project is found;
        otherwise True unless the project's 'is_private' value is truthy.
    """
    # PyMongo treats find_one(None) as "no filter" and returns an arbitrary
    # document, so a missing project ID must never reach the query.
    if project_id is None:
        return False

    projects = current_app.data.driver.db['projects']
    # Only 'is_private' is inspected below, so fetch nothing else.
    project = projects.find_one(project_id, {'is_private': 1})
    if not project:
        return False
    return not project.get('is_private')
print('QUERY: db.nodes.aggregate(%r)' % pipeline)
latest = nodes_coll.aggregate(pipeline)
return list(latest)
@blueprint.route('/assets')
def latest_assets():
latest = latest_nodes({'node_type': 'asset',
'properties.status': 'published'},
{'name': 1, 'project': 1, 'user': 1, 'node_type': 1,
{'name': 1, 'node_type': 1,
'parent': 1, 'picture': 1, 'properties.status': 1,
'properties.content_type': 1,
'permissions.world': 1},
has_public_project, 12)
embed_user(latest)
embed_project(latest)
12)
return jsonify({'_items': latest})
def embed_user(latest):
    """Replace each document's 'user' value with the looked-up user document.

    The documents in `latest` are modified in place. One find_one() call on
    the 'users' collection is made per document; the fields named in the
    exclusion projection below are left out of the embedded user.
    """
    # Exclusion projection: every field except these is returned.
    hidden_fields = {
        'auth': 0, 'groups': 0, 'roles': 0, 'settings': 0, 'email': 0,
        '_created': 0, '_updated': 0, '_etag': 0,
    }
    users_coll = current_app.data.driver.db['users']
    for doc in latest:
        doc['user'] = users_coll.find_one(doc['user'], hidden_fields)
def embed_project(latest):
    """Replace each document's 'project' value with the looked-up project.

    The documents in `latest` are modified in place. One find_one() call on
    the 'projects' collection is made per document, requesting only the
    '_id', 'name' and 'url' fields.
    """
    wanted_fields = {'_id': 1, 'name': 1, 'url': 1}
    projects_coll = current_app.data.driver.db['projects']
    for doc in latest:
        doc['project'] = projects_coll.find_one(doc['project'], wanted_fields)
@blueprint.route('/comments')
def latest_comments():
latest = latest_nodes({'node_type': 'comment',
'properties.status': 'published'},
{'project': 1, 'parent': 1, 'user': 1,
{'parent': 1,
'properties.content': 1, 'node_type': 1,
'properties.status': 1,
'properties.is_reply': 1},
has_public_project, 10)
10)
# Embed the comments' parents.
# TODO: move to aggregation pipeline.
nodes = current_app.data.driver.db['nodes']
parents = {}
for comment in latest:
@ -118,11 +117,12 @@ def latest_comments():
parents[parent_id] = parent
comment['parent'] = parent
embed_project(latest)
embed_user(latest)
return jsonify({'_items': latest})
def setup_app(app, url_prefix):
    """Register the blueprint on the app and memoize _public_project_ids().

    :param app: application object; must offer register_api_blueprint() and
        a `cache` attribute with a cached() decorator factory.
    :param url_prefix: passed through to register_api_blueprint().

    The module-level _public_project_ids function is rebound to a version
    wrapped by app.cache.cached(timeout=3600).
    """
    global _public_project_ids

    app.register_api_blueprint(blueprint, url_prefix=url_prefix)

    # Rebinding the global makes every caller in this module use the cache.
    memoize = app.cache.cached(timeout=3600)
    _public_project_ids = memoize(_public_project_ids)