Synchronise badges with Blender ID

Synchronisation is performed in the background by Celery Beat, every 10 minutes. Each run has a time limit of 9 minutes, to prevent multiple refresh tasks from running at the same time. Synchronisation can also be triggered manually with the `manage.py badges sync` CLI command, which can sync either a single user or all users.
2018-09-12 14:27:44 +02:00
parent 0983474e76
commit 9a9ca1bf8b
10 changed files with 475 additions and 4 deletions
--- a/pillar/init.py
+++ b/pillar/init.py
@@ -477,6 +477,7 @@ class PillarServer(BlinkerCompatibleEve):

        # Pillar-defined Celery task modules:
        celery_task_modules = [
+            'pillar.celery.badges',
            'pillar.celery.email_tasks',
            'pillar.celery.file_link_tasks',
            'pillar.celery.search_index_tasks',
--- a/pillar/api/eve_settings.py
+++ b/pillar/api/eve_settings.py
@@ -152,6 +152,14 @@ users_schema = {
        },
    },

+    'badges': {
+        'type': 'dict',
+        'schema': {
+            'html': {'type': 'string'},  # HTML fetched from Blender ID.
+            'expires': {'type': 'datetime'},  # When we should fetch it again.
+        },
+    },
+
    # Properties defined by extensions. Extensions should use their name (see the
    # PillarExtension.name property) as the key, and are free to use whatever they want as value,
    # but we suggest a dict for future extendability.
--- a/pillar/badge_sync.py
+++ b/pillar/badge_sync.py
@@ -0,0 +1,185 @@
+import collections
+import datetime
+import logging
+import typing
+from urllib.parse import urljoin
+
+import bson
+import requests
+
+from pillar import current_app
+from pillar.api.utils import utcnow
+
+# Everything needed to fetch one user's badge: our own user ID, the user's
+# OAuth token, and the user's ID at Blender ID.
+SyncUser = collections.namedtuple('SyncUser', 'user_id token bid_user_id')
+# Fetched badge HTML, together with the moment after which it should be
+# fetched again.
+BadgeHTML = collections.namedtuple('BadgeHTML', 'html expires')
+log = logging.getLogger(__name__)
+
+
+class StopRefreshing(Exception):
+    """Indicates that Blender ID is having problems.
+
+    Raised while fetching a badge when Blender ID cannot be reached or
+    reports an internal server error. Further badge refreshes should be put
+    on hold to avoid bludgeoning a suffering Blender ID.
+    """
+
+
+def find_users_to_sync() -> typing.Iterable[SyncUser]:
+    """Return user information of syncable users with badges."""
+
+    now = utcnow()
+    tokens_coll = current_app.db('tokens')
+    cursor = tokens_coll.aggregate([
+        # Find all users who have a 'badge' scope in their OAuth token.
+        {'$match': {
+            'token': {'$exists': True},
+            'oauth_scopes': 'badge',
+            'expire_time': {'$gt': now},
+        }},
+        {'$lookup': {
+            'from': 'users',
+            'localField': 'user',
+            'foreignField': '_id',
+            'as': 'user'
+        }},
+
+        # Prevent 'user' from being an array.
+        {'$unwind': {'path': '$user'}},
+
+        # Get the Blender ID user ID only.
+        {'$unwind': {'path': '$user.auth'}},
+        {'$match': {'user.auth.provider': 'blender-id'}},
+
+        # Only select those users whose badge doesn't exist or has expired.
+        {'$match': {
+            'user.badges.expires': {'$not': {'$gt': now}}
+        }},
+
+        # Make sure that the badges that expire last are also refreshed last.
+        {'$sort': {'user.badges.expires': 1}},
+
+        # Reduce the document to the info we're after.
+        {'$project': {
+            'token': True,
+            'user._id': True,
+            'user.auth.user_id': True,
+            'user.badges.expires': True,
+        }},
+    ])
+
+    log.debug('Aggregating tokens and users')
+    for user_info in cursor:
+        log.debug('User %s has badges %s',
+                  user_info['user']['_id'], user_info['user'].get('badges'))
+        yield SyncUser(
+            user_id=user_info['user']['_id'],
+            token=user_info['token'],
+            bid_user_id=user_info['user']['auth']['user_id'])
+
+
+def fetch_badge_html(session: requests.Session, user: SyncUser, size: str) \
+        -> typing.Optional[BadgeHTML]:
+    """Fetch a Blender ID badge for this user.
+
+    :param session:
+    :param user:
+    :param size: Size indication for the badge images, see the Blender ID
+        documentation/code. As of this writing valid sizes are {'s', 'm', 'l'}.
+    """
+    my_log = log.getChild('fetch_badge_html')
+
+    blender_id_endpoint = current_app.config['BLENDER_ID_ENDPOINT']
+    url = urljoin(blender_id_endpoint, f'api/badges/{user.bid_user_id}/html/{size}')
+
+    my_log.debug('Fetching badge HTML at %s for user %s', url, user.user_id)
+    try:
+        resp = session.get(url, headers={'Authorization': f'Bearer {user.token}'})
+    except requests.ConnectionError as ex:
+        my_log.warning('Unable to connect to Blender ID at %s: %s', url, ex)
+        raise StopRefreshing()
+
+    if resp.status_code == 204:
+        my_log.debug('No badges for user %s', user.user_id)
+        return None
+    if resp.status_code == 403:
+        my_log.warning('Tried fetching %s for user %s but received a 403: %s',
+                       url, user.user_id, resp.text)
+        return None
+    if resp.status_code == 400:
+        my_log.warning('Blender ID did not accept our GET request at %s for user %s: %s',
+                       url, user.user_id, resp.text)
+        return None
+    if resp.status_code == 500:
+        my_log.warning('Blender ID returned an internal server error on %s for user %s, '
+                       'aborting all badge refreshes: %s', url, user.user_id, resp.text)
+        raise StopRefreshing()
+    if resp.status_code == 404:
+        my_log.warning('Blender ID has no user %s for our user %s', user.bid_user_id, user.user_id)
+        return None
+    resp.raise_for_status()
+
+    my_log.debug('Received new badge HTML from %s for user %s', url, user.user_id)
+    badge_expiry = badge_expiry_config()
+    return BadgeHTML(
+        html=resp.text,
+        expires=utcnow() + badge_expiry,
+    )
+
+
+def refresh_all_badges(only_user_id: typing.Optional[bson.ObjectId] = None, *,
+                       dry_run=False,
+                       timelimit: datetime.timedelta):
+    """Re-fetch all badges for all users, except when already refreshed recently.
+
+    :param only_user_id: Only refresh this user. This is expected to be used
+        sparingly during manual maintenance / debugging sessions only. It does
+        fetch all users to refresh, and in Python code skips all except the
+        given one.
+    :param dry_run: if True the changes are described in the log, but not performed.
+    :param timelimit: Refreshing will stop after this time. This allows for cron(-like)
+        jobs to run without overlapping, even when the number fo badges to refresh
+        becomes larger than possible within the period of the cron job.
+    """
+    from requests.adapters import HTTPAdapter
+    my_log = log.getChild('fetch_badge_html')
+
+    # Test the config before we start looping over the world.
+    badge_expiry = badge_expiry_config()
+    if not badge_expiry or not isinstance(badge_expiry, datetime.timedelta):
+        raise ValueError('BLENDER_ID_BADGE_EXPIRY not configured properly, should be a timedelta')
+
+    session = requests.Session()
+    session.mount('https://', HTTPAdapter(max_retries=5))
+    users_coll = current_app.db('users')
+
+    deadline = utcnow() + timelimit
+
+    num_updates = 0
+    for user_info in find_users_to_sync():
+        if utcnow() > deadline:
+            my_log.info('Stopping badge refresh because the timelimit %s (H:MM:SS) was hit.',
+                        timelimit)
+            break
+
+        if only_user_id and user_info.user_id != only_user_id:
+            my_log.debug('Skipping user %s', user_info.user_id)
+            continue
+        try:
+            badge_info = fetch_badge_html(session, user_info, 's')
+        except StopRefreshing:
+            my_log.error('Blender ID has internal problems, stopping badge refreshing at user %s',
+                         user_info)
+            break
+
+        num_updates += 1
+        my_log.info('Updating badges HTML for Blender ID %s, user %s',
+                    user_info.bid_user_id, user_info.user_id)
+        if not dry_run:
+            result = users_coll.update_one({'_id': user_info.user_id},
+                                           {'$set': {'badges': badge_info._asdict()}})
+            if result.matched_count != 1:
+                my_log.warning('Unable to update badges for user %s', user_info.user_id)
+    my_log.info('Updated badges of %d users%s', num_updates, ' (dry-run)' if dry_run else '')
+
+
+def badge_expiry_config() -> typing.Optional[datetime.timedelta]:
+    """Return the BLENDER_ID_BADGE_EXPIRY value from the application config.
+
+    The setting is looked up with .get(), so the result is presumably None
+    when it is not configured; callers should validate the value before use.
+    """
+    return current_app.config.get('BLENDER_ID_BADGE_EXPIRY')
--- a/pillar/celery/badges.py
+++ b/pillar/celery/badges.py
@@ -0,0 +1,20 @@
+"""Badge HTML synchronisation.
+
+Note that this module can only be imported when an application context is
+active. Best to late-import this in the functions where it's needed.
+"""
+import datetime
+import logging
+
+from pillar import current_app, badge_sync
+
+log = logging.getLogger(__name__)
+
+
+@current_app.celery.task(ignore_result=True)
+def sync_badges_for_users(timelimit_seconds: int):
+    """Synchronises Blender ID badges for the most-urgent users."""
+
+    timelimit = datetime.timedelta(seconds=timelimit_seconds)
+    log.info('Refreshing badges, timelimit is %s (H:MM:SS)', timelimit)
+    badge_sync.refresh_all_badges(timelimit=timelimit)
--- a/pillar/cli/init.py
+++ b/pillar/cli/init.py
@@ -13,6 +13,7 @@ from pillar.cli.maintenance import manager_maintenance
 from pillar.cli.operations import manager_operations
 from pillar.cli.setup import manager_setup
 from pillar.cli.elastic import manager_elastic
+from . import badges

 from pillar.cli import translations

@@ -24,3 +25,4 @@ manager.add_command("maintenance", manager_maintenance)
 manager.add_command("setup", manager_setup)
 manager.add_command("operations", manager_operations)
 manager.add_command("elastic", manager_elastic)
+manager.add_command("badges", badges.manager)
--- a/pillar/cli/badges.py
+++ b/pillar/cli/badges.py
@@ -0,0 +1,39 @@
+import datetime
+import logging
+
+from flask_script import Manager
+from pillar import current_app, badge_sync
+from pillar.api.utils import utcnow
+
+log = logging.getLogger(__name__)
+
+# Flask-Script manager that holds the badge-related CLI commands defined below.
+manager = Manager(current_app, usage="Badge operations")
+
+
+@manager.option('-u', '--user', dest='email', default='', help='Email address of the user to sync')
+@manager.option('-a', '--all', dest='sync_all', action='store_true', default=False,
+                help='Sync all users')
+@manager.option('--go', action='store_true', default=False,
+                help='Actually perform the sync; otherwise it is a dry-run.')
+def sync(email: str = '', sync_all: bool=False, go: bool=False):
+    if bool(email) == bool(sync_all):
+        raise ValueError('Use either --user or --all.')
+
+    if email:
+        users_coll = current_app.db('users')
+        db_user = users_coll.find_one({'email': email}, projection={'_id': True})
+        if not db_user:
+            raise ValueError(f'No user with email {email!r} found')
+        specific_user = db_user['_id']
+    else:
+        specific_user = None
+
+    if not go:
+        log.info('Performing dry-run, not going to change the user database.')
+    start_time = utcnow()
+    badge_sync.refresh_all_badges(specific_user, dry_run=not go,
+                                  timelimit=datetime.timedelta(hours=1))
+    end_time = utcnow()
+    log.info('%s took %s (H:MM:SS)',
+             'Updating user badges' if go else 'Dry-run',
+             end_time - start_time)
--- a/pillar/config.py
+++ b/pillar/config.py
@@ -1,6 +1,8 @@
+from collections import defaultdict
+import datetime
 import os.path
 from os import getenv
-from collections import defaultdict
+
 import requests.certs

 # Certificate file for communication with other systems.
@@ -204,8 +206,18 @@ CELERY_BEAT_SCHEDULE = {
        'schedule': 600,  # every N seconds
        'args': ('gcs', 100)
    },
+    'refresh-blenderid-badges': {
+        'task': 'pillar.celery.badges.sync_badges_for_users',
+        'schedule': 600,  # every N seconds
+        'args': (540, ),  # time limit in seconds, keep shorter than 'schedule'
+    }
 }

+# How long fetched badge HTML stays valid; after this period it is re-fetched from Blender ID.
+# TODO(Sybren): A proper value should be determined after we actually have users with badges.
+BLENDER_ID_BADGE_EXPIRY = datetime.timedelta(hours=4)
+
+
 # Mapping from user role to capabilities obtained by users with that role.
 USER_CAPABILITIES = defaultdict(**{
    'subscriber': {'subscriber', 'home-project'},
--- a/pillar/tests/init.py
+++ b/pillar/tests/init.py
@@ -349,15 +349,21 @@ class AbstractPillarTest(TestMinimal):
            with flask.request_started.connected_to(signal_handler, self.app):
                yield

-    def create_valid_auth_token(self, user_id, token='token'):
+    # TODO: rename to 'create_auth_token' now that 'expire_in_days' can be negative.
+    def create_valid_auth_token(self,
+                                user_id: ObjectId,
+                                token='token',
+                                *,
+                                oauth_scopes: typing.Optional[typing.List[str]]=None,
+                                expire_in_days=1) -> dict:
        from pillar.api.utils import utcnow

-        future = utcnow() + datetime.timedelta(days=1)
+        future = utcnow() + datetime.timedelta(days=expire_in_days)

        with self.app.test_request_context():
            from pillar.api.utils import authentication as auth

-            token_data = auth.store_token(user_id, token, future, None)
+            token_data = auth.store_token(user_id, token, future, oauth_scopes=oauth_scopes)

        return token_data

--- a/pillar/web/users/routes.py
+++ b/pillar/web/users/routes.py
@@ -70,6 +70,9 @@ def oauth_callback(provider):
    db_user = find_user_in_db(user_info, provider=provider)
    db_id, status = upsert_user(db_user)

+    # TODO(Sybren): If the user doesn't have any badges, but the access token
+    # does have 'badge' scope, we should fetch the badges in the background.
+
    if oauth_user.access_token:
        # TODO(Sybren): make nr of days configurable, or get from OAuthSignIn subclass.
        token_expiry = utcnow() + datetime.timedelta(days=15)