From f4e0b9185bb441165cc2b05a122caed2e1dbb576 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sybren=20A=2E=20St=C3=BCvel?= Date: Mon, 26 Mar 2018 11:58:09 +0200 Subject: [PATCH] Shortcodes for YouTube and iframes Added shortcodes 2.5.0 as dependency; Earlier versions corrupted non-ASCII characters, see https://github.com/dmulholland/shortcodes/issues/6 The rendered elements have a `shortcode` CSS class. The YouTube shortcode supports various ways to refer to a video: - `{youtube VideoID}` - `{youtube youtube.com or youtu.be URL}` URLs containing an '=' should be quoted, or otherwise the shortcodes library will parse it as "key=value" pair. The IFrame shortcode supports the `cap` and `nocap` attributes. `cap` indicates the required capability the user should have in order to render the tag. If `nocap` is given, its contents are shown as a message to users who do not have this tag; without it, the iframe is silently hidden. `{iframe src='https://source' cap='subscriber' nocap='Subscribe to view'}` Merged test code + added HTML class for shortcode iframes --- pillar/markdown.py | 10 +- pillar/shortcodes.py | 204 ++++++++++++++++++++++++++++++++ pillar/web/jinja.py | 42 +++++-- requirements.txt | 1 + setup.py | 1 + src/styles/base.sass | 7 ++ tests/test_api/test_markdown.py | 30 ++++- tests/test_shortcodes.py | 157 ++++++++++++++++++++++++ tests/test_web/test_jinja.py | 14 ++- 9 files changed, 443 insertions(+), 23 deletions(-) create mode 100644 pillar/shortcodes.py create mode 100644 tests/test_shortcodes.py diff --git a/pillar/markdown.py b/pillar/markdown.py index 2a5e7f74..6a44ff4a 100644 --- a/pillar/markdown.py +++ b/pillar/markdown.py @@ -6,6 +6,8 @@ This is for user-generated stuff, like comments. import bleach import CommonMark +from . import shortcodes + ALLOWED_TAGS = [ 'a', 'abbr', @@ -40,12 +42,14 @@ ALLOWED_STYLES = [ ] -def markdown(s): - tainted_html = CommonMark.commonmark(s) +def markdown(s: str) -> str: + commented_shortcodes = shortcodes.comment_shortcodes(s) + tainted_html = CommonMark.commonmark(commented_shortcodes) safe_html = bleach.clean(tainted_html, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES, - styles=ALLOWED_STYLES) + styles=ALLOWED_STYLES, + strip_comments=False) return safe_html diff --git a/pillar/shortcodes.py b/pillar/shortcodes.py new file mode 100644 index 00000000..982a1399 --- /dev/null +++ b/pillar/shortcodes.py @@ -0,0 +1,204 @@ +"""Shortcode rendering. + +Shortcodes are little snippets between square brackets, which can be rendered +into HTML. Markdown passes such snippets unchanged to its HTML output, so this +module assumes its input is HTML-with-shortcodes. + +See mulholland.xyz/docs/shortcodes/. + +{iframe src='http://hey' has-cap='subscriber'} + +NOTE: nested braces fail, so something like {shortcode abc='{}'} is not +supported. + +NOTE: only single-line shortcodes are supported for now, due to the need to +pass them though Markdown unscathed. +""" +import html as html_module # I want to be able to use the name 'html' in local scope. +import logging +import re +import typing +import urllib.parse + +import shortcodes + +_parser: shortcodes.Parser = None +_commented_parser: shortcodes.Parser = None +log = logging.getLogger(__name__) + + +def shortcode(name: str): + """Class decorator for shortcodes.""" + + def decorator(cls): + assert hasattr(cls, '__call__'), '@shortcode should be used on callables.' + if isinstance(cls, type): + instance = cls() + else: + instance = cls + shortcodes.register(name)(instance) + return cls + + return decorator + + +@shortcode('test') +class Test: + def __call__(self, + context: typing.Any, + content: str, + pargs: typing.List[str], + kwargs: typing.Dict[str, str]) -> str: + """Just for testing. + + "{test abc='def'}" → "
test
abc
def
" + """ + + parts = ['
test
'] + + e = html_module.escape + parts.extend([ + f'
{e(key)}
{e(value)}
' for key, value in kwargs.items() + ]) + parts.append('
') + return ''.join(parts) + + +@shortcode('youtube') +class YouTube: + log = log.getChild('YouTube') + + def video_id(self, url: str) -> str: + """Find the video ID from a YouTube URL. + + :raise ValueError: when the ID cannot be determined. + """ + + if re.fullmatch(r'[a-zA-Z0-9_\-]+', url): + return url + + try: + parts = urllib.parse.urlparse(url) + if parts.netloc == 'youtu.be': + return parts.path.split('/')[1] + if parts.netloc in {'www.youtube.com', 'youtube.com'}: + if parts.path.startswith('/embed/'): + return parts.path.split('/')[2] + if parts.path.startswith('/watch'): + qs = urllib.parse.parse_qs(parts.query) + return qs['v'][0] + except (ValueError, IndexError, KeyError) as ex: + pass + + raise ValueError(f'Unable to parse YouTube URL {url!r}') + + def __call__(self, + context: typing.Any, + content: str, + pargs: typing.List[str], + kwargs: typing.Dict[str, str]) -> str: + """Embed a YouTube video. + + The first parameter must be the YouTube video ID or URL. The width and + height can be passed in the equally named keyword arguments. + """ + + width = kwargs.get('width', '560') + height = kwargs.get('height', '315') + + # Figure out the embed URL for the video. + try: + youtube_src = pargs[0] + except IndexError: + return html_module.escape('{youtube missing YouTube ID/URL}') + + try: + youtube_id = self.video_id(youtube_src) + except ValueError as ex: + return html_module.escape('{youtube %s}' % "; ".join(ex.args)) + if not youtube_id: + return html_module.escape('{youtube invalid YouTube ID/URL}') + + src = f'https://www.youtube.com/embed/{youtube_id}?rel=0' + html = f'' + return html + + +@shortcode('iframe') +def iframe(context: typing.Any, + content: str, + pargs: typing.List[str], + kwargs: typing.Dict[str, str]) -> str: + """Show an iframe to users with the required capability. + + kwargs: + - 'cap': Capability required for viewing. + - others: Turned into attributes for the iframe element. + """ + import xml.etree.ElementTree as ET + from pillar.auth import current_user + + cap = kwargs.pop('cap', None) + if not cap: + return html_module.escape('{iframe missing cap="somecap"}') + + nocap = kwargs.pop('nocap', '') + if not current_user.has_cap(cap): + if not nocap: + return '' + html = html_module.escape(nocap) + return f'

{html}

' + + kwargs['class'] = f'shortcode {kwargs.get("class", "")}'.strip() + element = ET.Element('iframe', kwargs) + html = ET.tostring(element, encoding='unicode', method='html', short_empty_elements=True) + return html + + +def _get_parser() -> typing.Tuple[shortcodes.Parser, shortcodes.Parser]: + """Return the shortcodes parser, create it if necessary.""" + global _parser, _commented_parser + if _parser is None: + start, end = '{}' + _parser = shortcodes.Parser(start, end) + _commented_parser = shortcodes.Parser(f'') + return _parser, _commented_parser + + +def render_commented(text: str, context: typing.Any = None) -> str: + """Parse and render HTML-commented shortcodes. + + Expects shortcodes like "", as output by + escape_html(). + """ + _, parser = _get_parser() + + # TODO(Sybren): catch exceptions and handle those gracefully in the response. + try: + return parser.parse(text, context) + except shortcodes.InvalidTagError as ex: + return html_module.escape('{%s}' % ex) + except shortcodes.RenderingError as ex: + return html_module.escape('{unable to render tag: %s}' % str(ex.__cause__ or ex)) + + +def render(text: str, context: typing.Any = None) -> str: + """Parse and render shortcodes.""" + parser, _ = _get_parser() + + # TODO(Sybren): catch exceptions and handle those gracefully in the response. + return parser.parse(text, context) + + +def comment_shortcodes(html: str) -> str: + """Escape shortcodes in HTML comments. + + This is required to pass the shortcodes as-is through Markdown. Render the + shortcodes afterwards with render_commented(). + + >>> comment_shortcodes("text\\n{shortcode abc='def'}\\n") + "text\\n\\n" + """ + parser, _ = _get_parser() + return parser.regex.sub(r'', html) diff --git a/pillar/web/jinja.py b/pillar/web/jinja.py index f4c74074..171f4dee 100644 --- a/pillar/web/jinja.py +++ b/pillar/web/jinja.py @@ -101,6 +101,8 @@ def do_markdown(s: typing.Optional[str]): This filter is not preferred. Use {'coerce': 'markdown'} in the Eve schema instead, to cache the HTML in the database, and use do_markdowned() to fetch it. + + Jinja example: {{ node.properties.content | markdown }} """ if s is None: return None @@ -113,33 +115,47 @@ def do_markdown(s: typing.Optional[str]): return jinja2.utils.Markup(safe_html) -def do_markdowned(document: typing.Union[dict, pillarsdk.Resource], field_name: str) -> str: +def _get_markdowned_html(document: typing.Union[dict, pillarsdk.Resource], field_name: str) -> str: """Fetch pre-converted Markdown or render on the fly. Use {'coerce': 'markdown'} in the Eve schema to cache the HTML in the database and use do_markdowned() to fetch it in a safe way. - Jinja example: {{ node.properties | markdowned:'content' }} + Places shortcode tags `{...}` between HTML comments, so that they pass + through the Markdown parser as-is. """ if isinstance(document, pillarsdk.Resource): document = document.to_dict() - if not document: + # If it's empty, we don't care what it is. return '' - - my_log = log.getChild('do_markdowned') + assert isinstance(document, dict), \ + f'document should be dict or pillarsdk.Resource, not {document!r}' cache_field_name = pillar.markdown.cache_field_name(field_name) - my_log.debug('Getting %r', cache_field_name) + html = document.get(cache_field_name) + if html is None: + markdown_src = document.get(field_name) or '' + html = pillar.markdown.markdown(markdown_src) + return html - cached_html = document.get(cache_field_name) - if cached_html is not None: - my_log.debug('Cached HTML is %r', cached_html[:40]) - return jinja2.utils.Markup(cached_html) - markdown_src = document.get(field_name) - my_log.debug('No cached HTML, rendering doc[%r]', field_name) - return do_markdown(markdown_src) +def do_markdowned(document: typing.Union[dict, pillarsdk.Resource], field_name: str) \ + -> jinja2.utils.Markup: + """Render Markdown and shortcodes. + + Use {'coerce': 'markdown'} in the Eve schema to cache the HTML in the + database and use do_markdowned() to fetch it in a safe way. + + Jinja example: {{ node.properties | markdowned('content') }} + + The value of `document` is sent as context to the shortcodes render + function. + """ + from pillar import shortcodes + html = _get_markdowned_html(document, field_name) + html = shortcodes.render_commented(html, context=document) + return jinja2.utils.Markup(html) def do_url_for_node(node_id=None, node=None): diff --git a/requirements.txt b/requirements.txt index 2b54d8cc..f0f044d0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,6 +28,7 @@ python-dateutil==2.5.3 rauth==0.7.3 raven[flask]==6.3.0 redis==2.10.5 +shortcodes==2.5.0 WebOb==1.5.0 wheel==0.29.0 zencoder==0.6.5 diff --git a/setup.py b/setup.py index 577f5119..fa7b30db 100644 --- a/setup.py +++ b/setup.py @@ -54,6 +54,7 @@ setuptools.setup( 'Pillow>=2.8.1', 'requests>=2.9.1', 'rsa>=3.3', + 'shortcodes>=2.5', # 2.4.0 and earlier corrupted unicode 'zencoder>=0.6.5', 'bcrypt>=2.0.0', 'blinker>=1.4', diff --git a/src/styles/base.sass b/src/styles/base.sass index 3af619cb..82896766 100644 --- a/src/styles/base.sass +++ b/src/styles/base.sass @@ -1217,3 +1217,10 @@ button, .btn &.active opacity: 1 + +p.shortcode.nocap + padding: 0.6em 3em + font-size: .8em + color: $color-text-dark-primary + background-color: $color-background-dark + border-radius: 3px diff --git a/tests/test_api/test_markdown.py b/tests/test_api/test_markdown.py index aab4225c..95e42974 100644 --- a/tests/test_api/test_markdown.py +++ b/tests/test_api/test_markdown.py @@ -1,12 +1,9 @@ -import copy - from pillar.tests import AbstractPillarTest from pillar.tests import common_test_data as ctd class CoerceMarkdownTest(AbstractPillarTest): def test_node_description(self): - from pillar.markdown import markdown pid, uid = self.create_project_with_admin(24 * 'a') self.create_valid_auth_token(uid, 'token-a') node = { @@ -23,10 +20,10 @@ class CoerceMarkdownTest(AbstractPillarTest): node_id = created_data['_id'] json_node = self.get(f'/api/nodes/{node_id}', auth_token='token-a').json() - self.assertEqual(markdown(node['description']), json_node['_description_html']) + self.assertEqual('

Title

\n

This is content.

\n', + json_node['_description_html']) def test_project_description(self): - from pillar.markdown import markdown from pillar.api.utils import remove_private_keys uid = self.create_user(24 * 'a', token='token-a') @@ -50,4 +47,25 @@ class CoerceMarkdownTest(AbstractPillarTest): json_proj.pop('node_types', None) # just to make it easier to print import pprint pprint.pprint(json_proj) - self.assertEqual(markdown(proj['description']), json_proj['_description_html']) + self.assertEqual('

Title

\n

This is content.

\n', + json_proj['_description_html']) + + def test_comment_shortcodes(self): + pid, uid = self.create_project_with_admin(24 * 'a') + self.create_valid_auth_token(uid, 'token-a') + node = { + 'node_type': 'group', + 'name': 'Test group', + 'description': '# Title\n\n{test a="b"}', + 'properties': {}, + 'project': pid, + 'user': uid, + } + + created_data = self.post('/api/nodes', json=node, expected_status=201, + auth_token='token-a').json() + node_id = created_data['_id'] + + json_node = self.get(f'/api/nodes/{node_id}', auth_token='token-a').json() + expect = '

Title

\n\n' + self.assertEqual(expect, json_node['_description_html']) diff --git a/tests/test_shortcodes.py b/tests/test_shortcodes.py new file mode 100644 index 00000000..ef6e6a03 --- /dev/null +++ b/tests/test_shortcodes.py @@ -0,0 +1,157 @@ +import unittest +from pillar.tests import AbstractPillarTest + + +class EscapeHTMLTest(unittest.TestCase): + def test_simple(self): + from pillar.shortcodes import comment_shortcodes + self.assertEqual( + "text\\n\\n", + comment_shortcodes("text\\n{shortcode abc='def'}\\n") + ) + + def test_double_tags(self): + from pillar.shortcodes import comment_shortcodes + self.assertEqual( + "text\\nhey\\n", + comment_shortcodes("text\\n{shortcode abc='def'}hey{othercode}\\n") + ) + + +class DegenerateTest(unittest.TestCase): + def test_degenerate_cases(self): + from pillar.shortcodes import render + + self.assertEqual('', render('')) + with self.assertRaises(TypeError): + render(None) + + +class DemoTest(unittest.TestCase): + def test_demo(self): + from pillar.shortcodes import render + + self.assertEqual('
test
', render('{test}')) + self.assertEqual('
test
a
b
', render('{test a="b"}')) + + def test_unicode(self): + from pillar.shortcodes import render + + self.assertEqual('
test
ü
é
', render('{test ü="é"}')) + + +class YouTubeTest(unittest.TestCase): + def test_missing(self): + from pillar.shortcodes import render + + self.assertEqual('{youtube missing YouTube ID/URL}', render('{youtube}')) + + def test_invalid(self): + from pillar.shortcodes import render + + self.assertEqual( + '{youtube Unable to parse YouTube URL 'https://attacker.com/'}', + render('{youtube https://attacker.com/}') + ) + + def test_id(self): + from pillar.shortcodes import render + + self.assertEqual( + '', + render('{youtube ABCDEF}') + ) + + def test_embed_url(self): + from pillar.shortcodes import render + + self.assertEqual( + '', + render('{youtube http://youtube.com/embed/ABCDEF}') + ) + + def test_youtu_be(self): + from pillar.shortcodes import render + + self.assertEqual( + '', + render('{youtube https://youtu.be/NwVGvcIrNWA}') + ) + + def test_watch(self): + from pillar.shortcodes import render + + self.assertEqual( + '', + render('{youtube "https://www.youtube.com/watch?v=NwVGvcIrNWA"}') + ) + + def test_width_height(self): + from pillar.shortcodes import render + + self.assertEqual( + '', + render('{youtube "https://www.youtube.com/watch?v=NwVGvcIrNWA" width=5 height="3"}') + ) + + +class IFrameTest(AbstractPillarTest): + def test_missing_cap(self): + from pillar.shortcodes import render + + self.assertEqual('{iframe missing cap="somecap"}', render('{iframe}')) + + def test_user_no_cap(self): + from pillar.shortcodes import render + + with self.app.app_context(): + # Anonymous user, so no subscriber capability. + self.assertEqual('', render('{iframe cap=subscriber}')) + self.assertEqual('', render('{iframe cap="subscriber"}')) + self.assertEqual( + '

Aðeins áskrifendur hafa aðgang að þessu efni.

', + render('{iframe' + ' cap="subscriber"' + ' nocap="Aðeins áskrifendur hafa aðgang að þessu efni."}')) + + def test_user_has_cap(self): + from pillar.shortcodes import render + + roles = {'demo'} + uid = self.create_user(roles=roles) + + with self.app.app_context(): + self.login_api_as(uid, roles=roles) + self.assertEqual('', + render('{iframe cap=subscriber}')) + self.assertEqual('', + render('{iframe cap="subscriber"}')) + self.assertEqual('', + render('{iframe cap="subscriber" nocap="x"}')) + + def test_attributes(self): + from pillar.shortcodes import render + + roles = {'demo'} + uid = self.create_user(roles=roles) + + md = '{iframe cap=subscriber zzz=xxx class="bigger" ' \ + 'src="https://docs.python.org/3/library/xml.etree.elementtree.html#functions"}' + expect = '' + + with self.app.app_context(): + self.login_api_as(uid, roles=roles) + self.assertEqual(expect, render(md)) diff --git a/tests/test_web/test_jinja.py b/tests/test_web/test_jinja.py index 8dccc9de..e10d0da8 100644 --- a/tests/test_web/test_jinja.py +++ b/tests/test_web/test_jinja.py @@ -23,9 +23,21 @@ class MarkdownTest(unittest.TestCase): def test_markdowned(self): from pillar.web import jinja - self.assertEqual(None, jinja.do_markdowned({'eek': None}, 'eek')) + self.assertEqual('', jinja.do_markdowned({'eek': None}, 'eek')) self.assertEqual('

ook

\n', jinja.do_markdowned({'eek': 'ook'}, 'eek')) self.assertEqual('

ook

\n', jinja.do_markdowned( {'eek': 'ook', '_eek_html': None}, 'eek')) self.assertEqual('prerendered', jinja.do_markdowned( {'eek': 'ook', '_eek_html': 'prerendered'}, 'eek')) + + def test_markdowned_with_shortcodes(self): + from pillar.web import jinja + + self.assertEqual( + '
test
a
b
c
d
\n', + jinja.do_markdowned({'eek': '{test a="b" c="d"}'}, 'eek')) + + self.assertEqual( + '

Title

\n

Before

\n' + '
test
a
b
c
d
\n', + jinja.do_markdowned({'eek': '# Title\n\nBefore\n{test a="b" c="d"}'}, 'eek'))