blender-studio/common/utils.py

55 lines
1.4 KiB
Python

# noqa: D100
from html.parser import HTMLParser
import re
def chunks(lst, n):
    """Split ``lst`` into consecutive slices of at most ``n`` items.

    Slices are produced lazily, in order; the final slice is shorter when
    ``len(lst)`` is not a multiple of ``n``.
    """
    starts = range(0, len(lst), n)
    yield from (lst[start : start + n] for start in starts)
class HTMLFilter(HTMLParser):
    """Filter human-readable text portions of HTML.

    Feed markup with ``feed()`` and read the accumulated result from
    ``text``. Character data inside the tags listed in ``skip_text_of`` is
    left out, and the value of every ``href`` attribute is appended, so a
    link shows up as its target instead of its label.
    """

    # Tags whose enclosed character data is left out of ``text``.
    skip_text_of = ('a', 'style')
    # Accumulated output. ``+=`` on a str rebinds the attribute on the
    # instance, so this class-level default is not shared between parsers.
    text = ''
    # True while the parser is inside one of the ``skip_text_of`` tags.
    skip_tag_text = False

    def handle_starttag(self, tag, attrs):
        """Act on start tag.

        Enter skip mode for tags in ``skip_text_of`` and append any ``href``
        value to ``text``.
        """
        # Only tags that handle_endtag() resets may switch skipping on;
        # otherwise e.g. ``<link href=...>`` would mute all following text.
        if tag in self.skip_text_of:
            self.skip_tag_text = True
        for name, value in attrs:
            # A bare ``href`` without ``=value`` is reported as None, which
            # cannot be concatenated to a str.
            if name == 'href' and value is not None:
                self.text += value
        # Opening 'quote'/'q' tags add no text of their own; only the
        # closing tag emits a separator (see handle_endtag).

    def handle_endtag(self, tag):
        """Act on end tag.

        Leave skip mode for tags in ``skip_text_of`` and add a blank line
        after a closed 'quote'/'q' element.
        """
        if tag in self.skip_text_of:
            self.skip_tag_text = False
        if tag in ('quote', 'q'):
            self.text += '\n\n'

    def handle_data(self, data):
        """Append text unless the parser is currently in skip mode."""
        if self.skip_tag_text:
            return
        self.text += data
def html_to_text(data: str) -> str:
    """Return a human-readable text made from given HTML.

    The markup is run through ``HTMLFilter``; each resulting line then has
    its leading spaces and tabs removed, and the whole text is stripped of
    leading and trailing whitespace (including blank lines).
    """
    parser = HTMLFilter()
    parser.feed(data)
    # feed() can hold back trailing input while waiting for more data (for
    # instance text ending in an unterminated "&"); close() flushes it so
    # the tail of the document is not silently lost.
    parser.close()
    lines = [line.lstrip(' \t') for line in parser.text.split('\n')]
    # strip() already discards leading whitespace-only lines, so no separate
    # pass over the lines is needed to skip them.
    return '\n'.join(lines).strip()