diff --git a/shared/utils/text.py b/shared/utils/text.py
index 8959e06..2a7bb16 100644
--- a/shared/utils/text.py
+++ b/shared/utils/text.py
@@ -6,8 +6,8 @@ import translitcodec # provides 'translit/long', used by codecs.encode() # noqa
 from django.conf import settings
 from django.utils.encoding import force_str
 from django.utils.functional import keep_lazy_text
-from django.utils.html import mark_safe
-from django.utils.text import slugify as django_slugify
+from django.utils.html import mark_safe, strip_tags
+from django.utils.text import slugify as django_slugify, normalize_newlines
 from django.utils.translation import gettext_lazy
 
 
@@ -40,6 +40,7 @@ SLUGIFY_FUNCTION = getattr(settings, 'SLUGIFY_FUNCTION', downgrading_slugify)
 slugify = SLUGIFY_FUNCTION
 
 
+@keep_lazy_text
 def html_entities_to_unicode(html_str):
     return html.unescape(html_str)
 
@@ -86,5 +87,31 @@ def slimdown(text):
     return ""
 
 
+@keep_lazy_text
 def strip_links(text):
     return re.sub(r'<a[^>]+>', '', text, flags=re.DOTALL).replace('</a>', '')
+
+
+COLLAPSE_WHITESPACE_RE = re.compile(r"\s+")
+
+
+@keep_lazy_text
+def collapse_whitespace(text):
+    """Replace each run of whitespace with one space and trim both ends."""
+    return COLLAPSE_WHITESPACE_RE.sub(" ", text).strip()
+
+
+@keep_lazy_text
+def html_to_text(text):
+    """Unescape entities, strip tags and collapse whitespace in ``text``."""
+    return collapse_whitespace(strip_tags(html_entities_to_unicode(str(text))))
+
+
+try:
+    from html_sanitizer.django import get_sanitizer
+
+    def sanitized_html(html, config_name='default'):
+        """Sanitize ``html`` with the named html_sanitizer configuration."""
+        return get_sanitizer(config_name).sanitize(html)
+except ImportError:
+    pass  # html_sanitizer is optional; sanitized_html is not defined without it