Mix of Python and Django utility functions, classes, etc.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

118 lines
3.1 KiB

import codecs
import html
import re
import translitcodec # provides 'translit/long', used by codecs.encode() # noqa
from django.conf import settings
from django.utils.encoding import force_str
from django.utils.functional import keep_lazy_text
from django.utils.html import mark_safe, strip_tags
from django.utils.text import slugify as django_slugify, normalize_newlines
from django.utils.translation import gettext_lazy
@keep_lazy_text
def downgrade(value):
    """
    Downgrade unicode to ascii, transliterating accented characters.

    Falsy input (``None``, ``""``, ...) is treated as an empty string; any
    other value is coerced with ``force_str`` and then encoded with the
    ``'transliterate'`` codec.
    """
    text = force_str(value if value else "")
    return codecs.encode(text, 'transliterate')
@keep_lazy_text
def slugify_long(value):
    """Run ``value`` through ``downgrade`` and slugify the result with Django's slugify."""
    downgraded = downgrade(value)
    return django_slugify(downgraded)


# Spreading umlauts is included in the translit/long codec.
slugify_german = slugify_long
@keep_lazy_text
def downgrading_slugify(value):
    """
    Slugify ``value`` after downgrading it, then replace every run of spaces
    and underscores in the slug with a single hyphen.
    """
    # Slugify only allowing hyphens, numbers and ASCII characters
    # FIXME django_slugify might return an empty string; take care that we always return something
    slug = django_slugify(downgrade(value))
    return re.sub("[ _]+", "-", slug)


# The ``SLUGIFY_FUNCTION`` setting, when present, replaces the default slugifier.
SLUGIFY_FUNCTION = getattr(settings, 'SLUGIFY_FUNCTION', downgrading_slugify)
slugify = SLUGIFY_FUNCTION
@keep_lazy_text
def html_entities_to_unicode(html_str):
    """Return ``html_str`` with its HTML character references resolved by ``html.unescape``."""
    unescaped = html.unescape(html_str)
    return unescaped
# Translators: Separator between list elements
DEFAULT_SEPARATOR = gettext_lazy(", ")
# Translators: Last separator of list elements
LAST_WORD_SEPARATOR = gettext_lazy(" and ")


@keep_lazy_text
def text_joined(list_, separator=DEFAULT_SEPARATOR, last_word=LAST_WORD_SEPARATOR):
    """
    Join the elements of ``list_`` into one string.

    All elements but the last are joined with ``separator``; the last one is
    appended after ``last_word``. An empty iterable gives ``''`` and a single
    element is returned on its own. Elements are coerced with ``force_str``.
    """
    items = list(list_)
    if not items:
        return ''
    *leading, final = items
    if not leading:
        return force_str(final)
    return '%s%s%s' % (
        separator.join(map(force_str, leading)),
        force_str(last_word),
        force_str(final),
    )
# TODO Don't match escaped stars (like \*)
b_pattern = re.compile(r"(\*\*)(.*?)\1")
i_pattern = re.compile(r"(\*)(.*?)\1")
u_pattern = re.compile(r"(__)(.*?)\1")
link_pattern = re.compile(r"\[([^\[]+)\]\(([^\)]+)\)")


@keep_lazy_text
def slimdown(text):
    """
    Converts simplified markdown to HTML tags and marks the result as safe.

    ``**x**`` becomes ``<b>``, ``*x*`` becomes ``<i>``, ``__x__`` becomes
    ``<u>`` and ``[label](url)`` becomes an ``<a>`` tag. Falsy input yields
    an empty string.

    NOTE(review): ``text`` is not escaped before ``mark_safe`` is applied, so
    callers presumably have to pass trusted or pre-escaped text -- confirm.
    """
    if not text:
        return ""
    # Applied in this order: bold (``**``) must be consumed before italic (``*``).
    rules = (
        (b_pattern, "<b>\\2</b>"),
        (i_pattern, "<i>\\2</i>"),
        (u_pattern, "<u>\\2</u>"),
        (link_pattern, "<a href=\'\\2\'>\\1</a>"),
    )
    for pattern, replacement in rules:
        text = pattern.sub(replacement, text)
    return mark_safe(text)
@keep_lazy_text
def strip_links(text):
    """
    Remove anchor tags from ``text`` while keeping the content between them.

    Opening ``<a>`` / ``<a ...>`` tags and literal ``</a>`` closing tags are
    dropped. Only elements named exactly ``a`` are affected: the lookahead
    rejects tag names that continue with a word character or a hyphen, so
    tags such as ``<abbr ...>``, ``<article>`` or ``<a-scene>`` are left
    untouched instead of losing their opening tag.
    """
    # ``[^>]`` already spans newlines, so multi-line tags match without DOTALL.
    without_openers = re.sub(r'<a(?![\w-])[^>]*>', '', text)
    return without_openers.replace('</a>', '')
COLLAPSE_WHITESPACE_RE = re.compile(r"\s+")


@keep_lazy_text
def collapse_whitespace(text):
    """Replace every whitespace run in ``text`` with one space and trim both ends."""
    single_spaced = COLLAPSE_WHITESPACE_RE.sub(" ", text)
    return single_spaced.strip()
@keep_lazy_text
def html_to_text(text):
    """
    Reduce an HTML fragment to plain text.

    ``text`` is coerced with ``str``, its HTML entities are unescaped, tags
    are removed with Django's ``strip_tags`` and whitespace is collapsed.
    Entities are unescaped *before* tags are stripped, so entity-escaped
    markup is handed to ``strip_tags`` as real markup.
    """
    unescaped = html_entities_to_unicode(str(text))
    return collapse_whitespace(strip_tags(unescaped))
try:
    from html_sanitizer.django import get_sanitizer
except ImportError:
    # html_sanitizer is not installed: ``sanitized_html`` is simply not defined.
    pass
else:
    def sanitized_html(html, config_name='default'):
        """Sanitize ``html`` with the sanitizer returned for ``config_name``."""
        # The ``html`` parameter shadows the stdlib ``html`` module inside
        # this function only; the module is not used here.
        return get_sanitizer(config_name).sanitize(html)