django-shared-utils/shared/utils/text.py

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
# Erik Stein <code@classlibrary.net>, 2015-2017

from django.utils import six
if six.PY3:
    import html

import re

from django.utils.encoding import force_text, smart_text
from django.utils.functional import keep_lazy, keep_lazy_text
from django.utils.safestring import SafeText
from django.utils.html import mark_safe
from django.utils.text import slugify
from django.utils.translation import ugettext_lazy

# from bs4 import BeautifulStoneSoup
import translitcodec  # provides 'translit/long', used by codecs.encode()
import codecs


def downgrade(value):
    """
    Transliterate ``value`` using the 'translit/long' codec.

    The input is first coerced to text with ``force_text`` and then passed
    through ``codecs.encode`` so that accented characters are replaced by
    their transliterated counterparts.
    """
    return codecs.encode(force_text(value), 'translit/long')
# downgrade = allow_lazy(downgrade, six.text_type, SafeText)


def slugify_long(value):
    """
    Return a slug for ``value`` after transliterating it with ``downgrade``.
    """
    transliterated = downgrade(value)
    return slugify(transliterated)
# slugify_long = allow_lazy(slugify_long, six.text_type, SafeText)


# Alias of slugify_long: no separate implementation is needed for German
# text because spreading umlauts is included in the translit/long codec.
slugify_german = slugify_long


if six.PY2:
    import bs4

    def html_entities_to_unicode(html):
        """
        Return ``html`` as text with HTML/XML entities converted to unicode.

        Python 2 variant: the markup is parsed by BeautifulSoup with the
        'lxml' parser and the parsed document is converted to text with
        ``smart_text``. The 'lxml' parser must be available to bs4.
        """
        # An incoming HTML or XML entity is always converted into the
        # corresponding Unicode character in bs4.
        # The parser name is an argument of BeautifulSoup(); it used to be
        # passed to smart_text(), where it was taken as the ``encoding``
        # argument and the parser choice was left to bs4's auto-detection.
        return smart_text(bs4.BeautifulSoup(html, 'lxml'))

else:
    # Works only with Python >= 3.4
    def html_entities_to_unicode(html_str):
        """
        Return ``html_str`` with HTML character references converted to the
        corresponding unicode characters, using ``html.unescape``.
        """
        return html.unescape(html_str)
    # html_entities_to_unicode = allow_lazy(html_entities_to_unicode, six.text_type, SafeText)


# Lazily translated default for the ``separator`` argument of
# get_text_joined().

# Translators: This string is used as a separator between list elements
DEFAULT_SEPARATOR = ugettext_lazy(", ")


@keep_lazy_text
def get_text_joined(list_, separator=DEFAULT_SEPARATOR, last_word=ugettext_lazy(' and ')):
    """
    Join the elements of ``list_`` into one human-readable string.

    All elements but the last are joined with ``separator``; the last one is
    appended after ``last_word``. An empty input yields an empty string and
    a single element is returned on its own. Every element is converted
    with ``force_text``.
    """
    entries = list(list_)
    count = len(entries)
    if count == 0:
        return ''
    if count == 1:
        return force_text(entries[0])
    # Build the separator-joined head first, then attach the final element.
    head = separator.join(force_text(entry) for entry in entries[:-1])
    return '%s%s%s' % (head, force_text(last_word), force_text(entries[-1]))


def slimdown(text):
    """
    Convert simplified markdown emphasis markers to inline HTML tags.

    ``**text**`` becomes ``<b>text</b>``, ``*text*`` becomes ``<i>text</i>``
    and ``__text__`` becomes ``<u>text</u>``.

    The input is not escaped before the result is passed to ``mark_safe``.
    """
    # Applied in order: the bold rule has to run before the italic rule,
    # otherwise the single-asterisk pattern would consume the ``**`` markers.
    rules = (
        (r"(\*\*)(.*?)\1", "<b>\\2</b>"),
        (r"(\*)(.*?)\1", "<i>\\2</i>"),
        (r"(__)(.*?)\1", "<u>\\2</u>"),
    )
    for pattern, replacement in rules:
        text = re.sub(pattern, replacement, text)
    return mark_safe(text)
Added version from HFI. 9 years ago			`# -*- coding: utf-8 -*-`
			`from __future__ import unicode_literals`
Added html_entities_to_unicode function and template tag. 9 years ago			`# Erik Stein <code@classlibrary.net>, 2015-2017`
Added version from HFI. 9 years ago
htmlentities_to_unicode Python 2 compatibility. 8 years ago			`from django.utils import six`
			`if six.PY3:`
			`import html`

Added slimdown functionality. 8 years ago			`import re`
Fixed html_entities_to_unicode (works only in Python 3). 8 years ago
htmlentities_to_unicode Python 2 compatibility. 8 years ago			`from django.utils.encoding import force_text, smart_text`
Django 2.0 fixes. 8 years ago			`from django.utils.functional import keep_lazy, keep_lazy_text`
Added version from HFI. 9 years ago			`from django.utils.safestring import SafeText`
Added slimdown functionality. 8 years ago			`from django.utils.html import mark_safe`
Added get_text_joined function. 8 years ago			`from django.utils.text import slugify`
Added slimdown functionality. 8 years ago			`from django.utils.translation import ugettext_lazy`
Added version from HFI. 9 years ago
Added slimdown functionality. 8 years ago			`# from bs4 import BeautifulStoneSoup`
Added html_entities_to_unicode function and template tag. 9 years ago			`import translitcodec # provides 'translit/long', used by codecs.encode()`
Added version from HFI. 9 years ago			`import codecs`


			`def downgrade(value):`
			`"""`
			`Downgrade unicode to ascii, transliterating accented characters.`
			`"""`
			`value = force_text(value)`
			`return codecs.encode(value, 'translit/long')`
Don't allow lazy for slugify-functions FIXME fix dependent exceptions and reactive later 8 years ago			`# downgrade = allow_lazy(downgrade, six.text_type, SafeText)`
Added version from HFI. 9 years ago

			`def slugify_long(value):`
			`return slugify(downgrade(value))`
Don't allow lazy for slugify-functions FIXME fix dependent exceptions and reactive later 8 years ago			`# slugify_long = allow_lazy(slugify_long, six.text_type, SafeText)`
Added version from HFI. 9 years ago

Added html_entities_to_unicode function and template tag. 9 years ago			`# Spreading umlauts is included in the translit/long codec.`
			`slugify_german = slugify_long`
Added version from HFI. 9 years ago
Added html_entities_to_unicode function and template tag. 9 years ago
htmlentities_to_unicode Python 2 compatibility. 8 years ago			`if six.PY2:`
			`import bs4`

			`def html_entities_to_unicode(html):`
			`# An incoming HTML or XML entity is always converted into the corresponding Unicode character in bs4`
BeautifulSoup API. 8 years ago			`return smart_text(bs4.BeautifulSoup(html), 'lxml')`
Fixed html_entities_to_unicode (works only in Python 3). 8 years ago
htmlentities_to_unicode Python 2 compatibility. 8 years ago			`else:`
			`# Works only with Python >= 3.4`
			`def html_entities_to_unicode(html_str):`
			`return html.unescape(html_str)`
			`# html_entities_to_unicode = allow_lazy(html_entities_to_unicode, six.text_type, SafeText)`
Added get_text_joined function. 8 years ago

			`# Translators: This string is used as a separator between list elements`
			`DEFAULT_SEPARATOR = ugettext_lazy(", ")`

Fixed html_entities_to_unicode (works only in Python 3). 8 years ago
Added get_text_joined function. 8 years ago			`@keep_lazy_text`
			`def get_text_joined(list_, separator=DEFAULT_SEPARATOR, last_word=ugettext_lazy(' and ')):`
			`list_ = list(list_)`
			`if len(list_) == 0:`
			`return ''`
			`if len(list_) == 1:`
			`return force_text(list_[0])`
			`return '%s%s%s' % (`
			`separator.join(force_text(i) for i in list_[:-1]),`
			`force_text(last_word), force_text(list_[-1]))`
Added slimdown functionality. 8 years ago

			`def slimdown(text):`
			`"""`
fix slimdown bold 8 years ago			`Converts simplified markdown (**, *, _) to <b>, <i> und <u> tags.`
Added slimdown functionality. 8 years ago			`"""`
			`b_pattern = re.compile(r"(\*\*)(.*?)\1")`
fix slimdown bold 8 years ago			`i_pattern = re.compile(r"(\*)(.*?)\1")`
PageTitlesMixin with translated fields (optional); improved slimdown. 8 years ago			`u_pattern = re.compile(r"(__)(.*?)\1")`
Added slimdown functionality. 8 years ago
			`text, n = re.subn(b_pattern, "<b>\\2</b>", text)`
fix slimdown bold 8 years ago			`text, n = re.subn(i_pattern, "<i>\\2</i>", text)`
Added slimdown functionality. 8 years ago			`text, n = re.subn(u_pattern, "<u>\\2</u>", text)`
			`return mark_safe(text)`