import re from django.contrib.auth.models import AnonymousUser from django.http import HttpRequest from django.template.loader import render_to_string from django.utils.html import strip_tags from content_editor.contents import contents_for_item from shared.utils.text import html_entities_to_unicode def render_page_as_html(page, template, context_data, css_selector=None): request = HttpRequest() request.user = AnonymousUser() html = render_to_string(template, context_data, request=request) if css_selector: import lxml.html doc = lxml.html.fromstring(html) for element in doc.cssselect('script,style'): element.getparent().remove(element) html = [] for part in doc.cssselect(css_selector): html.append(lxml.html.tostring(part).decode().strip()) html = '\n'.join(html) return html def render_page_as_text(page, template, context_data, css_selector=None): html = render_page_as_html(page, template, context_data, css_selector) text = html_entities_to_unicode(strip_tags(html)).strip() text = re.sub('[\t ]+', ' ', text) text = re.sub(re.compile('\n +', re.DOTALL), '\n', text) text = re.sub(re.compile('\n+', re.DOTALL), '\n', text) return text