You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
38 lines
1.3 KiB
38 lines
1.3 KiB
import re |
|
from django.contrib.auth.models import AnonymousUser |
|
from django.http import HttpRequest |
|
from django.template.loader import render_to_string |
|
from django.utils.html import strip_tags |
|
|
|
from shared.utils.text import html_entities_to_unicode |
|
|
|
|
|
def render_page_as_html(page, template, context_data, css_selector=None):
    """Render ``template`` with ``context_data`` on behalf of an anonymous user.

    The template is rendered through ``render_to_string`` using a bare
    ``HttpRequest`` whose ``user`` is an ``AnonymousUser``.

    When ``css_selector`` is given, the rendered markup is parsed with
    ``lxml.html``, every ``<script>`` and ``<style>`` element is dropped, and
    only the elements matching the selector are kept; each one is serialised,
    stripped of surrounding whitespace and joined with newlines. Without a
    selector the rendered markup is returned as is.

    Args:
        page: Not read by this function; kept in the signature for callers.
        template: Template passed to ``render_to_string``; must be truthy.
        context_data: Context passed to ``render_to_string``.
        css_selector: Optional CSS selector limiting the returned markup.

    Returns:
        The rendered HTML as a string (an empty string when a selector is
        given but matches nothing).

    Raises:
        AssertionError: If ``template`` is falsy.
    """
    # Check the precondition before doing any work.
    assert template, "No template supplied"

    request = HttpRequest()
    request.user = AnonymousUser()
    html = render_to_string(template, context_data, request=request)

    if not css_selector:
        return html

    # Imported lazily so lxml is only needed when a selector is used.
    import lxml.html

    doc = lxml.html.fromstring(html)
    for element in doc.cssselect('script,style'):
        # drop_tree() keeps the text that follows the element (its tail);
        # getparent().remove() would silently discard that text as well.
        element.drop_tree()

    return '\n'.join(
        lxml.html.tostring(part).decode().strip()
        for part in doc.cssselect(css_selector)
    )
|
|
|
|
|
def render_page_as_text(page, template, context_data, css_selector=None):
    """Render a page and reduce the result to whitespace-normalised text.

    The markup comes from ``render_page_as_html`` (all arguments are passed
    through unchanged). Tags are removed with ``strip_tags`` and the result is
    run through ``html_entities_to_unicode`` (presumably decoding HTML
    entities -- confirm against that helper), then stripped.

    Whitespace is then normalised: runs of tabs/spaces become one space,
    spaces directly after a newline are removed, and consecutive newlines are
    merged into one. Finally, every space-separated token longer than 245
    characters is discarded. Tokens are split on spaces only, so a single
    token may still contain newlines.

    Returns:
        The cleaned-up text as a single string.
    """
    markup = render_page_as_html(page, template, context_data, css_selector)
    plain = html_entities_to_unicode(strip_tags(markup)).strip()

    # Applied in order; each step relies on the previous one's output.
    whitespace_rules = (
        ('[\t ]+', ' '),   # collapse runs of tabs and spaces
        ('\n +', '\n'),    # drop indentation after a line break
        ('\n+', '\n'),     # merge consecutive line breaks
    )
    for pattern, replacement in whitespace_rules:
        plain = re.sub(pattern, replacement, plain)

    # Keep only tokens of at most 245 characters.
    return ' '.join(
        token for token in plain.split(' ') if len(token) <= 245
    )
|
|
|