You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
62 lines
1.8 KiB
62 lines
1.8 KiB
from django.core.exceptions import ImproperlyConfigured
|
|
from django.utils.encoding import smart_str
|
|
from django.utils.functional import cached_property
|
|
|
|
from compressor.exceptions import ParserError
|
|
from compressor.parser import ParserBase
|
|
|
|
|
|
class Html5LibParser(ParserBase):
    """Parser backend that uses html5lib with its ``etree`` tree builder.

    The content is parsed lazily (see :attr:`html`) into a fragment whose
    elements carry XHTML-namespaced tags such as
    ``{http://www.w3.org/1999/xhtml}script``.
    """

    def __init__(self, content):
        """Store *content* via the base class and keep a handle on html5lib.

        html5lib is imported here rather than at module level, so importing
        this module does not by itself require html5lib to be installed.
        """
        super().__init__(content)
        import html5lib

        self.html5lib = html5lib

    def _serialize(self, elem):
        """Return *elem* serialized by ``html5lib.serialize``.

        Attribute values are always quoted and optional tags are kept.
        """
        return self.html5lib.serialize(
            elem,
            tree="etree",
            quote_attr_values="always",
            omit_optional_tags=False,
        )

    def _find(self, *names):
        """Yield the elements of the parsed fragment whose tag is in *names*.

        ``self.html`` is iterated directly, so (for an ElementTree element)
        only its immediate children are examined.
        """
        for elem in self.html:
            if elem.tag in names:
                yield elem

    @cached_property
    def html(self):
        """Parsed fragment for ``self.content`` (computed once, then cached).

        Raises:
            ImproperlyConfigured: if an ``ImportError`` surfaces while parsing.
            ParserError: if parsing fails with any other exception.
        """
        try:
            return self.html5lib.parseFragment(self.content, treebuilder="etree")
        except ImportError as err:
            raise ImproperlyConfigured(
                "Error while importing html5lib: %s" % err
            ) from err
        except Exception as err:
            raise ParserError("Error while initializing Parser: %s" % err) from err

    def css_elems(self):
        """Return an iterator over the ``<link>`` and ``<style>`` elements."""
        return self._find(
            "{http://www.w3.org/1999/xhtml}link", "{http://www.w3.org/1999/xhtml}style"
        )

    def js_elems(self):
        """Return an iterator over the ``<script>`` elements."""
        return self._find("{http://www.w3.org/1999/xhtml}script")

    def elem_attribs(self, elem):
        """Return the attribute mapping of *elem*."""
        return elem.attrib

    def elem_content(self, elem):
        """Return the text of *elem* as a string.

        ElementTree reports a missing text node as ``None``; that is mapped
        to the empty string so that an empty element (``<style></style>``,
        ``<script src="..."></script>``) does not yield the literal
        ``"None"``.
        """
        text = elem.text
        if text is None:
            return ""
        return smart_str(text)

    def elem_name(self, elem):
        """Return the tag name of *elem* without its ``{namespace}`` prefix."""
        if "}" in elem.tag:
            return elem.tag.split("}")[1]
        return elem.tag

    def elem_str(self, elem):
        """Return *elem* serialized back to an HTML string."""
        # This method serializes HTML in a way that does not pass all tests.
        # However, this method is only called in tests anyway, so it doesn't
        # really matter.
        return smart_str(self._serialize(elem))