20 from htmlentitydefs
import entitydefs
21 from HTMLParser
import HTMLParser
24 from html.entities
import entitydefs
25 from html.parser
import HTMLParser
30 NON_BREAKING_SPACE =
u'\xA0'
40 HTMLParser.__init__(self)
45 'tr_end' : self.
tr_endtr_end,
47 'td_end' : self.
td_endtd_end,
49 'th_end' : self.
td_endtd_end,
53 def read(self, htmlfile, populator, path=None):
59 report_html_pattern = b
'<meta content="Robot Framework .*" name="Generator">'
61 is_report_html =
False
62 for line
in htmlfile.readlines():
63 self.feed(self.
_decode_decode(line))
64 if re.match(report_html_pattern, line):
70 if self.
populatorpopulator.eof()
and not is_report_html:
72 LOGGER.warn(
"Using test data in HTML format is deprecated. "
73 "Convert '%s' to plain text format."
74 % (path
or htmlfile.name))
77 return line.decode(self.
_encoding_encoding)
80 handler = self.
_handlers_handlers.get(tag+
'_start')
81 if handler
is not None:
85 handler = self.
_handlers_handlers.get(tag+
'_end')
86 if handler
is not None:
92 if NON_BREAKING_SPACE
in data:
93 data = data.replace(NON_BREAKING_SPACE,
' ')
104 value = entitydefs[name]
107 if value.startswith(
'&#'):
110 return value.decode(
'ISO-8859-1')
118 if number.startswith((
'x',
'X')):
126 return '&#'+number+
';'
134 self.state = self.INITIAL
135 self.current_row =
None
136 self.current_cell =
None
182 valid_http_equiv =
False
184 for name, value
in attrs:
186 if name ==
'charset':
188 if name ==
'http-equiv' and value.lower() ==
'content-type':
189 valid_http_equiv =
True
190 if name ==
'content':
192 return encoding
if valid_http_equiv
else None
195 for token
in value.split(
';'):
196 token = token.strip()
197 if token.lower().startswith(
'charset='):
207 if not data.lower().startswith(
'xml '):
209 if data.endswith(
'?'):
211 for token
in data.split():
212 if token.lower().startswith(
'encoding='):
214 if encoding.startswith(
"'")
or encoding.startswith(
'"'):
215 encoding = encoding[1:-1]
def handle_data(self, data)
def _get_encoding_from_pi(self, data)
def meta_start(self, attrs)
def unknown_decl(self, data)
def _handle_entityref(self, name)
def _get_encoding_from_meta(self, attrs)
def table_start(self, attrs=None)
def handle_entityref(self, name)
def _get_encoding_from_content_attr(self, value)
def handle_charref(self, number)
def __init__(self, spaces=2)
def handle_endtag(self, tag)
def handle_pi(self, data)
def tr_start(self, attrs=None)
def handle_starttag(self, tag, attrs)
def read(self, htmlfile, populator, path=None)
def br_start(self, attrs=None)
def _handle_charref(self, number)
def td_start(self, attrs=None)