doxy/ride_html/htmlreader_8py_source.html

 #  Copyright 2008-2015 Nokia Networks

 #  Copyright 2016-     Robot Framework Foundation

 #

 #  Licensed under the Apache License, Version 2.0 (the "License");

 #  you may not use this file except in compliance with the License.

 #  You may obtain a copy of the License at

 #

 #      http://www.apache.org/licenses/LICENSE-2.0

 #

 #  Unless required by applicable law or agreed to in writing, software

 #  distributed under the License is distributed on an "AS IS" BASIS,

 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 #  See the License for the specific language governing permissions and

 #  limitations under the License.

 import re

 from robotide.lib.robot.output import LOGGER

 from robotide.lib.robot.utils import PY2


 # The HTML helper modules have different names in the Python 2 and Python 3
 # standard libraries, so import them from the location selected by ``PY2``.
 if PY2:

     from htmlentitydefs import entitydefs

     from HTMLParser import HTMLParser


 else:

     from html.entities import entitydefs

     from html.parser import HTMLParser


     # Alias so that the entity/character reference handlers below can call
     # ``unichr`` on this branch as well.
     unichr = chr


 # U+00A0; ``HtmlReader.handle_data`` replaces it with a regular space.
 NON_BREAKING_SPACE = u'\xA0'


 class HtmlReader(HTMLParser):
     """Read rows from HTML tables and hand them to a populator object.

     Parsing is a small state machine driven by table markup:

     * ``IGNORE``  -- outside a table, or inside a table the populator rejected.
     * ``INITIAL`` -- inside a table whose first row is not yet complete.
     * ``PROCESS`` -- inside an accepted table; finished rows are added.

     Input lines are decoded with ``ISO-8859-1`` until a ``<meta>`` tag or an
     ``<?xml ... ?>`` processing instruction declares another encoding.
     """

     IGNORE = 0
     INITIAL = 1
     PROCESS = 2

     def __init__(self, spaces=2):
         """Set the default encoding and build the tag dispatch table.

         ``spaces`` is stored as ``_spaces``; nothing in this class reads it.
         """
         self._spaces = spaces
         HTMLParser.__init__(self)
         self._encoding = 'ISO-8859-1'
         # Keys are '<tag>_start' / '<tag>_end'; ``th`` cells are handled
         # exactly like ``td`` cells.
         self._handlers = {'table_start' : self.table_start,
                           'table_end'   : self.table_end,
                           'tr_start'    : self.tr_start,
                           'tr_end'      : self.tr_end,
                           'td_start'    : self.td_start,
                           'td_end'      : self.td_end,
                           'th_start'    : self.td_start,
                           'th_end'      : self.td_end,
                           'br_start'    : self.br_start,
                           'meta_start'  : self.meta_start}

     def read(self, htmlfile, populator, path=None):
         """Parse ``htmlfile`` and pass its table rows to ``populator``.

         ``htmlfile`` must provide ``readlines()`` returning byte strings (each
         line is decoded before it is fed to the parser) and, unless ``path``
         is given, a ``name`` attribute used in the deprecation warning.

         ``populator`` must provide ``start_table(row)`` (a true return value
         accepts the table), ``add(row)`` and ``eof()``.
         """
         self.populator = populator
         self.state = self.IGNORE
         self.current_row = None
         self.current_cell = None

         report_html_pattern = b'<meta content="Robot Framework .*" name="Generator">'

         is_report_html = False
         for line in htmlfile.readlines():
             # Decode first: a <meta>/<?xml?> seen on an earlier line may have
             # changed the encoding used for this one.
             self.feed(self._decode(line))
             if re.match(report_html_pattern, line):
                 is_report_html = True
         # Calling close is required by the HTMLParser but may cause problems
         # if the same instance of our HtmlParser is reused. Currently it's
         # used only once so there's no problem.
         self.close()
         if self.populator.eof() and not is_report_html:
             # Only warn when the html file is not report html
             LOGGER.warn("Using test data in HTML format is deprecated. "
                         "Convert '%s' to plain text format."
                         % (path or htmlfile.name))

     def _decode(self, line):
         """Decode one raw input line using the currently known encoding."""
         return line.decode(self._encoding)

     def handle_starttag(self, tag, attrs):
         """Dispatch a start tag to its handler; unknown tags are ignored."""
         handler = self._handlers.get(tag+'_start')
         if handler is not None:
             handler(attrs)

     def handle_endtag(self, tag):
         """Dispatch an end tag to its handler; unknown tags are ignored."""
         handler = self._handlers.get(tag+'_end')
         if handler is not None:
             handler()

     def handle_data(self, data):
         """Append text to the open cell, if any, in a non-ignored table."""
         if self.state == self.IGNORE or self.current_cell is None:
             return
         if NON_BREAKING_SPACE in data:
             data = data.replace(NON_BREAKING_SPACE, ' ')
         self.current_cell.append(data)

     def handle_entityref(self, name):
         """Resolve a named entity (``&name;``) and treat it as cell data."""
         value = self._handle_entityref(name)
         self.handle_data(value)

     def _handle_entityref(self, name):
         """Return the text for entity ``name``.

         Unknown entities are returned verbatim as ``&name;``.
         """
         if name == 'apos':  # missing from entitydefs
             return "'"
         try:
             value = entitydefs[name]
         except KeyError:
             return '&'+name+';'
         # Some definitions are themselves numeric references ('&#NNN;').
         if value.startswith('&#'):
             return unichr(int(value[2:-1]))
         if PY2:
             return value.decode('ISO-8859-1')
         return value

     def handle_charref(self, number):
         """Resolve a numeric reference (``&#N;``) and treat it as cell data."""
         value = self._handle_charref(number)
         self.handle_data(value)

     def _handle_charref(self, number):
         """Return the character for a decimal or ``x``-prefixed hex reference.

         A reference that cannot be converted (not a number, or outside the
         valid code point range) is returned verbatim as ``&#<number>;``,
         including a possible ``x`` prefix.
         """
         if number.startswith(('x', 'X')):
             base = 16
             digits = number[1:]
         else:
             base = 10
             digits = number
         try:
             return unichr(int(digits, base))
         except (ValueError, OverflowError):
             # OverflowError: the number does not even fit the platform int
             # accepted by chr/unichr.
             return '&#'+number+';'

     def unknown_decl(self, data):
         """Silently accept unknown declarations."""
         # Ignore everything even if it's invalid. This kind of stuff comes
         # at least from MS Excel
         pass

     def table_start(self, attrs=None):
         """Enter a new table: wait for its first row, drop any open row/cell."""
         self.state = self.INITIAL
         self.current_row = None
         self.current_cell = None

     def table_end(self):
         """Leave the table, first finishing a row missing its ``</tr>``."""
         if self.current_row is not None:
             self.tr_end()
         self.state = self.IGNORE

     def tr_start(self, attrs=None):
         """Open a new row, first finishing a previous unclosed row."""
         if self.current_row is not None:
             self.tr_end()
         self.current_row = []

     def tr_end(self):
         """Finish the open row and pass it to the populator.

         The first row of a table is offered to ``populator.start_table``,
         whose return value decides whether the rest of the table is
         processed or ignored; later rows go to ``populator.add``.
         """
         if self.current_row is None:
             return
         if self.current_cell is not None:
             self.td_end()
         if self.state == self.INITIAL:
             accepted = self.populator.start_table(self.current_row)
             self.state = self.PROCESS if accepted else self.IGNORE
         elif self.state == self.PROCESS:
             self.populator.add(self.current_row)
         self.current_row = None

     def td_start(self, attrs=None):
         """Open a new cell, closing an unclosed cell and opening a row if needed."""
         if self.current_cell is not None:
             self.td_end()
         if self.current_row is None:
             self.tr_start()
         self.current_cell = []

     def td_end(self):
         """Finish the open cell and append its joined text to the open row."""
         if self.current_cell is not None and self.state != self.IGNORE:
             cell = ''.join(self.current_cell)
             self.current_row.append(cell)
         self.current_cell = None

     def br_start(self, attrs=None):
         """Treat ``<br>`` as a newline inside the current cell."""
         self.handle_data('\n')

     def meta_start(self, attrs):
         """Switch the decoding to the encoding declared by a ``<meta>`` tag."""
         encoding = self._get_encoding_from_meta(attrs)
         if encoding:
             self._encoding = encoding

     def _get_encoding_from_meta(self, attrs):
         """Return the encoding declared by ``<meta>`` attributes, or ``None``.

         Supports the HTML5 ``charset`` attribute and the
         ``http-equiv="Content-Type"`` + ``content="...; charset=X"`` pair.
         ``attrs`` is the list of ``(name, value)`` pairs given by the parser.
         """
         valid_http_equiv = False
         encoding = None
         for name, value in attrs:
             name = name.lower()
             if name == 'charset':  # html5
                 return value
             if value is None:
                 # Attribute written without a value; there is nothing to
                 # inspect and calling string methods on it would fail.
                 continue
             if name == 'http-equiv' and value.lower() == 'content-type':
                 valid_http_equiv = True
             if name == 'content':
                 encoding = self._get_encoding_from_content_attr(value)
         return encoding if valid_http_equiv else None

     def _get_encoding_from_content_attr(self, value):
         """Return ``X`` from a ``...; charset=X`` content value, or ``None``."""
         for token in value.split(';'):
             token = token.strip()
             if token.lower().startswith('charset='):
                 return token[8:]  # len('charset=') == 8
         return None

     def handle_pi(self, data):
         """Switch the decoding to the encoding declared by an XML declaration."""
         encoding = self._get_encoding_from_pi(data)
         if encoding:
             self._encoding = encoding

     def _get_encoding_from_pi(self, data):
         """Return the ``encoding`` of an ``xml ...`` instruction, or ``None``."""
         data = data.strip()
         if not data.lower().startswith('xml '):
             return None
         if data.endswith('?'):
             data = data[:-1]
         for token in data.split():
             if token.lower().startswith('encoding='):
                 encoding = token[9:]  # len('encoding=') == 9
                 if encoding.startswith("'") or encoding.startswith('"'):
                     # Drop the surrounding quotes.
                     encoding = encoding[1:-1]
                 return encoding
         return None

robotide.lib.robot.parsing.htmlreader.HtmlReader
Definition: htmlreader.py:33

robotide.lib.robot.parsing.htmlreader.HtmlReader.handle_data
def handle_data(self, data)
Definition: htmlreader.py:89

robotide.lib.robot.parsing.htmlreader.HtmlReader._get_encoding_from_pi
def _get_encoding_from_pi(self, data)
Definition: htmlreader.py:205

robotide.lib.robot.parsing.htmlreader.HtmlReader.meta_start
def meta_start(self, attrs)
Definition: htmlreader.py:176

robotide.lib.robot.parsing.htmlreader.HtmlReader.unknown_decl
def unknown_decl(self, data)
Definition: htmlreader.py:128

robotide.lib.robot.parsing.htmlreader.HtmlReader._handle_entityref
def _handle_entityref(self, name)
Definition: htmlreader.py:100

robotide.lib.robot.parsing.htmlreader.HtmlReader._get_encoding_from_meta
def _get_encoding_from_meta(self, attrs)
Definition: htmlreader.py:181

robotide.lib.robot.parsing.htmlreader.HtmlReader.table_start
def table_start(self, attrs=None)
Definition: htmlreader.py:133

robotide.lib.robot.parsing.htmlreader.HtmlReader.td_end
def td_end(self)
Definition: htmlreader.py:167

robotide.lib.robot.parsing.htmlreader.HtmlReader.handle_entityref
def handle_entityref(self, name)
Definition: htmlreader.py:96

robotide.lib.robot.parsing.htmlreader.HtmlReader._get_encoding_from_content_attr
def _get_encoding_from_content_attr(self, value)
Definition: htmlreader.py:194

robotide.lib.robot.parsing.htmlreader.HtmlReader.table_end
def table_end(self)
Definition: htmlreader.py:138

robotide.lib.robot.parsing.htmlreader.HtmlReader.PROCESS
int PROCESS
Definition: htmlreader.py:36

robotide.lib.robot.parsing.htmlreader.HtmlReader.current_row
current_row
Definition: htmlreader.py:56

robotide.lib.robot.parsing.htmlreader.HtmlReader.handle_charref
def handle_charref(self, number)
Definition: htmlreader.py:113

robotide.lib.robot.parsing.htmlreader.HtmlReader._decode
def _decode(self, line)
Definition: htmlreader.py:76

robotide.lib.robot.parsing.htmlreader.HtmlReader.populator
populator
Definition: htmlreader.py:54

robotide.lib.robot.parsing.htmlreader.HtmlReader._encoding
_encoding
Definition: htmlreader.py:41

robotide.lib.robot.parsing.htmlreader.HtmlReader.IGNORE
int IGNORE
Definition: htmlreader.py:34

robotide.lib.robot.parsing.htmlreader.HtmlReader.__init__
def __init__(self, spaces=2)
Definition: htmlreader.py:38

robotide.lib.robot.parsing.htmlreader.HtmlReader.handle_endtag
def handle_endtag(self, tag)
Definition: htmlreader.py:84

robotide.lib.robot.parsing.htmlreader.HtmlReader.INITIAL
int INITIAL
Definition: htmlreader.py:35

robotide.lib.robot.parsing.htmlreader.HtmlReader.handle_pi
def handle_pi(self, data)
Definition: htmlreader.py:200

robotide.lib.robot.parsing.htmlreader.HtmlReader.tr_end
def tr_end(self)
Definition: htmlreader.py:148

robotide.lib.robot.parsing.htmlreader.HtmlReader.tr_start
def tr_start(self, attrs=None)
Definition: htmlreader.py:143

robotide.lib.robot.parsing.htmlreader.HtmlReader.current_cell
current_cell
Definition: htmlreader.py:57

robotide.lib.robot.parsing.htmlreader.HtmlReader.handle_starttag
def handle_starttag(self, tag, attrs)
Definition: htmlreader.py:79

robotide.lib.robot.parsing.htmlreader.HtmlReader.read
def read(self, htmlfile, populator, path=None)
Definition: htmlreader.py:53

robotide.lib.robot.parsing.htmlreader.HtmlReader.state
state
Definition: htmlreader.py:55

robotide.lib.robot.parsing.htmlreader.HtmlReader.br_start
def br_start(self, attrs=None)
Definition: htmlreader.py:173

robotide.lib.robot.parsing.htmlreader.HtmlReader._spaces
_spaces
Definition: htmlreader.py:39

robotide.lib.robot.parsing.htmlreader.HtmlReader._handlers
_handlers
Definition: htmlreader.py:42

robotide.lib.robot.parsing.htmlreader.HtmlReader._handle_charref
def _handle_charref(self, number)
Definition: htmlreader.py:117

robotide.lib.robot.parsing.htmlreader.HtmlReader.td_start
def td_start(self, attrs=None)
Definition: htmlreader.py:160

robotide.lib.robot.output
Definition: __init__.py:1

robotide.lib.robot.parsing.htmlreader.unichr
unichr
Definition: htmlreader.py:27

robotide.lib.robot.reporting.jsexecutionresult.int
int
Definition: jsexecutionresult.py:25

robotide.lib.robot.utils
Definition: __init__.py:1