Robot Framework
htmlutils.py
Go to the documentation of this file.
1 # Copyright 2008-2015 Nokia Networks
2 # Copyright 2016- Robot Framework Foundation
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 # http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15 
16 import re
17 try:
18  from urllib import quote
19 except ImportError:
20  from urllib.parse import quote
21 
22 from robot.errors import DataError
23 from robot.utils import html_escape, html_format, NormalizedDict
24 from robot.utils.htmlformatters import HeaderFormatter
25 
26 
28 
31  _header_regexp = re.compile(r'<h([234])>(.+?)</h\1>')
32 
35  _name_regexp = re.compile('`(.+?)`')
36 
def __init__(self, keywords, type_info, introduction, doc_format='ROBOT'):
    """Set up the documentation converter and the link target mappings.

    ``keywords`` and ``type_info`` are iterables of objects whose ``name``
    attribute is used as a link target. ``introduction`` is scanned for
    headers only when ``doc_format`` is ``'ROBOT'``. ``doc_format`` is
    passed to ``DocToHtml``, which rejects unknown formats.
    """
    # These instance attribute names are read verbatim by ``html`` and
    # ``_link_keywords``, so they are kept exactly as those methods use them.
    self._doc_to_html_doc_to_html = DocToHtml(doc_format)
    is_robot_format = doc_format == 'ROBOT'
    # The helpers are called by the names they are defined with.
    self._targets_targets = self._get_targets(
        keywords, introduction, robot_format=is_robot_format
    )
    self._type_info_targets_type_info_targets = self._get_type_info_targets(type_info)
42 
43  def _get_targets(self, keywords, introduction, robot_format):
44  targets = {
45  'introduction': 'Introduction',
46  'library introduction': 'Introduction',
47  'importing': 'Importing',
48  'library importing': 'Importing',
49  'keywords': 'Keywords',
50  }
51  for kw in keywords:
52  targets[kw.name] = kw.name
53  if robot_format:
54  for header in self._yield_header_targets_yield_header_targets(introduction):
55  targets[header] = header
56  return self._escape_and_encode_targets_escape_and_encode_targets(targets)
57 
58  def _get_type_info_targets(self, type_info):
59  targets = {info.name: info.name for info in type_info}
60  return self._escape_and_encode_targets_escape_and_encode_targets(targets)
61 
def _yield_header_targets(self, introduction):
    """Yield the text of every header line found in ``introduction``.

    Each line is stripped and offered to ``HeaderFormatter().match``; for
    lines that match, group 2 of the match is yielded.
    """
    formatter = HeaderFormatter()
    for raw_line in introduction.splitlines():
        found = formatter.match(raw_line.strip())
        if not found:
            continue
        yield found.group(2)
68 
def _escape_and_encode_targets(self, targets):
    """Return ``targets`` as a ``NormalizedDict`` that is safe to use in HTML.

    Keys are passed through ``html_escape`` and values through
    ``_encode_uri_component``.
    """
    return NormalizedDict(
        (html_escape(name), self._encode_uri_component(target))
        for name, target in targets.items()
    )
72 
73  def _encode_uri_component(self, value):
74  # Emulates encodeURIComponent javascript function
75  return quote(value.encode('UTF-8'), safe="-_.!~*'()")
76 
def html(self, doc, intro=False):
    """Convert ``doc`` to HTML and turn backtick-quoted names into links.

    When ``intro`` is true, ``<h2>``-``<h4>`` headers additionally get an
    ``id`` attribute equal to their text so that they can be link targets.
    """
    # The converter attribute keeps the exact name ``__init__`` assigns it to.
    doc = self._doc_to_html_doc_to_html(doc)
    if intro:
        doc = self._header_regexp.sub(r'<h\1 id="\2">\2</h\1>', doc)
    return self._name_regexp.sub(self._link_keywords, doc)
82 
83  def _link_keywords(self, match):
84  name = match.group(1)
85  targets = self._targets_targets
86  types = self._type_info_targets_type_info_targets
87  if name in targets:
88  return f'<a href="#{targets[name]}" class="name">{name}</a>'
89  elif name in types:
90  return f'<a href="#type-{types[name]}" class="name">{name}</a>'
91  return f'<span class="name">{name}</span>'
92 
93 
class DocToHtml:
    """Callable that converts documentation in one fixed format to HTML.

    Supported formats are ``'ROBOT'``, ``'TEXT'``, ``'HTML'`` and ``'REST'``.
    """

    def __init__(self, doc_format):
        """Select the converter for ``doc_format``.

        Raises ``DataError`` if the format is not supported.
        """
        self._formatter = self._get_formatter(doc_format)

    def _get_formatter(self, doc_format):
        """Return the callable that converts ``doc_format`` docs to HTML."""
        formatters = {
            'ROBOT': html_format,
            'TEXT': self._format_text,
            'HTML': lambda doc: doc,    # Already HTML, returned unchanged.
            'REST': self._format_rest,
        }
        try:
            return formatters[doc_format]
        except KeyError:
            raise DataError("Invalid documentation format '%s'." % doc_format)

    def _format_text(self, doc):
        """Escape plain text and wrap it in a whitespace-preserving paragraph."""
        return '<p style="white-space: pre-wrap">%s</p>' % html_escape(doc)

    def _format_rest(self, doc):
        """Render reStructuredText with docutils and return its HTML body.

        Raises ``DataError`` if docutils cannot be imported.
        """
        # Imported lazily so that docutils is needed only for reST docs.
        try:
            from docutils.core import publish_parts
        except ImportError:
            raise DataError("reST format requires 'docutils' module to be installed.")
        parts = publish_parts(doc, writer_name='html',
                              settings_overrides={'syntax_highlight': 'short'})
        return parts['html_body']

    def __call__(self, doc):
        """Convert ``doc`` to HTML using the selected converter."""
        return self._formatter(doc)
122 
123 
# Maps a regular expression fragment matching an HTML tag name to the plain
# text marker that is placed on both sides of that element's content.
html_tags = {
    'b': '*',
    'i': '_',
    'strong': '*',
    'em': '_',
    'code': '``',
    'div.*?': ''
}
# Maps a regular expression matching an HTML line break or character entity
# to its plain text replacement. Entries are applied in this order, so
# '&amp;' must stay last: decoding it first would turn e.g. '&amp;lt;' into
# '&lt;' and then, incorrectly, into '<'.
html_chars = {
    '<br */?>': '\n',
    '&lt;': '<',
    '&gt;': '>',
    '&quot;': '"',
    '&apos;': "'",
    '&amp;': '&'
}
141 
def get_shortdoc_from_html(self, doc):
    """Return the short documentation of HTML ``doc`` as plain text.

    The content of the first ``<p>`` element is used when there is one,
    otherwise the whole ``doc``. The selected text is converted with
    ``html_to_plain_text``.
    """
    first_paragraph = re.search(r'<p.*?>(.*?)</?p>', doc, re.DOTALL)
    if first_paragraph:
        doc = first_paragraph.group(1)
    return self.html_to_plain_text(doc)
148 
def html_to_plain_text(self, doc):
    """Convert simple HTML markup in ``doc`` to plain text.

    Elements listed in ``html_tags`` are replaced by their content wrapped
    in the configured marker, after which every pattern in ``html_chars``
    is replaced by its plain text counterpart.
    """
    for tag, marker in self.html_tags.items():
        element = r'<%(tag)s>(.*?)</%(tag)s>' % {'tag': tag}
        wrapped = r'%(repl)s\1%(repl)s' % {'repl': marker}
        # DOTALL lets an element's content span multiple lines.
        doc = re.sub(element, wrapped, doc, flags=re.DOTALL)
    for pattern, replacement in self.html_chars.items():
        doc = re.sub(pattern, replacement, doc)
    return doc
def _yield_header_targets(self, introduction)
Definition: htmlutils.py:62
def _get_targets(self, keywords, introduction, robot_format)
Definition: htmlutils.py:43
def _encode_uri_component(self, value)
Definition: htmlutils.py:73
def _escape_and_encode_targets(self, targets)
Definition: htmlutils.py:69
def _get_type_info_targets(self, type_info)
Definition: htmlutils.py:58
def html(self, doc, intro=False)
Definition: htmlutils.py:77
def __init__(self, keywords, type_info, introduction, doc_format='ROBOT')
Definition: htmlutils.py:37
def _get_formatter(self, doc_format)
Definition: htmlutils.py:99
def __init__(self, doc_format)
Definition: htmlutils.py:96
def html_escape(text, linkify=True)
Definition: markuputils.py:44