Robot Framework
lexer.py
# Copyright 2008-2015 Nokia Networks
# Copyright 2016- Robot Framework Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from itertools import chain

from robot.errors import DataError
from robot.utils import get_error_message, FileReader

from .blocklexers import FileLexer
from .context import InitFileContext, TestCaseFileContext, ResourceFileContext
from .tokenizer import Tokenizer
from .tokens import EOS, END, Token


def get_tokens(source, data_only=False, tokenize_variables=False, lang=None):
    """Parses the given source to tokens."""
    lexer = Lexer(TestCaseFileContext(lang=lang), data_only, tokenize_variables)
    lexer.input(source)
    return lexer.get_tokens()


def get_resource_tokens(source, data_only=False, tokenize_variables=False, lang=None):
    """Parses the given source to resource file tokens."""
    lexer = Lexer(ResourceFileContext(lang=lang), data_only, tokenize_variables)
    lexer.input(source)
    return lexer.get_tokens()


def get_init_tokens(source, data_only=False, tokenize_variables=False, lang=None):
    """Parses the given source to init file tokens."""
    lexer = Lexer(InitFileContext(lang=lang), data_only, tokenize_variables)
    lexer.input(source)
    return lexer.get_tokens()
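

# Usage sketch (an illustration added here, not part of the original file).
# ``source`` may be a path, an open file object, or the data as a string.
# In recent Robot Framework releases these functions are also re-exported
# from ``robot.api``:
#
#     from robot.api import get_tokens
#     for token in get_tokens('tests.robot', data_only=True):
#         print(token.type, token.value)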


class Lexer:

    def __init__(self, ctx, data_only=False, tokenize_variables=False):
        self.lexer = FileLexer(ctx)
        self.data_only = data_only
        self.tokenize_variables = tokenize_variables
        self.statements = []

    def input(self, source):
        for statement in Tokenizer().tokenize(self._read(source),
                                              self.data_only):
            # Store all tokens but pass only data tokens to lexer.
            self.statements.append(statement)
            if self.data_only:
                data = statement[:]
            else:
                # Separators, comments, etc. already have type, data doesn't.
                data = [t for t in statement if t.type is None]
            if data:
                self.lexer.input(data)
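
    # Added note (not in the original source): for a row such as
    # ``    Log    message``, the separator tokens already carry
    # ``Token.SEPARATOR`` after tokenization, while the ``Log`` and
    # ``message`` cells still have ``type is None``; only those untyped
    # data tokens are fed to ``FileLexer`` for classification.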

    def _read(self, source):
        try:
            with FileReader(source, accept_text=True) as reader:
                return reader.read()
        except Exception:
            raise DataError(get_error_message())

    def get_tokens(self):
        self.lexer.lex()
        statements = self.statements
        if not self.data_only:
            statements = chain.from_iterable(
                self._split_trailing_commented_and_empty_lines(s)
                for s in statements
            )
        tokens = self._get_tokens(statements)
        if self.tokenize_variables:
            tokens = self._tokenize_variables(tokens)
        return tokens
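
    # Consumption sketch (hypothetical, assuming ``TestCaseFileContext`` can
    # be constructed with its default ``lang``):
    #
    #     lexer = Lexer(TestCaseFileContext())
    #     lexer.input('*** Test Cases ***\n')
    #     tokens = list(lexer.get_tokens())
    #
    # ``get_tokens`` returns a generator, so tokens are produced lazily once
    # the block lexer has classified the stored statements.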

    def _get_tokens(self, statements):
        if self.data_only:
            ignored_types = {None, Token.COMMENT_HEADER, Token.COMMENT}
        else:
            ignored_types = {None}
        inline_if_type = Token.INLINE_IF
        for statement in statements:
            last = None
            inline_if = False
            for token in statement:
                token_type = token.type
                if token_type in ignored_types:
                    continue
                if token._add_eos_before and not (last and last._add_eos_after):
                    yield EOS.from_token(token, before=True)
                yield token
                if token._add_eos_after:
                    yield EOS.from_token(token)
                if token_type == inline_if_type:
                    inline_if = True
                last = token
            if last and not last._add_eos_after:
                yield EOS.from_token(last)
                if inline_if:
                    yield END.from_token(last, virtual=True)
                    yield EOS.from_token(last)
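
    # Behavior sketch (added comment): for a plain statement the loop yields
    # its tokens followed by one ``EOS`` (end of statement) marker. For an
    # inline ``IF``, whose tokens end without an explicit ``END``, a virtual
    # ``END`` token and a final ``EOS`` are appended as well, so the parser
    # sees the same structure as with a block ``IF``.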

    def _split_trailing_commented_and_empty_lines(self, statement):
        lines = self._split_to_lines(statement)
        commented_or_empty = []
        for line in reversed(lines):
            if not self._is_commented_or_empty(line):
                break
            commented_or_empty.append(line)
        if not commented_or_empty:
            return [statement]
        lines = lines[:-len(commented_or_empty)]
        statement = list(chain.from_iterable(lines))
        return [statement] + list(reversed(commented_or_empty))
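
    # Example (hypothetical statement): if the last two lines of a statement
    # are a ``# comment`` line and an empty line, this returns three
    # statements in original order: the statement without those lines, the
    # comment line, and the empty line.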

    def _split_to_lines(self, statement):
        lines = []
        current = []
        for token in statement:
            current.append(token)
            if token.type == Token.EOL:
                lines.append(current)
                current = []
        if current:
            lines.append(current)
        return lines
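
    # Example (added comment): a statement with tokens ``A EOL B EOL C`` is
    # split into ``[[A, EOL], [B, EOL], [C]]``; a trailing line lacking an
    # ``EOL`` token still forms its own line.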

    def _is_commented_or_empty(self, line):
        separator_or_ignore = (Token.SEPARATOR, None)
        comment_or_eol = (Token.COMMENT, Token.EOL)
        for token in line:
            if token.type not in separator_or_ignore:
                return token.type in comment_or_eol
        return False
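
    # Example (added comment): ``SEPARATOR COMMENT EOL`` and a lone ``EOL``
    # count as commented or empty; a line whose first typed token is a
    # keyword call does not, and a line of only separators returns False.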

    def _tokenize_variables(self, tokens):
        for token in tokens:
            for t in token.tokenize_variables():
                yield t
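

# Minimal self-contained demo (an addition for illustration, not part of the
# original module). Passing the data as a string works because ``_read``
# uses ``FileReader(source, accept_text=True)``.
if __name__ == '__main__':
    data = (
        '*** Test Cases ***\n'
        'Example\n'
        '    Log    Hello, world!\n'
    )
    for token in get_tokens(data, data_only=True):
        print(token.type, repr(token.value))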