blob: 9106b9618e28c7cf424d945b253d78cadcd4addf [file] [log] [blame]
Brian Silverman70325d62015-09-20 17:00:43 -04001#!/usr/bin/env python
2#
3# Copyright (c) 2008, Google Inc.
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are
8# met:
9#
10# * Redistributions of source code must retain the above copyright
11# notice, this list of conditions and the following disclaimer.
12# * Redistributions in binary form must reproduce the above
13# copyright notice, this list of conditions and the following disclaimer
14# in the documentation and/or other materials provided with the
15# distribution.
16# * Neither the name of Google Inc. nor the names of its
17# contributors may be used to endorse or promote products derived from
18# this software without specific prior written permission.
19#
20# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31# ---
32#
33# Generate a C include file from a finite state machine definition.
34#
35# Right now the form is the one expected by htmlparser.c so this file is pretty
36# tightly coupled with htmlparser.c.
37#
38
39__author__ = 'falmeida@google.com (Filipe Almeida)'
40
41import sys
42
43from fsm_config import FSMConfig
44
45
46class FSMGenerateAbstract(object):
47
48 def __init__(self, config):
49 self._config = config
50
51 def Generate(self):
52 """Returns the generated FSM description for the specified language.
53
54 Raises a TypeError, because abstract methods can not be called.
55
56 Raises:
57 TypeError
58 """
59 raise TypeError('Abstract method %s.%s called' % (self._class.__name__,
60 self._function))
61
62
63class FSMGenerateC(FSMGenerateAbstract):
64 """Generate the C definition from a statemachien configuration object."""
65
66 TABSTOP_ = 2
67
68 def _Prefix(self):
69 """Return a c declaration prefix."""
70
71 return self._config.name.lower() + '_'
72
73 def _StateInternalC(self, st):
74 """Return the internal name of the state."""
75
76 return '%sSTATE_INT_%s' % (self._Prefix().upper(), st.upper())
77
78 def _StateExternalC(self, st):
79 """Return the external name of the state."""
80
81 return '%sSTATE_%s' % (self._Prefix().upper(), st.upper())
82
83 def _MakeTuple(self, data):
84 """Converts data to a string representation of a C tuple."""
85
86 return '{ %s }' % ', '.join(data)
87
88 def _CreateHeader(self):
89 """Print the include file header."""
90
91 out = []
92
93 if self._config.comment:
94 out.append('/* ' + self._config.comment)
95 else:
96 out.append('/* State machine definition for ' + self._config.name)
97 out.append(' * Auto generated by generate_fsm.py. Please do not edit.')
98 out.append(' */')
99
100 return '\n'.join(out)
101
102 def _ListToIndentedString(self, list):
103 indented_list = [' ' + e for e in list]
104 return ',\n'.join(indented_list)
105
106 def _CreateEnum(self, name, data):
107 """Print a c enum definition."""
108
109 return 'enum %s {\n%s\n};\n' % (name,
110 self._ListToIndentedString(data))
111
112 def _CreateStructList(self, name, type, data):
113 """Print a c flat list.
114
115 Generic function to print list in c in the form of a struct.
116
117 Args:
118 name: name of the structure.
119 type: type of the struct.
120 data: contents of the struct as a list of elements
121
122 Returns:
123 String with the generated list.
124 """
125
126 return "static const %s %s[] = {\n%s\n};\n" % (
127 type,
128 name,
129 self._ListToIndentedString(data))
130
131 def _CreateStatesEnum(self):
132 """Print the internal states enum.
133
134 Prints an enum containing all the valid states.
135
136 Returns:
137 String containing a C enumeration of the states.
138 """
139 list = [] # output list
140
141 for state in self._config.states:
142 list.append(self._StateInternalC(state))
143 return self._CreateEnum(self._Prefix() + 'state_internal_enum', list)
144
145 def _CreateStatesExternal(self):
146 """Print a struct with a mapping from internal to external states."""
147 list = [] # output list
148
149 for state_name in self._config.states:
150 list.append(self._StateExternalC(
151 self._config.states[state_name].external_name))
152
153 return self._CreateStructList(self._Prefix() + 'states_external',
154 'int',
155 list)
156
157 def _CreateStatesInternalNames(self):
158 """Return a struct mapping internal states to a strings."""
159 out = [] # output list
160
161 for state_name in self._config.states:
162 out.append('"' + state_name + '"')
163
164 return self._CreateStructList(self._Prefix() + 'states_internal_names',
165 'char *',
166 out)
167
168 def _CreateNumStates(self):
169 """Print a Macro defining the number of states."""
170
171 return "#define %s_NUM_STATES %s" % (self._config.name.upper(),
172 str(len(self._config.states) + 1))
173
174 def _ExpandBracketExpression(self, expression):
175 """Expand ranges in a regexp bracket expression.
176
177 Returns a string with the ranges in a bracket expression expanded.
178
179 The bracket expression is similar to grep(1) or regular expression bracket
180 expressions but it does not support the negation (^) modifier or named
181 character classes like [:alpha:] or [:alnum:].
182
183 The especial character class [:default:] will expand to all elements in the
184 ascii range.
185
186 For example, the expression 'a-c13A-D' will expand to 'abc13ABCD'.
187
188 Args:
189 expression: A regexp bracket expression. Ie: 'A-Z0-9'.
190
191 Returns:
192 A string with the ranges in the bracket expression expanded.
193 """
194
195 def ExpandRange(start, end):
196 """Return a sequence of characters between start and end.
197
198 Args:
199 start: first character of the sequence.
200 end: last character of the sequence.
201
202 Returns:
203 string containing the sequence of characters between start and end.
204 """
205 return [chr(c) for c in range(ord(start), ord(end) + 1)]
206
207 def ListNext(input_list):
208 """Pop the first element of a list.
209
210 Args:
211 input_list: python list object.
212
213 Returns:
214 First element of the list or None if the list is empty.
215 """
216 if input_list:
217 return input_list.pop(0)
218 else:
219 return None
220
221 out = [] # List containing the output
222
223 # Special case for the character class [:default:]
224 if expression == '[:default:]':
225 out = [chr(c) for c in range(0, 255)]
226 return ''.join(out)
227
228 chars = [c for c in expression] # list o characters in the expression.
229
230 current = ListNext(chars)
231 while current:
232 next = ListNext(chars)
233 if next == '-':
234 next = ListNext(chars)
235 if next:
236 out.extend(ExpandRange(current, next))
237 else:
238 out.append(current)
239 out.append('-')
240 current = ListNext(chars)
241 else:
242 out.append(current)
243 current = next
244
245 return ''.join(out)
246
247 def _CreateTransitionTable(self):
248 """Print the state transition list.
249
250 Returns a set of C structures that define the transition table for the state
251 machine. This structure is a list of lists of ints (int **). The outer list
252 indexes the source state and the inner list contains the destination state
253 for each of the possible input characters:
254
255 const int * const* transitions[source][input] == destination.
256
257 The conditions are mapped from the conditions variable.
258
259 Returns:
260 String containing the generated transition table in a C struct.
261 """
262 out = [] # output list
263 default_state = 'STATEMACHINE_ERROR'
264 state_table = {}
265
266 for state in self._config.states:
267 state_table[state] = [default_state for col in xrange(255)]
268
269 # We process the transition in reverse order while updating the table.
270 for i_transition in range(len(self._config.transitions) - 1, -1, -1):
271 transition = self._config.transitions[i_transition]
272 (condition_name, src, dst) = (transition.condition,
273 transition.source,
274 transition.destination)
275 condition = self._config.conditions[condition_name]
276 char_list = self._ExpandBracketExpression(condition)
277
278 for c in char_list:
279 state_table[src][ord(c)] = self._StateInternalC(dst)
280
281 # Create the inner lists which map input characters to destination states.
282 for state in self._config.states:
283 transition_row = []
284 for c in xrange(0, 255):
285 transition_row.append(' /* %06s */ %s' % (repr(chr(c)),
286 state_table[state][c]))
287
288 out.append(self._CreateStructList('%stransition_row_%s' %
289 (self._Prefix(),
290 state),
291 'int',
292 transition_row))
293 out.append('\n')
294
295 # Create the outer list, which map source states to input characters.
296 out.append('static const %s %s[] = {\n' % ('int *', self._Prefix() +
297 'state_transitions'))
298
299 row_list = [' %stransition_row_%s' %
300 (self._Prefix(), row) for row in self._config.states]
301 out.append(',\n'.join(row_list))
302 out.append('\n};\n')
303
304 return ''.join(out)
305
306 def Generate(self):
307 """Returns the generated the C include statements for the statemachine."""
308
309 print '\n'.join((self._CreateHeader(),
310 self._CreateNumStates(),
311 self._CreateStatesEnum(),
312 self._CreateStatesExternal(),
313 self._CreateStatesInternalNames(),
314 self._CreateTransitionTable()))
315
316
317def main():
318 if len(sys.argv) != 2:
319 print "usage: generate_fsm.py config_file"
320 sys.exit(1)
321
322 config = FSMConfig()
323 config.Load(sys.argv[1])
324
325 gen = FSMGenerateC(config)
326 gen.Generate()
327
328
329if __name__ == "__main__":
330 main()