Brian Silverman | 70325d6 | 2015-09-20 17:00:43 -0400 | [diff] [blame^] | 1 | #!/usr/bin/env python |
| 2 | # |
| 3 | # Copyright (c) 2008, Google Inc. |
| 4 | # All rights reserved. |
| 5 | # |
| 6 | # Redistribution and use in source and binary forms, with or without |
| 7 | # modification, are permitted provided that the following conditions are |
| 8 | # met: |
| 9 | # |
| 10 | # * Redistributions of source code must retain the above copyright |
| 11 | # notice, this list of conditions and the following disclaimer. |
| 12 | # * Redistributions in binary form must reproduce the above |
| 13 | # copyright notice, this list of conditions and the following disclaimer |
| 14 | # in the documentation and/or other materials provided with the |
| 15 | # distribution. |
| 16 | # * Neither the name of Google Inc. nor the names of its |
| 17 | # contributors may be used to endorse or promote products derived from |
| 18 | # this software without specific prior written permission. |
| 19 | # |
| 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 21 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 22 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 23 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 24 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 25 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 26 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 27 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 28 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 29 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 31 | # --- |
| 32 | # |
| 33 | # Generate a C include file from a finite state machine definition. |
| 34 | # |
| 35 | # Right now the form is the one expected by htmlparser.c so this file is pretty |
| 36 | # tightly coupled with htmlparser.c. |
| 37 | # |
| 38 | |
| 39 | __author__ = 'falmeida@google.com (Filipe Almeida)' |
| 40 | |
| 41 | import sys |
| 42 | |
| 43 | from fsm_config import FSMConfig |
| 44 | |
| 45 | |
| 46 | class FSMGenerateAbstract(object): |
| 47 | |
| 48 | def __init__(self, config): |
| 49 | self._config = config |
| 50 | |
| 51 | def Generate(self): |
| 52 | """Returns the generated FSM description for the specified language. |
| 53 | |
| 54 | Raises a TypeError, because abstract methods can not be called. |
| 55 | |
| 56 | Raises: |
| 57 | TypeError |
| 58 | """ |
| 59 | raise TypeError('Abstract method %s.%s called' % (self._class.__name__, |
| 60 | self._function)) |
| 61 | |
| 62 | |
| 63 | class FSMGenerateC(FSMGenerateAbstract): |
| 64 | """Generate the C definition from a statemachien configuration object.""" |
| 65 | |
| 66 | TABSTOP_ = 2 |
| 67 | |
| 68 | def _Prefix(self): |
| 69 | """Return a c declaration prefix.""" |
| 70 | |
| 71 | return self._config.name.lower() + '_' |
| 72 | |
| 73 | def _StateInternalC(self, st): |
| 74 | """Return the internal name of the state.""" |
| 75 | |
| 76 | return '%sSTATE_INT_%s' % (self._Prefix().upper(), st.upper()) |
| 77 | |
| 78 | def _StateExternalC(self, st): |
| 79 | """Return the external name of the state.""" |
| 80 | |
| 81 | return '%sSTATE_%s' % (self._Prefix().upper(), st.upper()) |
| 82 | |
| 83 | def _MakeTuple(self, data): |
| 84 | """Converts data to a string representation of a C tuple.""" |
| 85 | |
| 86 | return '{ %s }' % ', '.join(data) |
| 87 | |
| 88 | def _CreateHeader(self): |
| 89 | """Print the include file header.""" |
| 90 | |
| 91 | out = [] |
| 92 | |
| 93 | if self._config.comment: |
| 94 | out.append('/* ' + self._config.comment) |
| 95 | else: |
| 96 | out.append('/* State machine definition for ' + self._config.name) |
| 97 | out.append(' * Auto generated by generate_fsm.py. Please do not edit.') |
| 98 | out.append(' */') |
| 99 | |
| 100 | return '\n'.join(out) |
| 101 | |
| 102 | def _ListToIndentedString(self, list): |
| 103 | indented_list = [' ' + e for e in list] |
| 104 | return ',\n'.join(indented_list) |
| 105 | |
| 106 | def _CreateEnum(self, name, data): |
| 107 | """Print a c enum definition.""" |
| 108 | |
| 109 | return 'enum %s {\n%s\n};\n' % (name, |
| 110 | self._ListToIndentedString(data)) |
| 111 | |
| 112 | def _CreateStructList(self, name, type, data): |
| 113 | """Print a c flat list. |
| 114 | |
| 115 | Generic function to print list in c in the form of a struct. |
| 116 | |
| 117 | Args: |
| 118 | name: name of the structure. |
| 119 | type: type of the struct. |
| 120 | data: contents of the struct as a list of elements |
| 121 | |
| 122 | Returns: |
| 123 | String with the generated list. |
| 124 | """ |
| 125 | |
| 126 | return "static const %s %s[] = {\n%s\n};\n" % ( |
| 127 | type, |
| 128 | name, |
| 129 | self._ListToIndentedString(data)) |
| 130 | |
| 131 | def _CreateStatesEnum(self): |
| 132 | """Print the internal states enum. |
| 133 | |
| 134 | Prints an enum containing all the valid states. |
| 135 | |
| 136 | Returns: |
| 137 | String containing a C enumeration of the states. |
| 138 | """ |
| 139 | list = [] # output list |
| 140 | |
| 141 | for state in self._config.states: |
| 142 | list.append(self._StateInternalC(state)) |
| 143 | return self._CreateEnum(self._Prefix() + 'state_internal_enum', list) |
| 144 | |
| 145 | def _CreateStatesExternal(self): |
| 146 | """Print a struct with a mapping from internal to external states.""" |
| 147 | list = [] # output list |
| 148 | |
| 149 | for state_name in self._config.states: |
| 150 | list.append(self._StateExternalC( |
| 151 | self._config.states[state_name].external_name)) |
| 152 | |
| 153 | return self._CreateStructList(self._Prefix() + 'states_external', |
| 154 | 'int', |
| 155 | list) |
| 156 | |
| 157 | def _CreateStatesInternalNames(self): |
| 158 | """Return a struct mapping internal states to a strings.""" |
| 159 | out = [] # output list |
| 160 | |
| 161 | for state_name in self._config.states: |
| 162 | out.append('"' + state_name + '"') |
| 163 | |
| 164 | return self._CreateStructList(self._Prefix() + 'states_internal_names', |
| 165 | 'char *', |
| 166 | out) |
| 167 | |
| 168 | def _CreateNumStates(self): |
| 169 | """Print a Macro defining the number of states.""" |
| 170 | |
| 171 | return "#define %s_NUM_STATES %s" % (self._config.name.upper(), |
| 172 | str(len(self._config.states) + 1)) |
| 173 | |
| 174 | def _ExpandBracketExpression(self, expression): |
| 175 | """Expand ranges in a regexp bracket expression. |
| 176 | |
| 177 | Returns a string with the ranges in a bracket expression expanded. |
| 178 | |
| 179 | The bracket expression is similar to grep(1) or regular expression bracket |
| 180 | expressions but it does not support the negation (^) modifier or named |
| 181 | character classes like [:alpha:] or [:alnum:]. |
| 182 | |
| 183 | The especial character class [:default:] will expand to all elements in the |
| 184 | ascii range. |
| 185 | |
| 186 | For example, the expression 'a-c13A-D' will expand to 'abc13ABCD'. |
| 187 | |
| 188 | Args: |
| 189 | expression: A regexp bracket expression. Ie: 'A-Z0-9'. |
| 190 | |
| 191 | Returns: |
| 192 | A string with the ranges in the bracket expression expanded. |
| 193 | """ |
| 194 | |
| 195 | def ExpandRange(start, end): |
| 196 | """Return a sequence of characters between start and end. |
| 197 | |
| 198 | Args: |
| 199 | start: first character of the sequence. |
| 200 | end: last character of the sequence. |
| 201 | |
| 202 | Returns: |
| 203 | string containing the sequence of characters between start and end. |
| 204 | """ |
| 205 | return [chr(c) for c in range(ord(start), ord(end) + 1)] |
| 206 | |
| 207 | def ListNext(input_list): |
| 208 | """Pop the first element of a list. |
| 209 | |
| 210 | Args: |
| 211 | input_list: python list object. |
| 212 | |
| 213 | Returns: |
| 214 | First element of the list or None if the list is empty. |
| 215 | """ |
| 216 | if input_list: |
| 217 | return input_list.pop(0) |
| 218 | else: |
| 219 | return None |
| 220 | |
| 221 | out = [] # List containing the output |
| 222 | |
| 223 | # Special case for the character class [:default:] |
| 224 | if expression == '[:default:]': |
| 225 | out = [chr(c) for c in range(0, 255)] |
| 226 | return ''.join(out) |
| 227 | |
| 228 | chars = [c for c in expression] # list o characters in the expression. |
| 229 | |
| 230 | current = ListNext(chars) |
| 231 | while current: |
| 232 | next = ListNext(chars) |
| 233 | if next == '-': |
| 234 | next = ListNext(chars) |
| 235 | if next: |
| 236 | out.extend(ExpandRange(current, next)) |
| 237 | else: |
| 238 | out.append(current) |
| 239 | out.append('-') |
| 240 | current = ListNext(chars) |
| 241 | else: |
| 242 | out.append(current) |
| 243 | current = next |
| 244 | |
| 245 | return ''.join(out) |
| 246 | |
| 247 | def _CreateTransitionTable(self): |
| 248 | """Print the state transition list. |
| 249 | |
| 250 | Returns a set of C structures that define the transition table for the state |
| 251 | machine. This structure is a list of lists of ints (int **). The outer list |
| 252 | indexes the source state and the inner list contains the destination state |
| 253 | for each of the possible input characters: |
| 254 | |
| 255 | const int * const* transitions[source][input] == destination. |
| 256 | |
| 257 | The conditions are mapped from the conditions variable. |
| 258 | |
| 259 | Returns: |
| 260 | String containing the generated transition table in a C struct. |
| 261 | """ |
| 262 | out = [] # output list |
| 263 | default_state = 'STATEMACHINE_ERROR' |
| 264 | state_table = {} |
| 265 | |
| 266 | for state in self._config.states: |
| 267 | state_table[state] = [default_state for col in xrange(255)] |
| 268 | |
| 269 | # We process the transition in reverse order while updating the table. |
| 270 | for i_transition in range(len(self._config.transitions) - 1, -1, -1): |
| 271 | transition = self._config.transitions[i_transition] |
| 272 | (condition_name, src, dst) = (transition.condition, |
| 273 | transition.source, |
| 274 | transition.destination) |
| 275 | condition = self._config.conditions[condition_name] |
| 276 | char_list = self._ExpandBracketExpression(condition) |
| 277 | |
| 278 | for c in char_list: |
| 279 | state_table[src][ord(c)] = self._StateInternalC(dst) |
| 280 | |
| 281 | # Create the inner lists which map input characters to destination states. |
| 282 | for state in self._config.states: |
| 283 | transition_row = [] |
| 284 | for c in xrange(0, 255): |
| 285 | transition_row.append(' /* %06s */ %s' % (repr(chr(c)), |
| 286 | state_table[state][c])) |
| 287 | |
| 288 | out.append(self._CreateStructList('%stransition_row_%s' % |
| 289 | (self._Prefix(), |
| 290 | state), |
| 291 | 'int', |
| 292 | transition_row)) |
| 293 | out.append('\n') |
| 294 | |
| 295 | # Create the outer list, which map source states to input characters. |
| 296 | out.append('static const %s %s[] = {\n' % ('int *', self._Prefix() + |
| 297 | 'state_transitions')) |
| 298 | |
| 299 | row_list = [' %stransition_row_%s' % |
| 300 | (self._Prefix(), row) for row in self._config.states] |
| 301 | out.append(',\n'.join(row_list)) |
| 302 | out.append('\n};\n') |
| 303 | |
| 304 | return ''.join(out) |
| 305 | |
| 306 | def Generate(self): |
| 307 | """Returns the generated the C include statements for the statemachine.""" |
| 308 | |
| 309 | print '\n'.join((self._CreateHeader(), |
| 310 | self._CreateNumStates(), |
| 311 | self._CreateStatesEnum(), |
| 312 | self._CreateStatesExternal(), |
| 313 | self._CreateStatesInternalNames(), |
| 314 | self._CreateTransitionTable())) |
| 315 | |
| 316 | |
| 317 | def main(): |
| 318 | if len(sys.argv) != 2: |
| 319 | print "usage: generate_fsm.py config_file" |
| 320 | sys.exit(1) |
| 321 | |
| 322 | config = FSMConfig() |
| 323 | config.Load(sys.argv[1]) |
| 324 | |
| 325 | gen = FSMGenerateC(config) |
| 326 | gen.Generate() |
| 327 | |
| 328 | |
| 329 | if __name__ == "__main__": |
| 330 | main() |