Squashed 'third_party/ctemplate/' content from commit 6742f62

Change-Id: I828e4e4c906f13ba19944d78a8a78652b62949af
git-subtree-dir: third_party/ctemplate
git-subtree-split: 6742f6233db12f545e90baa8f34f5c29c4eb396a
diff --git a/src/htmlparser/generate_fsm.py b/src/htmlparser/generate_fsm.py
new file mode 100755
index 0000000..9106b96
--- /dev/null
+++ b/src/htmlparser/generate_fsm.py
@@ -0,0 +1,330 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ---
+#
+# Generate a C include file from a finite state machine definition.
+#
+# Right now the form is the one expected by htmlparser.c so this file is pretty
+# tightly coupled with htmlparser.c.
+#
+
+__author__ = 'falmeida@google.com (Filipe Almeida)'
+
+import sys
+
+from fsm_config import FSMConfig
+
+
+class FSMGenerateAbstract(object):
+
+  def __init__(self, config):
+    self._config = config
+
+  def Generate(self):
+    """Returns the generated FSM description for the specified language.
+
+    Raises a TypeError, because abstract methods can not be called.
+
+    Raises:
+      TypeError
+    """
+    raise TypeError('Abstract method %s.%s called' % (self._class.__name__,
+                                                      self._function))
+
+
+class FSMGenerateC(FSMGenerateAbstract):
+  """Generate the C definition from a statemachien configuration object."""
+
+  TABSTOP_ = 2
+
+  def _Prefix(self):
+    """Return a c declaration prefix."""
+
+    return self._config.name.lower() + '_'
+
+  def _StateInternalC(self, st):
+    """Return the internal name of the state."""
+
+    return '%sSTATE_INT_%s' % (self._Prefix().upper(), st.upper())
+
+  def _StateExternalC(self, st):
+    """Return the external name of the state."""
+
+    return '%sSTATE_%s' % (self._Prefix().upper(), st.upper())
+
+  def _MakeTuple(self, data):
+    """Converts data to a string representation of a C tuple."""
+
+    return '{ %s }' % ', '.join(data)
+
+  def _CreateHeader(self):
+    """Print the include file header."""
+
+    out = []
+
+    if self._config.comment:
+      out.append('/* ' + self._config.comment)
+    else:
+      out.append('/* State machine definition for ' + self._config.name)
+    out.append(' * Auto generated by generate_fsm.py. Please do not edit.')
+    out.append(' */')
+
+    return '\n'.join(out)
+
+  def _ListToIndentedString(self, list):
+    indented_list = ['  ' + e for e in list]
+    return ',\n'.join(indented_list)
+
+  def _CreateEnum(self, name, data):
+    """Print a c enum definition."""
+
+    return 'enum %s {\n%s\n};\n' % (name,
+                                    self._ListToIndentedString(data))
+
+  def _CreateStructList(self, name, type, data):
+    """Print a c flat list.
+
+    Generic function to print list in c in the form of a struct.
+
+    Args:
+      name: name of the structure.
+      type: type of the struct.
+      data: contents of the struct as a list of elements
+
+    Returns:
+      String with the generated list.
+    """
+
+    return "static const %s %s[] = {\n%s\n};\n" % (
+        type,
+        name,
+        self._ListToIndentedString(data))
+
+  def _CreateStatesEnum(self):
+    """Print the internal states enum.
+
+    Prints an enum containing all the valid states.
+
+    Returns:
+      String containing a C enumeration of the states.
+    """
+    list = []  # output list
+
+    for state in self._config.states:
+      list.append(self._StateInternalC(state))
+    return self._CreateEnum(self._Prefix() + 'state_internal_enum', list)
+
+  def _CreateStatesExternal(self):
+    """Print a struct with a mapping from internal to external states."""
+    list = []  # output list
+
+    for state_name in self._config.states:
+      list.append(self._StateExternalC(
+                                self._config.states[state_name].external_name))
+
+    return self._CreateStructList(self._Prefix() + 'states_external',
+                                  'int',
+                                  list)
+
+  def _CreateStatesInternalNames(self):
+    """Return a struct mapping internal states to a strings."""
+    out = []  # output list
+
+    for state_name in self._config.states:
+      out.append('"' + state_name + '"')
+
+    return self._CreateStructList(self._Prefix() + 'states_internal_names',
+                                  'char *',
+                                  out)
+
+  def _CreateNumStates(self):
+    """Print a Macro defining the number of states."""
+
+    return "#define %s_NUM_STATES %s" % (self._config.name.upper(),
+                                         str(len(self._config.states) + 1))
+
+  def _ExpandBracketExpression(self, expression):
+    """Expand ranges in a regexp bracket expression.
+
+    Returns a string with the ranges in a bracket expression expanded.
+
+    The bracket expression is similar to grep(1) or regular expression bracket
+    expressions but it does not support the negation (^) modifier or named
+    character classes like [:alpha:] or [:alnum:].
+
+    The especial character class [:default:] will expand to all elements in the
+    ascii range.
+
+    For example, the expression 'a-c13A-D' will expand to 'abc13ABCD'.
+
+    Args:
+      expression: A regexp bracket expression. Ie: 'A-Z0-9'.
+
+    Returns:
+      A string with the ranges in the bracket expression expanded.
+    """
+
+    def ExpandRange(start, end):
+      """Return a sequence of characters between start and end.
+
+      Args:
+        start: first character of the sequence.
+        end: last character of the sequence.
+
+      Returns:
+        string containing the sequence of characters between start and end.
+      """
+      return [chr(c) for c in range(ord(start), ord(end) + 1)]
+
+    def ListNext(input_list):
+      """Pop the first element of a list.
+
+      Args:
+        input_list: python list object.
+
+      Returns:
+        First element of the list or None if the list is empty.
+      """
+      if input_list:
+        return input_list.pop(0)
+      else:
+        return None
+
+    out = []  # List containing the output
+
+    # Special case for the character class [:default:]
+    if expression == '[:default:]':
+      out = [chr(c) for c in range(0, 255)]
+      return ''.join(out)
+
+    chars = [c for c in expression]  # list o characters in the expression.
+
+    current = ListNext(chars)
+    while current:
+      next = ListNext(chars)
+      if next == '-':
+        next = ListNext(chars)
+        if next:
+          out.extend(ExpandRange(current, next))
+        else:
+          out.append(current)
+          out.append('-')
+        current = ListNext(chars)
+      else:
+        out.append(current)
+        current = next
+
+    return ''.join(out)
+
+  def _CreateTransitionTable(self):
+    """Print the state transition list.
+
+    Returns a set of C structures that define the transition table for the state
+    machine. This structure is a list of lists of ints (int **). The outer list
+    indexes the source state and the inner list contains the destination state
+    for each of the possible input characters:
+
+    const int * const* transitions[source][input] == destination.
+
+    The conditions are mapped from the conditions variable.
+
+    Returns:
+      String containing the generated transition table in a C struct.
+    """
+    out = []          # output list
+    default_state = 'STATEMACHINE_ERROR'
+    state_table = {}
+
+    for state in self._config.states:
+      state_table[state] = [default_state for col in xrange(255)]
+
+    # We process the transition in reverse order while updating the table.
+    for i_transition in range(len(self._config.transitions) - 1, -1, -1):
+      transition = self._config.transitions[i_transition]
+      (condition_name, src, dst) = (transition.condition,
+                                    transition.source,
+                                    transition.destination)
+      condition = self._config.conditions[condition_name]
+      char_list = self._ExpandBracketExpression(condition)
+
+      for c in char_list:
+        state_table[src][ord(c)] = self._StateInternalC(dst)
+
+    # Create the inner lists which map input characters to destination states.
+    for state in self._config.states:
+      transition_row = []
+      for c in xrange(0, 255):
+        transition_row.append('    /* %06s */ %s' % (repr(chr(c)),
+                                                     state_table[state][c]))
+
+      out.append(self._CreateStructList('%stransition_row_%s' %
+                                        (self._Prefix(),
+                                         state),
+                                        'int',
+                                        transition_row))
+      out.append('\n')
+
+    # Create the outer list, which map source states to input characters.
+    out.append('static const %s %s[] = {\n' % ('int *', self._Prefix() +
+                                               'state_transitions'))
+
+    row_list = ['  %stransition_row_%s' %
+                (self._Prefix(), row) for row in self._config.states]
+    out.append(',\n'.join(row_list))
+    out.append('\n};\n')
+
+    return ''.join(out)
+
+  def Generate(self):
+    """Returns the generated the C include statements for the statemachine."""
+
+    print '\n'.join((self._CreateHeader(),
+                     self._CreateNumStates(),
+                     self._CreateStatesEnum(),
+                     self._CreateStatesExternal(),
+                     self._CreateStatesInternalNames(),
+                     self._CreateTransitionTable()))
+
+
+def main():
+  if len(sys.argv) != 2:
+    print "usage: generate_fsm.py config_file"
+    sys.exit(1)
+
+  config = FSMConfig()
+  config.Load(sys.argv[1])
+
+  gen = FSMGenerateC(config)
+  gen.Generate()
+
+
+if __name__ == "__main__":
+  main()