Merge commit '3e013f9387c8370dd9bd729e05799cbdcb924005' as 'third_party/ctemplate'

Change-Id: Ie20726199c36cc8bf7372e1f6f4a547c29c18cc5
diff --git a/third_party/ctemplate/src/htmlparser/statemachine.cc b/third_party/ctemplate/src/htmlparser/statemachine.cc
new file mode 100644
index 0000000..00fbe26
--- /dev/null
+++ b/third_party/ctemplate/src/htmlparser/statemachine.cc
@@ -0,0 +1,457 @@
+/* Copyright (c) 2007, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---
+ *
+ * Author: falmeida@google.com (Filipe Almeida)
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "htmlparser/statemachine.h"
+
+/* So we can support both C and C++ compilers, we use the CAST() macro instead
+ * of using C style casts or static_cast<>() directly.
+ */
+#ifdef __cplusplus
+  #define CAST(type, expression) (static_cast<type>(expression))
+#else
+  #define CAST(type, expression) ((type)(expression))
+#endif
+
+#ifdef __cplusplus
+namespace ctemplate_htmlparser {
+#endif
+
+#define MAX_CHAR_8BIT 256
+
+/* Populates the statemachine definition.
+ */
+void statemachine_definition_populate(statemachine_definition *def,
+                                      const int* const* transition_table,
+                                      const char* const* state_names)
+{
+  assert(def != NULL);
+  assert(transition_table != NULL);
+
+  def->transition_table = transition_table;
+
+  def->state_names = state_names;
+}
+
+/* Add's the callback for the event in_state that is called when the
+ * statemachine is in state st.
+ *
+ * This event is called everytime the the statemachine is in the specified
+ * state forevery character in the input stream even if the state remains
+ * the same.
+ *
+ * This is event is the last event to be called and is fired after both events
+ * exit_state and enter_state.
+ */
+void statemachine_in_state(statemachine_definition *def, int st,
+                           state_event_function func)
+{
+    assert(def != NULL);
+    assert(st < def->num_states);
+    def->in_state_events[st] = func;
+}
+
+/* Add's the callback for the event enter_state that is called when the
+ * statemachine enters state st.
+ *
+ * This event is fired after the event exit_state but before the event
+ * in_state.
+ */
+void statemachine_enter_state(statemachine_definition *def, int st,
+                              state_event_function func)
+{
+    assert(def != NULL);
+    assert(st < def->num_states);
+    def->enter_state_events[st] = func;
+}
+
+/* Add's the callback for the event exit_state that is called when the
+ * statemachine exits from state st.
+ *
+ * This is the first event to be called and is fired before both the events
+ * enter_state and in_state.
+ */
+void statemachine_exit_state(statemachine_definition *def, int st,
+                             state_event_function func)
+{
+    assert(def != NULL);
+    assert(st < def->num_states);
+    def->exit_state_events[st] = func;
+}
+
+/* Initializes a new statemachine definition with a defined number of states.
+ *
+ * Returns NULL if initialization fails.
+ *
+ * Initialization failure is fatal, and if this function fails it may not
+ * deallocate all previsouly allocated memory.
+ */
+statemachine_definition *statemachine_definition_new(int states)
+{
+    statemachine_definition *def;
+    def = CAST(statemachine_definition *,
+               malloc(sizeof(statemachine_definition)));
+    if (def == NULL)
+      return NULL;
+
+    def->in_state_events = CAST(state_event_function *,
+                                calloc(states, sizeof(state_event_function)));
+    if (def->in_state_events == NULL)
+      return NULL;
+
+    def->enter_state_events =CAST(state_event_function *,
+                                   calloc(states,
+                                          sizeof(state_event_function)));
+    if (def->enter_state_events == NULL)
+      return NULL;
+
+    def->exit_state_events = CAST(state_event_function *,
+                                  calloc(states, sizeof(state_event_function)));
+    if (def->exit_state_events == NULL)
+      return NULL;
+
+    def->num_states = states;
+    def->state_names = NULL;
+    return def;
+}
+
+/* Deallocates a statemachine definition object
+ */
+void statemachine_definition_delete(statemachine_definition *def)
+{
+    assert(def != NULL);
+    free(def->in_state_events);
+    free(def->enter_state_events);
+    free(def->exit_state_events);
+    free(def);
+}
+
+/* Returns the current state.
+ */
+int statemachine_get_state(statemachine_ctx *ctx) {
+  return ctx->current_state;
+}
+
+/* Sets the current state.
+ *
+ * It calls the exit event for the old state and the enter event for the state
+ * we intend to move into.
+ *
+ * Since this state change was not initiated by a character in the input stream
+ * we pass a null char to the event functions.
+ */
+void statemachine_set_state(statemachine_ctx *ctx, int state)
+{
+
+  statemachine_definition *def;
+
+  assert(ctx != NULL);
+  assert(ctx->definition != NULL);
+
+  def = ctx->definition;
+
+  assert(state < def->num_states);
+
+  ctx->next_state = state;
+
+  if (ctx->current_state != ctx->next_state) {
+    if (def->exit_state_events[ctx->current_state])
+       def->exit_state_events[ctx->current_state](ctx,
+                                                 ctx->current_state,
+                                                 '\0',
+                                                 ctx->next_state);
+
+    if (def->enter_state_events[ctx->next_state])
+       def->enter_state_events[ctx->next_state](ctx,
+                                               ctx->current_state,
+                                               '\0',
+                                               ctx->next_state);
+  }
+
+  ctx->current_state = state;
+}
+
+/* Reset the statemachine.
+ *
+ * The state is set to the initialization values. This includes setting the
+ * state to the default state (0), stopping recording and setting the line
+ * number to 1.
+ */
+void statemachine_reset(statemachine_ctx *ctx)
+{
+  ctx->current_state = 0;
+  ctx->next_state = 0;
+  ctx->record_buffer[0] = '\0';
+  ctx->record_pos = 0;
+  ctx->recording = 0;
+  ctx->line_number = 1;
+  ctx->column_number = 1;
+}
+
+/* Initializes a new statemachine. Receives a statemachine definition object
+ * that should have been initialized with statemachine_definition_new() and a
+ * user reference to be used by the caller.
+ *
+ * The user reference is used by the caller to store any instance specific data
+ * the caller may need and is typically used to propagate context information
+ * to the event callbacks. The user pointer can just be set to NULL if the
+ * caller doesn't need it.
+ *
+ * Returns NULL if initialization fails.
+ *
+ * Initialization failure is fatal, and if this function fails it may not
+ * deallocate all previously allocated memory.
+ */
+statemachine_ctx *statemachine_new(statemachine_definition *def,
+                                   void *user)
+{
+    statemachine_ctx *ctx;
+    assert(def != NULL);
+    ctx = CAST(statemachine_ctx *, malloc(sizeof(statemachine_ctx)));
+    if (ctx == NULL)
+      return NULL;
+
+    statemachine_reset(ctx);
+
+    ctx->definition = def;
+    ctx->user = user;
+
+    return ctx;
+}
+
+/* Returns a pointer to a context which is a duplicate of the statemachine src.
+ * The statemachine definition and the user pointer have to be provided since
+ * these references are not owned by the statemachine itself, but this will be
+ * shallow copies as they point to data structures we do not own.
+ */
+statemachine_ctx *statemachine_duplicate(statemachine_ctx *src,
+                                         statemachine_definition *def,
+                                         void *user)
+{
+    statemachine_ctx *dst;
+    assert(src != NULL);
+    dst = statemachine_new(def, user);
+    if (dst == NULL)
+      return NULL;
+
+    statemachine_copy(dst, src, def, user);
+
+    return dst;
+}
+
+/* Copies the context of the statemachine pointed to by src to the statemachine
+ * provided by dst.
+ * The statemachine definition and the user pointer have to be provided since
+ * these references are not owned by the statemachine itself.
+ */
+void statemachine_copy(statemachine_ctx *dst,
+                       statemachine_ctx *src,
+                       statemachine_definition *def,
+                       void *user)
+{
+    memcpy(dst, src, sizeof(statemachine_ctx));
+    dst->definition = def;
+    dst->user = user;
+}
+
+/* Deallocates a statemachine object
+ */
+void statemachine_delete(statemachine_ctx *ctx)
+{
+    assert(ctx != NULL);
+    free(ctx);
+}
+
+/* Starts recording the current input stream into an internal buffer.
+ * The current input character is included in the recording.
+ */
+void statemachine_start_record(statemachine_ctx *ctx)
+{
+    assert(ctx != NULL);
+    ctx->record_buffer[0] = '\0';
+    ctx->record_pos = 0;
+    ctx->recording = 1;
+}
+
+/* Stops recording the current input stream.
+ * The last input character is not included in the recording.
+ * This function returns a pointer to the recorded string buffer.
+ */
+const char *statemachine_stop_record(statemachine_ctx *ctx)
+{
+    assert(ctx != NULL);
+    assert(ctx->recording);
+    ctx->record_buffer[ctx->record_pos] = '\0';
+    ctx->recording = 0;
+    return ctx->record_buffer;
+}
+
+ /* Returns a pointer to the record string buffer.
+ */
+const char *statemachine_record_buffer(statemachine_ctx *ctx)
+{
+    return ctx->record_buffer;
+}
+
+void statemachine_encode_char(char schr, char *output, size_t len)
+{
+  unsigned char chr = schr;
+  if (chr == '\'') {
+    strncpy(output, "\\'", len);
+  } else if (chr == '\\') {
+    strncpy(output, "\\\\", len);
+
+  /* Like isprint() but not dependent on locale. */
+  } else if (chr >= 32 && chr <= 126) {
+    snprintf(output, len, "%c", chr);
+  } else if (chr == '\n') {
+    strncpy(output, "\\n", len);
+  } else if (chr == '\r') {
+    strncpy(output, "\\r", len);
+  } else if (chr == '\t') {
+    strncpy(output, "\\t", len);
+  } else {
+    snprintf(output, len, "\\x%.2x", chr);
+  }
+
+  output[len - 1] = '\0';
+}
+
+/* Sets the error message in case of a transition error.
+ *
+ * Called from statemachine_parse to set the error message in case of a
+ * transition error.
+ */
+static void statemachine_set_transition_error_message(statemachine_ctx *ctx)
+{
+  char encoded_char[10];
+  statemachine_encode_char(ctx->current_char, encoded_char,
+                           sizeof(encoded_char));
+
+  if (ctx->definition->state_names) {
+    snprintf(ctx->error_msg, STATEMACHINE_MAX_STR_ERROR,
+             "Unexpected character '%s' in state '%s'",
+             encoded_char,
+             ctx->definition->state_names[ctx->current_state]);
+  } else {
+    snprintf(ctx->error_msg, STATEMACHINE_MAX_STR_ERROR,
+             "Unexpected character '%s'", encoded_char);
+  }
+
+}
+
+/* Parses the input html stream and returns the finishing state.
+ *
+ * Returns STATEMACHINE_ERROR if unable to parse the input. If
+ * statemachine_parse() is called after an error situation was encountered
+ * the behaviour is unspecified.
+ */
+/* TODO(falmeida): change int size to size_t size */
+int statemachine_parse(statemachine_ctx *ctx, const char *str, int size)
+{
+    int i;
+    const int* const* state_table = ctx->definition->transition_table;
+    statemachine_definition *def;
+
+    assert(ctx !=NULL &&
+           ctx->definition != NULL &&
+           ctx->definition->transition_table != NULL);
+
+    if (size < 0) {
+        snprintf(ctx->error_msg, STATEMACHINE_MAX_STR_ERROR, "%s",
+                 "Negative size in statemachine_parse().");
+        return STATEMACHINE_ERROR;
+    }
+
+    def = ctx->definition;
+
+    for (i = 0; i < size; i++) {
+        ctx->current_char = *str;
+        ctx->next_state =
+            state_table[ctx->current_state][CAST(unsigned char, *str)];
+        if (ctx->next_state == STATEMACHINE_ERROR) {
+            statemachine_set_transition_error_message(ctx);
+            return STATEMACHINE_ERROR;
+        }
+
+        if (ctx->current_state != ctx->next_state) {
+            if (def->exit_state_events[ctx->current_state])
+                def->exit_state_events[ctx->current_state](ctx,
+                                                           ctx->current_state,
+                                                           *str,
+                                                           ctx->next_state);
+        }
+        if (ctx->current_state != ctx->next_state) {
+            if (def->enter_state_events[ctx->next_state])
+                def->enter_state_events[ctx->next_state](ctx,
+                                                         ctx->current_state,
+                                                         *str,
+                                                         ctx->next_state);
+        }
+
+        if (def->in_state_events[ctx->next_state])
+            def->in_state_events[ctx->next_state](ctx,
+                                                  ctx->current_state,
+                                                  *str,
+                                                  ctx->next_state);
+
+        /* We need two bytes left so we can NULL terminate the string. */
+        if (ctx->recording &&
+            STATEMACHINE_RECORD_BUFFER_SIZE - 1 > ctx->record_pos) {
+            ctx->record_buffer[ctx->record_pos++] = *str;
+            ctx->record_buffer[ctx->record_pos] = '\0';
+        }
+
+/* TODO(falmeida): Should clarify the contract here, since an event can change
+ * ctx->next_state and we need this functionality */
+
+        ctx->current_state = ctx->next_state;
+        ctx->column_number++;
+
+        if (*str == '\n') {
+          ctx->line_number++;
+          ctx->column_number = 1;
+        }
+        str++;
+    }
+
+    return ctx->current_state;
+}
+
+#ifdef __cplusplus
+}  /* namespace security_streamhtmlparser */
+#endif