blob: 00fbe261d4bcc13424150049140903cea1e2400e [file] [log] [blame]
/* Copyright (c) 2007, Google Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Google Inc. nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* ---
*
* Author: falmeida@google.com (Filipe Almeida)
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "htmlparser/statemachine.h"
/* So we can support both C and C++ compilers, we use the CAST() macro instead
* of using C style casts or static_cast<>() directly.
*/
#ifdef __cplusplus
#define CAST(type, expression) (static_cast<type>(expression))
#else
#define CAST(type, expression) ((type)(expression))
#endif
#ifdef __cplusplus
namespace ctemplate_htmlparser {
#endif
#define MAX_CHAR_8BIT 256
/* Populates the statemachine definition.
*/
void statemachine_definition_populate(statemachine_definition *def,
const int* const* transition_table,
const char* const* state_names)
{
assert(def != NULL);
assert(transition_table != NULL);
def->transition_table = transition_table;
def->state_names = state_names;
}
/* Add's the callback for the event in_state that is called when the
* statemachine is in state st.
*
* This event is called everytime the the statemachine is in the specified
* state forevery character in the input stream even if the state remains
* the same.
*
* This is event is the last event to be called and is fired after both events
* exit_state and enter_state.
*/
void statemachine_in_state(statemachine_definition *def, int st,
state_event_function func)
{
assert(def != NULL);
assert(st < def->num_states);
def->in_state_events[st] = func;
}
/* Add's the callback for the event enter_state that is called when the
* statemachine enters state st.
*
* This event is fired after the event exit_state but before the event
* in_state.
*/
void statemachine_enter_state(statemachine_definition *def, int st,
state_event_function func)
{
assert(def != NULL);
assert(st < def->num_states);
def->enter_state_events[st] = func;
}
/* Add's the callback for the event exit_state that is called when the
* statemachine exits from state st.
*
* This is the first event to be called and is fired before both the events
* enter_state and in_state.
*/
void statemachine_exit_state(statemachine_definition *def, int st,
state_event_function func)
{
assert(def != NULL);
assert(st < def->num_states);
def->exit_state_events[st] = func;
}
/* Initializes a new statemachine definition with a defined number of states.
*
* Returns NULL if initialization fails.
*
* Initialization failure is fatal, and if this function fails it may not
* deallocate all previsouly allocated memory.
*/
statemachine_definition *statemachine_definition_new(int states)
{
statemachine_definition *def;
def = CAST(statemachine_definition *,
malloc(sizeof(statemachine_definition)));
if (def == NULL)
return NULL;
def->in_state_events = CAST(state_event_function *,
calloc(states, sizeof(state_event_function)));
if (def->in_state_events == NULL)
return NULL;
def->enter_state_events =CAST(state_event_function *,
calloc(states,
sizeof(state_event_function)));
if (def->enter_state_events == NULL)
return NULL;
def->exit_state_events = CAST(state_event_function *,
calloc(states, sizeof(state_event_function)));
if (def->exit_state_events == NULL)
return NULL;
def->num_states = states;
def->state_names = NULL;
return def;
}
/* Deallocates a statemachine definition object
*/
void statemachine_definition_delete(statemachine_definition *def)
{
assert(def != NULL);
free(def->in_state_events);
free(def->enter_state_events);
free(def->exit_state_events);
free(def);
}
/* Returns the current state.
*/
int statemachine_get_state(statemachine_ctx *ctx) {
return ctx->current_state;
}
/* Sets the current state.
*
* It calls the exit event for the old state and the enter event for the state
* we intend to move into.
*
* Since this state change was not initiated by a character in the input stream
* we pass a null char to the event functions.
*/
void statemachine_set_state(statemachine_ctx *ctx, int state)
{
statemachine_definition *def;
assert(ctx != NULL);
assert(ctx->definition != NULL);
def = ctx->definition;
assert(state < def->num_states);
ctx->next_state = state;
if (ctx->current_state != ctx->next_state) {
if (def->exit_state_events[ctx->current_state])
def->exit_state_events[ctx->current_state](ctx,
ctx->current_state,
'\0',
ctx->next_state);
if (def->enter_state_events[ctx->next_state])
def->enter_state_events[ctx->next_state](ctx,
ctx->current_state,
'\0',
ctx->next_state);
}
ctx->current_state = state;
}
/* Reset the statemachine.
*
* The state is set to the initialization values. This includes setting the
* state to the default state (0), stopping recording and setting the line
* number to 1.
*/
void statemachine_reset(statemachine_ctx *ctx)
{
ctx->current_state = 0;
ctx->next_state = 0;
ctx->record_buffer[0] = '\0';
ctx->record_pos = 0;
ctx->recording = 0;
ctx->line_number = 1;
ctx->column_number = 1;
}
/* Initializes a new statemachine. Receives a statemachine definition object
* that should have been initialized with statemachine_definition_new() and a
* user reference to be used by the caller.
*
* The user reference is used by the caller to store any instance specific data
* the caller may need and is typically used to propagate context information
* to the event callbacks. The user pointer can just be set to NULL if the
* caller doesn't need it.
*
* Returns NULL if initialization fails.
*
* Initialization failure is fatal, and if this function fails it may not
* deallocate all previously allocated memory.
*/
statemachine_ctx *statemachine_new(statemachine_definition *def,
void *user)
{
statemachine_ctx *ctx;
assert(def != NULL);
ctx = CAST(statemachine_ctx *, malloc(sizeof(statemachine_ctx)));
if (ctx == NULL)
return NULL;
statemachine_reset(ctx);
ctx->definition = def;
ctx->user = user;
return ctx;
}
/* Returns a pointer to a context which is a duplicate of the statemachine src.
* The statemachine definition and the user pointer have to be provided since
* these references are not owned by the statemachine itself, but this will be
* shallow copies as they point to data structures we do not own.
*/
statemachine_ctx *statemachine_duplicate(statemachine_ctx *src,
statemachine_definition *def,
void *user)
{
statemachine_ctx *dst;
assert(src != NULL);
dst = statemachine_new(def, user);
if (dst == NULL)
return NULL;
statemachine_copy(dst, src, def, user);
return dst;
}
/* Copies the context of the statemachine pointed to by src to the statemachine
* provided by dst.
* The statemachine definition and the user pointer have to be provided since
* these references are not owned by the statemachine itself.
*/
void statemachine_copy(statemachine_ctx *dst,
statemachine_ctx *src,
statemachine_definition *def,
void *user)
{
memcpy(dst, src, sizeof(statemachine_ctx));
dst->definition = def;
dst->user = user;
}
/* Deallocates a statemachine object
*/
void statemachine_delete(statemachine_ctx *ctx)
{
assert(ctx != NULL);
free(ctx);
}
/* Starts recording the current input stream into an internal buffer.
* The current input character is included in the recording.
*/
void statemachine_start_record(statemachine_ctx *ctx)
{
assert(ctx != NULL);
ctx->record_buffer[0] = '\0';
ctx->record_pos = 0;
ctx->recording = 1;
}
/* Stops recording the current input stream.
* The last input character is not included in the recording.
* This function returns a pointer to the recorded string buffer.
*/
const char *statemachine_stop_record(statemachine_ctx *ctx)
{
assert(ctx != NULL);
assert(ctx->recording);
ctx->record_buffer[ctx->record_pos] = '\0';
ctx->recording = 0;
return ctx->record_buffer;
}
/* Returns a pointer to the record string buffer.
*/
const char *statemachine_record_buffer(statemachine_ctx *ctx)
{
return ctx->record_buffer;
}
void statemachine_encode_char(char schr, char *output, size_t len)
{
unsigned char chr = schr;
if (chr == '\'') {
strncpy(output, "\\'", len);
} else if (chr == '\\') {
strncpy(output, "\\\\", len);
/* Like isprint() but not dependent on locale. */
} else if (chr >= 32 && chr <= 126) {
snprintf(output, len, "%c", chr);
} else if (chr == '\n') {
strncpy(output, "\\n", len);
} else if (chr == '\r') {
strncpy(output, "\\r", len);
} else if (chr == '\t') {
strncpy(output, "\\t", len);
} else {
snprintf(output, len, "\\x%.2x", chr);
}
output[len - 1] = '\0';
}
/* Sets the error message in case of a transition error.
*
* Called from statemachine_parse to set the error message in case of a
* transition error.
*/
static void statemachine_set_transition_error_message(statemachine_ctx *ctx)
{
char encoded_char[10];
statemachine_encode_char(ctx->current_char, encoded_char,
sizeof(encoded_char));
if (ctx->definition->state_names) {
snprintf(ctx->error_msg, STATEMACHINE_MAX_STR_ERROR,
"Unexpected character '%s' in state '%s'",
encoded_char,
ctx->definition->state_names[ctx->current_state]);
} else {
snprintf(ctx->error_msg, STATEMACHINE_MAX_STR_ERROR,
"Unexpected character '%s'", encoded_char);
}
}
/* Parses the input html stream and returns the finishing state.
*
* Returns STATEMACHINE_ERROR if unable to parse the input. If
* statemachine_parse() is called after an error situation was encountered
* the behaviour is unspecified.
*/
/* TODO(falmeida): change int size to size_t size */
int statemachine_parse(statemachine_ctx *ctx, const char *str, int size)
{
int i;
const int* const* state_table = ctx->definition->transition_table;
statemachine_definition *def;
assert(ctx !=NULL &&
ctx->definition != NULL &&
ctx->definition->transition_table != NULL);
if (size < 0) {
snprintf(ctx->error_msg, STATEMACHINE_MAX_STR_ERROR, "%s",
"Negative size in statemachine_parse().");
return STATEMACHINE_ERROR;
}
def = ctx->definition;
for (i = 0; i < size; i++) {
ctx->current_char = *str;
ctx->next_state =
state_table[ctx->current_state][CAST(unsigned char, *str)];
if (ctx->next_state == STATEMACHINE_ERROR) {
statemachine_set_transition_error_message(ctx);
return STATEMACHINE_ERROR;
}
if (ctx->current_state != ctx->next_state) {
if (def->exit_state_events[ctx->current_state])
def->exit_state_events[ctx->current_state](ctx,
ctx->current_state,
*str,
ctx->next_state);
}
if (ctx->current_state != ctx->next_state) {
if (def->enter_state_events[ctx->next_state])
def->enter_state_events[ctx->next_state](ctx,
ctx->current_state,
*str,
ctx->next_state);
}
if (def->in_state_events[ctx->next_state])
def->in_state_events[ctx->next_state](ctx,
ctx->current_state,
*str,
ctx->next_state);
/* We need two bytes left so we can NULL terminate the string. */
if (ctx->recording &&
STATEMACHINE_RECORD_BUFFER_SIZE - 1 > ctx->record_pos) {
ctx->record_buffer[ctx->record_pos++] = *str;
ctx->record_buffer[ctx->record_pos] = '\0';
}
/* TODO(falmeida): Should clarify the contract here, since an event can change
* ctx->next_state and we need this functionality */
ctx->current_state = ctx->next_state;
ctx->column_number++;
if (*str == '\n') {
ctx->line_number++;
ctx->column_number = 1;
}
str++;
}
return ctx->current_state;
}
#ifdef __cplusplus
} /* namespace security_streamhtmlparser */
#endif