/* Copyright (c) 2007, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ---
 *
 * Author: falmeida@google.com (Filipe Almeida)
 */
33
34#include <stdio.h>
35#include <stdlib.h>
36#include <string.h>
37#include <assert.h>
38
39#include "htmlparser/statemachine.h"
40
/* CAST(type, expression) converts expression to type.
 *
 * This file is built by both C and C++ compilers, so conversions go through
 * this macro: it expands to a plain C cast when compiled as C and to
 * static_cast<>() when compiled as C++.
 */
#ifndef __cplusplus
  #define CAST(type, expression) ((type)(expression))
#else
  #define CAST(type, expression) (static_cast<type>(expression))
#endif
49
50#ifdef __cplusplus
51namespace ctemplate_htmlparser {
52#endif
53
54#define MAX_CHAR_8BIT 256
55
56/* Populates the statemachine definition.
57 */
58void statemachine_definition_populate(statemachine_definition *def,
59 const int* const* transition_table,
60 const char* const* state_names)
61{
62 assert(def != NULL);
63 assert(transition_table != NULL);
64
65 def->transition_table = transition_table;
66
67 def->state_names = state_names;
68}
69
70/* Add's the callback for the event in_state that is called when the
71 * statemachine is in state st.
72 *
73 * This event is called everytime the the statemachine is in the specified
74 * state forevery character in the input stream even if the state remains
75 * the same.
76 *
77 * This is event is the last event to be called and is fired after both events
78 * exit_state and enter_state.
79 */
80void statemachine_in_state(statemachine_definition *def, int st,
81 state_event_function func)
82{
83 assert(def != NULL);
84 assert(st < def->num_states);
85 def->in_state_events[st] = func;
86}
87
88/* Add's the callback for the event enter_state that is called when the
89 * statemachine enters state st.
90 *
91 * This event is fired after the event exit_state but before the event
92 * in_state.
93 */
94void statemachine_enter_state(statemachine_definition *def, int st,
95 state_event_function func)
96{
97 assert(def != NULL);
98 assert(st < def->num_states);
99 def->enter_state_events[st] = func;
100}
101
102/* Add's the callback for the event exit_state that is called when the
103 * statemachine exits from state st.
104 *
105 * This is the first event to be called and is fired before both the events
106 * enter_state and in_state.
107 */
108void statemachine_exit_state(statemachine_definition *def, int st,
109 state_event_function func)
110{
111 assert(def != NULL);
112 assert(st < def->num_states);
113 def->exit_state_events[st] = func;
114}
115
116/* Initializes a new statemachine definition with a defined number of states.
117 *
118 * Returns NULL if initialization fails.
119 *
120 * Initialization failure is fatal, and if this function fails it may not
121 * deallocate all previsouly allocated memory.
122 */
123statemachine_definition *statemachine_definition_new(int states)
124{
125 statemachine_definition *def;
126 def = CAST(statemachine_definition *,
127 malloc(sizeof(statemachine_definition)));
128 if (def == NULL)
129 return NULL;
130
131 def->in_state_events = CAST(state_event_function *,
132 calloc(states, sizeof(state_event_function)));
133 if (def->in_state_events == NULL)
134 return NULL;
135
136 def->enter_state_events =CAST(state_event_function *,
137 calloc(states,
138 sizeof(state_event_function)));
139 if (def->enter_state_events == NULL)
140 return NULL;
141
142 def->exit_state_events = CAST(state_event_function *,
143 calloc(states, sizeof(state_event_function)));
144 if (def->exit_state_events == NULL)
145 return NULL;
146
147 def->num_states = states;
148 def->state_names = NULL;
149 return def;
150}
151
152/* Deallocates a statemachine definition object
153 */
154void statemachine_definition_delete(statemachine_definition *def)
155{
156 assert(def != NULL);
157 free(def->in_state_events);
158 free(def->enter_state_events);
159 free(def->exit_state_events);
160 free(def);
161}
162
163/* Returns the current state.
164 */
165int statemachine_get_state(statemachine_ctx *ctx) {
166 return ctx->current_state;
167}
168
169/* Sets the current state.
170 *
171 * It calls the exit event for the old state and the enter event for the state
172 * we intend to move into.
173 *
174 * Since this state change was not initiated by a character in the input stream
175 * we pass a null char to the event functions.
176 */
177void statemachine_set_state(statemachine_ctx *ctx, int state)
178{
179
180 statemachine_definition *def;
181
182 assert(ctx != NULL);
183 assert(ctx->definition != NULL);
184
185 def = ctx->definition;
186
187 assert(state < def->num_states);
188
189 ctx->next_state = state;
190
191 if (ctx->current_state != ctx->next_state) {
192 if (def->exit_state_events[ctx->current_state])
193 def->exit_state_events[ctx->current_state](ctx,
194 ctx->current_state,
195 '\0',
196 ctx->next_state);
197
198 if (def->enter_state_events[ctx->next_state])
199 def->enter_state_events[ctx->next_state](ctx,
200 ctx->current_state,
201 '\0',
202 ctx->next_state);
203 }
204
205 ctx->current_state = state;
206}
207
208/* Reset the statemachine.
209 *
210 * The state is set to the initialization values. This includes setting the
211 * state to the default state (0), stopping recording and setting the line
212 * number to 1.
213 */
214void statemachine_reset(statemachine_ctx *ctx)
215{
216 ctx->current_state = 0;
217 ctx->next_state = 0;
218 ctx->record_buffer[0] = '\0';
219 ctx->record_pos = 0;
220 ctx->recording = 0;
221 ctx->line_number = 1;
222 ctx->column_number = 1;
223}
224
225/* Initializes a new statemachine. Receives a statemachine definition object
226 * that should have been initialized with statemachine_definition_new() and a
227 * user reference to be used by the caller.
228 *
229 * The user reference is used by the caller to store any instance specific data
230 * the caller may need and is typically used to propagate context information
231 * to the event callbacks. The user pointer can just be set to NULL if the
232 * caller doesn't need it.
233 *
234 * Returns NULL if initialization fails.
235 *
236 * Initialization failure is fatal, and if this function fails it may not
237 * deallocate all previously allocated memory.
238 */
239statemachine_ctx *statemachine_new(statemachine_definition *def,
240 void *user)
241{
242 statemachine_ctx *ctx;
243 assert(def != NULL);
244 ctx = CAST(statemachine_ctx *, malloc(sizeof(statemachine_ctx)));
245 if (ctx == NULL)
246 return NULL;
247
248 statemachine_reset(ctx);
249
250 ctx->definition = def;
251 ctx->user = user;
252
253 return ctx;
254}
255
256/* Returns a pointer to a context which is a duplicate of the statemachine src.
257 * The statemachine definition and the user pointer have to be provided since
258 * these references are not owned by the statemachine itself, but this will be
259 * shallow copies as they point to data structures we do not own.
260 */
261statemachine_ctx *statemachine_duplicate(statemachine_ctx *src,
262 statemachine_definition *def,
263 void *user)
264{
265 statemachine_ctx *dst;
266 assert(src != NULL);
267 dst = statemachine_new(def, user);
268 if (dst == NULL)
269 return NULL;
270
271 statemachine_copy(dst, src, def, user);
272
273 return dst;
274}
275
276/* Copies the context of the statemachine pointed to by src to the statemachine
277 * provided by dst.
278 * The statemachine definition and the user pointer have to be provided since
279 * these references are not owned by the statemachine itself.
280 */
281void statemachine_copy(statemachine_ctx *dst,
282 statemachine_ctx *src,
283 statemachine_definition *def,
284 void *user)
285{
286 memcpy(dst, src, sizeof(statemachine_ctx));
287 dst->definition = def;
288 dst->user = user;
289}
290
291/* Deallocates a statemachine object
292 */
293void statemachine_delete(statemachine_ctx *ctx)
294{
295 assert(ctx != NULL);
296 free(ctx);
297}
298
299/* Starts recording the current input stream into an internal buffer.
300 * The current input character is included in the recording.
301 */
302void statemachine_start_record(statemachine_ctx *ctx)
303{
304 assert(ctx != NULL);
305 ctx->record_buffer[0] = '\0';
306 ctx->record_pos = 0;
307 ctx->recording = 1;
308}
309
310/* Stops recording the current input stream.
311 * The last input character is not included in the recording.
312 * This function returns a pointer to the recorded string buffer.
313 */
314const char *statemachine_stop_record(statemachine_ctx *ctx)
315{
316 assert(ctx != NULL);
317 assert(ctx->recording);
318 ctx->record_buffer[ctx->record_pos] = '\0';
319 ctx->recording = 0;
320 return ctx->record_buffer;
321}
322
323 /* Returns a pointer to the record string buffer.
324 */
325const char *statemachine_record_buffer(statemachine_ctx *ctx)
326{
327 return ctx->record_buffer;
328}
329
/* Writes a printable, escaped representation of the character schr into
 * output, a buffer with room for len bytes including the terminating NUL.
 *
 *  - A single quote and a backslash are prefixed with a backslash.
 *  - Any other printable ASCII character (codes 32 to 126) is copied as is.
 *  - Newline, carriage return and tab become \n, \r and \t.
 *  - Every other byte becomes \xHH with two lowercase hex digits.
 *
 * The result is always NUL terminated and is truncated if it does not fit.
 * If len is 0 (or output is NULL) nothing is written.
 */
void statemachine_encode_char(char schr, char *output, size_t len)
{
    /* Work on the unsigned value so bytes above 0x7f are printed as
     * \x80..\xff no matter whether plain char is signed. */
    const unsigned char chr = schr;
    const unsigned int code = chr;
    const char *escape = NULL;

    /* Without this guard a zero length would underflow when computing the
     * position of the terminator. */
    if (output == NULL || len == 0)
        return;

    switch (chr) {
    case '\'':
        escape = "\\'";
        break;
    case '\\':
        escape = "\\\\";
        break;
    case '\n':
        escape = "\\n";
        break;
    case '\r':
        escape = "\\r";
        break;
    case '\t':
        escape = "\\t";
        break;
    default:
        break;
    }

    /* snprintf always NUL terminates when len > 0 and truncates as needed. */
    if (escape != NULL) {
        snprintf(output, len, "%s", escape);
    } else if (chr >= 32 && chr <= 126) {
        /* Like isprint() but not dependent on locale. */
        snprintf(output, len, "%c", chr);
    } else {
        snprintf(output, len, "\\x%.2x", code);
    }
}
353
354/* Sets the error message in case of a transition error.
355 *
356 * Called from statemachine_parse to set the error message in case of a
357 * transition error.
358 */
359static void statemachine_set_transition_error_message(statemachine_ctx *ctx)
360{
361 char encoded_char[10];
362 statemachine_encode_char(ctx->current_char, encoded_char,
363 sizeof(encoded_char));
364
365 if (ctx->definition->state_names) {
366 snprintf(ctx->error_msg, STATEMACHINE_MAX_STR_ERROR,
367 "Unexpected character '%s' in state '%s'",
368 encoded_char,
369 ctx->definition->state_names[ctx->current_state]);
370 } else {
371 snprintf(ctx->error_msg, STATEMACHINE_MAX_STR_ERROR,
372 "Unexpected character '%s'", encoded_char);
373 }
374
375}
376
377/* Parses the input html stream and returns the finishing state.
378 *
379 * Returns STATEMACHINE_ERROR if unable to parse the input. If
380 * statemachine_parse() is called after an error situation was encountered
381 * the behaviour is unspecified.
382 */
383/* TODO(falmeida): change int size to size_t size */
384int statemachine_parse(statemachine_ctx *ctx, const char *str, int size)
385{
386 int i;
387 const int* const* state_table = ctx->definition->transition_table;
388 statemachine_definition *def;
389
390 assert(ctx !=NULL &&
391 ctx->definition != NULL &&
392 ctx->definition->transition_table != NULL);
393
394 if (size < 0) {
395 snprintf(ctx->error_msg, STATEMACHINE_MAX_STR_ERROR, "%s",
396 "Negative size in statemachine_parse().");
397 return STATEMACHINE_ERROR;
398 }
399
400 def = ctx->definition;
401
402 for (i = 0; i < size; i++) {
403 ctx->current_char = *str;
404 ctx->next_state =
405 state_table[ctx->current_state][CAST(unsigned char, *str)];
406 if (ctx->next_state == STATEMACHINE_ERROR) {
407 statemachine_set_transition_error_message(ctx);
408 return STATEMACHINE_ERROR;
409 }
410
411 if (ctx->current_state != ctx->next_state) {
412 if (def->exit_state_events[ctx->current_state])
413 def->exit_state_events[ctx->current_state](ctx,
414 ctx->current_state,
415 *str,
416 ctx->next_state);
417 }
418 if (ctx->current_state != ctx->next_state) {
419 if (def->enter_state_events[ctx->next_state])
420 def->enter_state_events[ctx->next_state](ctx,
421 ctx->current_state,
422 *str,
423 ctx->next_state);
424 }
425
426 if (def->in_state_events[ctx->next_state])
427 def->in_state_events[ctx->next_state](ctx,
428 ctx->current_state,
429 *str,
430 ctx->next_state);
431
432 /* We need two bytes left so we can NULL terminate the string. */
433 if (ctx->recording &&
434 STATEMACHINE_RECORD_BUFFER_SIZE - 1 > ctx->record_pos) {
435 ctx->record_buffer[ctx->record_pos++] = *str;
436 ctx->record_buffer[ctx->record_pos] = '\0';
437 }
438
439/* TODO(falmeida): Should clarify the contract here, since an event can change
440 * ctx->next_state and we need this functionality */
441
442 ctx->current_state = ctx->next_state;
443 ctx->column_number++;
444
445 if (*str == '\n') {
446 ctx->line_number++;
447 ctx->column_number = 1;
448 }
449 str++;
450 }
451
452 return ctx->current_state;
453}
454
455#ifdef __cplusplus
} /* namespace ctemplate_htmlparser */
457#endif