/* Copyright (c) 2007, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 * * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ---
 *
 * Author: falmeida@google.com (Filipe Almeida)
 */
33
#ifndef SECURITY_STREAMHTMLPARSER_STATEMACHINE_H
#define SECURITY_STREAMHTMLPARSER_STATEMACHINE_H

#include <config.h>
#include <stddef.h>  /* size_t, NULL */
#ifdef __cplusplus
namespace ctemplate_htmlparser {
#endif
41
/* TODO(falmeida): I'm not sure about these limits, but since right now we only
 * have 24 states it should be fine */

/* Reserved state value that marks an error.
 *
 * statemachine_get_error_msg() returns the stored error message only when the
 * context's next_state equals this value.
 */
enum {
  STATEMACHINE_ERROR = 127
};
48
/* Size, in bytes, of statemachine_ctx_s::record_buffer. */
#define STATEMACHINE_RECORD_BUFFER_SIZE 256

/* Size, in bytes, of statemachine_ctx_s::error_msg. */
#define STATEMACHINE_MAX_STR_ERROR 80
52
/* Forward declaration so the callback type below can refer to the context. */
struct statemachine_ctx_s;

/* Signature of a state event callback.
 *
 * Callbacks of this type are stored in the in_state_events,
 * enter_state_events and exit_state_events arrays of a statemachine
 * definition.
 *
 * NOTE(review): the (int, char, int) arguments are unnamed here; presumably
 * they are the source state, the input character and the destination state -
 * confirm against the implementation file.
 */
typedef void (*state_event_function)(struct statemachine_ctx_s *, int, char,
                                     int);
57
58typedef struct statemachine_definition_s {
59 int num_states;
60 const int* const* transition_table;
61
62 /* Array containing the name of the states as a C string.
63 * This field is optional and if not in use it should be set to NULL.
64 */
65 const char* const* state_names;
66 state_event_function *in_state_events;
67 state_event_function *enter_state_events;
68 state_event_function *exit_state_events;
69} statemachine_definition;
70
71typedef struct statemachine_ctx_s {
72 int current_state;
73 int next_state;
74 statemachine_definition *definition;
75 char current_char;
76
77 /* Current line number. */
78 int line_number;
79
80 /* Current column number. */
81 int column_number;
82 char record_buffer[STATEMACHINE_RECORD_BUFFER_SIZE];
83 size_t record_pos;
84
85 /* True if we are recording the stream to record_buffer. */
86 int recording;
87
88 /* In case there was an error (we are in state STATEMACHINE_ERROR), it will
89 * contain a human readable description of the error.
90 */
91 char error_msg[STATEMACHINE_MAX_STR_ERROR];
92
93 /* Storage space for the layer above. */
94 void *user;
95} statemachine_ctx;
96
97/* Populates the statemachine definition.
98 *
99 * Receives a transition table and an optional array of state names. It uses
100 * this data to populate the state machine definition.
101 *
102 * The transition table structure is a list of lists of ints (int **). The
103 * outer list indexes the source state and the inner list contains the
104 * destination state for each of the possible input characters:
105 *
106 * const int* const* transitions[source][input] == destination.
107 *
108 * The optional argument state_names points to a list of strings containing
109 * human readable state names. These strings are used when reporting error
110 * messages.
111 */
112void statemachine_definition_populate(statemachine_definition *def,
113 const int* const* transition_table,
114 const char* const* state_names);
115
116void statemachine_in_state(statemachine_definition *def, int st,
117 state_event_function func);
118void statemachine_enter_state(statemachine_definition *def, int st,
119 state_event_function func);
120void statemachine_exit_state(statemachine_definition *def, int st,
121 state_event_function func);
122
123statemachine_definition *statemachine_definition_new(int states);
124void statemachine_definition_delete(statemachine_definition *def);
125
126int statemachine_get_state(statemachine_ctx *ctx);
127void statemachine_set_state(statemachine_ctx *ctx, int state);
128
129void statemachine_start_record(statemachine_ctx *ctx);
130const char *statemachine_stop_record(statemachine_ctx *ctx);
131const char *statemachine_record_buffer(statemachine_ctx *ctx);
132
133/* Returns the the number of characters currently stored in the record buffer.
134 */
135static inline size_t statemachine_record_length(statemachine_ctx *ctx) {
136 return ctx->record_pos + 1;
137}
138
139/* Return the current line number. */
140static inline int statemachine_get_line_number(statemachine_ctx *ctx) {
141 return ctx->line_number;
142}
143
144/* Set the current line number. */
145static inline void statemachine_set_line_number(statemachine_ctx *ctx,
146 int line) {
147 ctx->line_number = line;
148}
149
150/* Return the current column number. */
151static inline int statemachine_get_column_number(statemachine_ctx *ctx) {
152 return ctx->column_number;
153}
154
155/* Set the current column number. */
156static inline void statemachine_set_column_number(statemachine_ctx *ctx,
157 int column) {
158 ctx->column_number = column;
159}
160
161
162/* Retrieve a human readable error message in case an error occurred.
163 *
164 * NULL is returned if the parser didn't encounter an error.
165 */
166static inline const char *statemachine_get_error_msg(statemachine_ctx *ctx) {
167 if (ctx->next_state == STATEMACHINE_ERROR) {
168 return ctx->error_msg;
169 } else {
170 return NULL;
171 }
172}
173
174/* Reset the statemachine.
175 *
176 * The state is set to the initialization values. This includes setting the
177 * state to the default state (0), stopping recording and setting the line
178 * number to 1.
179 */
180void statemachine_reset(statemachine_ctx *ctx);
181
182/* Initializes a new statemachine. Receives a statemachine definition object
183 * that should have been initialized with statemachine_definition_new() and a
184 * user reference to be used by the caller.
185 *
186 * Returns NULL if initialization fails.
187 *
188 * Initialization failure is fatal, and if this function fails it may not
189 * deallocate all previsouly allocated memory.
190 */
191statemachine_ctx *statemachine_new(statemachine_definition *def,
192 void *user);
193
194/* Returns a pointer to a context which is a duplicate of the statemachine src.
195 * The statemachine definition and the user pointer have to be provided since
196 * these references are not owned by the statemachine itself.
197 */
198statemachine_ctx *statemachine_duplicate(statemachine_ctx *ctx,
199 statemachine_definition *def,
200 void *user);
201
202/* Copies the context of the statemachine pointed to by src to the statemachine
203 * provided by dst.
204 * The statemachine definition and the user pointer have to be provided since
205 * these references are not owned by the statemachine itself.
206 */
207void statemachine_copy(statemachine_ctx *dst,
208 statemachine_ctx *src,
209 statemachine_definition *def,
210 void *user);
211
212int statemachine_parse(statemachine_ctx *ctx, const char *str, int size);
213
214void statemachine_delete(statemachine_ctx *ctx);
215
216
/*****
 * The following functions are only exported for testing purposes and should
 * be treated as private. */


/* Encode the character as an escaped C string.
 *
 * Encode the character chr into the string output. Writes at most len
 * characters to the output string but makes sure output is NUL-terminated.
 */
void statemachine_encode_char(char chr, char *output, size_t len);
228
229
230#ifdef __cplusplus
231} /* namespace security_streamhtmlparser */
232#endif
233
234#endif /* SECURITY_STREAMHTMLPARSER_STATEMACHINE_H */