/**
 * @file regex.c  Implements basic regular expressions
 *
 * Copyright (C) 2010 Creytiv.com
 */
6#include <ctype.h>
7#include <re_types.h>
8#include <re_fmt.h>
9
10
/** Inclusive range of byte values making up one entry of a character class */
struct chr {
	uint8_t min;  /**< Lowest value accepted by this range  */
	uint8_t max;  /**< Highest value accepted by this range */
};
16
17
18static bool expr_match(const struct chr *chrv, uint32_t n, uint8_t c,
19 bool neg)
20{
21 uint32_t i;
22
23 for (i=0; i<n; i++) {
24
25 if (c < chrv[i].min)
26 continue;
27
28 if (c > chrv[i].max)
29 continue;
30
31 break;
32 }
33
34 return neg ? (i == n) : (i != n);
35}
36
37
38/**
39 * Parse a string using basic regular expressions. Any number of matching
40 * expressions can be given, and each match will be stored in a "struct pl"
41 * pointer-length type.
42 *
43 * @param ptr String to parse
44 * @param len Length of string
45 * @param expr Regular expressions string
46 *
47 * @return 0 if success, otherwise errorcode
48 *
49 * Example:
50 *
51 * We parse the buffer for any numerical values, to get a match we must have
52 * 1 or more occurences of the digits 0-9. The result is stored in 'num',
53 * which is of pointer-length type and will point to the first location in
54 * the buffer that contains "42".
55 *
56 * <pre>
57 const char buf[] = "foo 42 bar";
58 struct pl num;
59 int err = re_regex(buf, strlen(buf), "[0-9]+", &num);
60
61 here num contains a pointer to '42'
62 * </pre>
63 */
64int re_regex(const char *ptr, size_t len, const char *expr, ...)
65{
66 struct chr chrv[64];
67 const char *p, *ep;
68 bool fm, range = false, ec = false, neg = false, qesc = false;
69 uint32_t n = 0;
70 va_list ap;
71 bool eesc;
72 size_t l;
73
74 if (!ptr || !expr)
75 return EINVAL;
76
77 again:
78 eesc = false;
79 fm = false;
80 l = len--;
81 p = ptr++;
82 ep = expr;
83
84 va_start(ap, expr);
85
86 if (!l)
87 goto out;
88
89 for (; *ep; ep++) {
90
91 if ('\\' == *ep && !eesc) {
92 eesc = true;
93 continue;
94 }
95
96 if (!fm) {
97
98 /* Start of character class */
99 if ('[' == *ep && !eesc) {
100 n = 0;
101 fm = true;
102 ec = false;
103 neg = false;
104 range = false;
105 qesc = false;
106 continue;
107 }
108
109 if (!l)
110 break;
111
112 if (tolower(*ep) != tolower(*p)) {
113 va_end(ap);
114 goto again;
115 }
116
117 eesc = false;
118 ++p;
119 --l;
120 continue;
121 }
122 /* End of character class */
123 else if (ec) {
124
125 uint32_t nm, nmin, nmax;
126 struct pl lpl, *pl = va_arg(ap, struct pl *);
127 bool quote = false, esc = false;
128
129 /* Match 0 or more times */
130 if ('*' == *ep) {
131 nmin = 0;
132 nmax = -1;
133 }
134 /* Match 1 or more times */
135 else if ('+' == *ep) {
136 nmin = 1;
137 nmax = -1;
138 }
139 /* Match exactly n times */
140 else if ('1' <= *ep && *ep <= '9') {
141 nmin = *ep - '0';
142 nmax = *ep - '0';
143 }
144 else
145 break;
146
147 fm = false;
148
149 lpl.p = p;
150 lpl.l = 0;
151
152 for (nm = 0; l && nm < nmax; nm++, p++, l--, lpl.l++) {
153
154 if (qesc) {
155
156 if (esc) {
157 esc = false;
158 continue;
159 }
160
161 switch (*p) {
162
163 case '\\':
164 esc = true;
165 continue;
166
167 case '"':
168 quote = !quote;
169 continue;
170 }
171
172 if (quote)
173 continue;
174 }
175
176 if (!expr_match(chrv, n, tolower(*p), neg))
177 break;
178 }
179
180 /* Strip quotes */
181 if (qesc && lpl.l > 1 &&
182 lpl.p[0] == '"' && lpl.p[lpl.l - 1] == '"') {
183
184 lpl.p += 1;
185 lpl.l -= 2;
186 nm -= 2;
187 }
188
189 if ((nm < nmin) || (nm > nmax)) {
190 va_end(ap);
191 goto again;
192 }
193
194 if (pl)
195 *pl = lpl;
196
197 eesc = false;
198 continue;
199 }
200
201 if (eesc) {
202 eesc = false;
203 goto chr;
204 }
205
206 switch (*ep) {
207
208 /* End of character class */
209 case ']':
210 ec = true;
211 continue;
212
213 /* Negate with quote escape */
214 case '~':
215 if (n)
216 break;
217
218 qesc = true;
219 neg = true;
220 continue;
221
222 /* Negate */
223 case '^':
224 if (n)
225 break;
226
227 neg = true;
228 continue;
229
230 /* Range */
231 case '-':
232 if (!n || range)
233 break;
234
235 range = true;
236 --n;
237 continue;
238 }
239
240 chr:
241 chrv[n].max = tolower(*ep);
242
243 if (range)
244 range = false;
245 else
246 chrv[n].min = tolower(*ep);
247
248 if (++n > ARRAY_SIZE(chrv))
249 break;
250 }
251 out:
252 va_end(ap);
253
254 if (fm)
255 return EINVAL;
256
257 return *ep ? ENOENT : 0;
258}