blob: 1983fad6ec4444358ff749c060f523546842ea00 [file] [log] [blame]
/**
* @file regex.c Implements basic regular expressions
*
* Copyright (C) 2010 Creytiv.com
*/
#include <ctype.h>
#include <re_types.h>
#include <re_fmt.h>
/**
 * Defines a character range. Both bounds are inclusive: a character c is
 * covered by the range when min <= c <= max. A single character is
 * represented by a range whose min and max are equal.
 */
struct chr {
uint8_t min; /**< Minimum value (inclusive lower bound) */
uint8_t max; /**< Maximum value (inclusive upper bound) */
};
/**
 * Test a character against a set of character ranges
 *
 * @param chrv Vector of inclusive character ranges
 * @param n    Number of ranges in chrv
 * @param c    Character to test
 * @param neg  True to invert the result (negated character class)
 *
 * @return If neg is false: true when c lies inside at least one range.
 *         If neg is true: true when c lies inside none of the ranges.
 */
static bool expr_match(const struct chr *chrv, uint32_t n, uint8_t c,
		       bool neg)
{
	bool hit = false;
	uint32_t k = 0;

	/* Stop scanning at the first range that contains the character */
	while (k < n && !hit) {
		hit = (chrv[k].min <= c) && (c <= chrv[k].max);
		++k;
	}

	/* A negated class matches exactly when no range was hit */
	return hit != neg;
}
/**
 * Parse a string using basic regular expressions. Any number of matching
 * expressions can be given, and each match will be stored in a "struct pl"
 * pointer-length type.
 *
 * Supported syntax:
 *
 * - Any character outside a character class is matched literally and
 *   case-insensitively.
 * - A backslash escapes the following character, so that it is taken
 *   literally (both inside and outside a character class).
 * - "[...]" defines a character class. Inside the class "a-z" denotes an
 *   inclusive range, a leading "^" negates the class, and a leading "~"
 *   negates the class and additionally enables quote handling: double-quoted
 *   sections and backslash-escaped characters in the input are consumed
 *   without being tested against the class, and one pair of enclosing double
 *   quotes is stripped from the stored match.
 * - Every character class must be followed by a quantifier: "*" (zero or
 *   more), "+" (one or more) or a single digit "1".."9" (exactly that many).
 * - Class members and input characters are converted to lower case before
 *   they are compared.
 *
 * The expression is not anchored at the start: matching is attempted at each
 * successive position of the input until one attempt succeeds or the input is
 * exhausted. Trailing input after the last expression element is ignored.
 *
 * For every character class in the expression one "struct pl *" argument is
 * fetched from the variable argument list, in order. A NULL pointer may be
 * passed to discard that match. Note that an output argument can be written
 * during an attempt that later fails, so outputs are only meaningful when
 * the function returns 0.
 *
 * @param ptr  String to parse
 * @param len  Length of string
 * @param expr Regular expressions string
 *
 * @return 0 if success, otherwise errorcode: EINVAL if ptr or expr is NULL
 *         or if the expression is malformed (unterminated character class,
 *         missing quantifier, or more than 64 entries in one class), ENOENT
 *         if the expression did not match
 *
 * Example:
 *
 * We parse the buffer for any numerical values, to get a match we must have
 * 1 or more occurrences of the digits 0-9. The result is stored in 'num',
 * which is of pointer-length type and will point to the first location in
 * the buffer that contains "42".
 *
 * <pre>
 const char buf[] = "foo 42 bar";
 struct pl num;
 int err = re_regex(buf, strlen(buf), "[0-9]+", &num);

 here num contains a pointer to '42'
 * </pre>
 */
int re_regex(const char *ptr, size_t len, const char *expr, ...)
{
	struct chr chrv[64];
	const char *p, *ep;
	bool fm, range = false, ec = false, neg = false, qesc = false;
	uint32_t n = 0;
	va_list ap;
	bool eesc;
	size_t l;

	if (!ptr || !expr)
		return EINVAL;

 again:
	/*
	 * Start (or restart) a match attempt. Every attempt begins one
	 * character further into the input and re-reads the variable
	 * arguments from the beginning.
	 */
	eesc = false;
	fm = false;
	l = len--;
	p = ptr++;
	ep = expr;

	va_start(ap, expr);

	/* Input exhausted - no further attempts are possible */
	if (!l)
		goto out;

	for (; *ep; ep++) {

		/* An unescaped backslash escapes the next character */
		if ('\\' == *ep && !eesc) {
			eesc = true;
			continue;
		}

		if (!fm) {

			/* Start of character class */
			if ('[' == *ep && !eesc) {
				n = 0;
				fm = true;
				ec = false;
				neg = false;
				range = false;
				qesc = false;
				continue;
			}

			if (!l)
				break;

			/*
			 * Literal character, compared case-insensitively.
			 * The casts avoid undefined behaviour in tolower()
			 * for bytes >= 0x80 where plain char is signed.
			 */
			if (tolower((unsigned char)*ep) !=
			    tolower((unsigned char)*p)) {
				va_end(ap);
				goto again;
			}

			eesc = false;
			++p;
			--l;
			continue;
		}
		/* End of character class */
		else if (ec) {
			uint32_t nm, nmin, nmax;
			struct pl lpl, *pl = va_arg(ap, struct pl *);
			bool quote = false, esc = false;

			/* Match 0 or more times */
			if ('*' == *ep) {
				nmin = 0;
				nmax = (uint32_t)-1;
			}
			/* Match 1 or more times */
			else if ('+' == *ep) {
				nmin = 1;
				nmax = (uint32_t)-1;
			}
			/* Match exactly n times */
			else if ('1' <= *ep && *ep <= '9') {
				nmin = *ep - '0';
				nmax = *ep - '0';
			}
			else
				break;  /* missing quantifier; fm is set */

			fm = false;

			lpl.p = p;
			lpl.l = 0;

			/* Consume input while it belongs to the class */
			for (nm = 0; l && nm < nmax; nm++, p++, l--, lpl.l++) {

				if (qesc) {

					/* Character after a backslash */
					if (esc) {
						esc = false;
						continue;
					}

					switch (*p) {

					case '\\':
						esc = true;
						continue;

					case '"':
						quote = !quote;
						continue;
					}

					/* Inside quotes anything is accepted */
					if (quote)
						continue;
				}

				if (!expr_match(chrv, n,
						(uint8_t)tolower((unsigned char)*p),
						neg))
					break;
			}

			/* Strip quotes */
			if (qesc && lpl.l > 1 &&
			    lpl.p[0] == '"' && lpl.p[lpl.l - 1] == '"') {

				lpl.p += 1;
				lpl.l -= 2;
				nm -= 2;
			}

			if ((nm < nmin) || (nm > nmax)) {
				va_end(ap);
				goto again;
			}

			if (pl)
				*pl = lpl;

			eesc = false;
			continue;
		}

		/* Escaped character inside a class is always a plain member */
		if (eesc) {
			eesc = false;
			goto chr;
		}

		switch (*ep) {

			/* End of character class */
		case ']':
			ec = true;
			continue;

			/* Negate with quote escape */
		case '~':
			if (n)
				break;

			qesc = true;
			neg = true;
			continue;

			/* Negate */
		case '^':
			if (n)
				break;

			neg = true;
			continue;

			/* Range */
		case '-':
			if (!n || range)
				break;

			/* Re-open the previous entry to receive its maximum */
			range = true;
			--n;
			continue;
		}

	chr:
		/*
		 * Check the capacity before storing, so that an oversized
		 * class can never write past the end of chrv. Leaving the
		 * loop here with fm set yields EINVAL.
		 */
		if (n >= ARRAY_SIZE(chrv))
			break;

		chrv[n].max = (uint8_t)tolower((unsigned char)*ep);

		if (range)
			range = false;
		else
			chrv[n].min = (uint8_t)tolower((unsigned char)*ep);

		++n;
	}

 out:
	va_end(ap);

	/* Still inside a character class: the expression is malformed */
	if (fm)
		return EINVAL;

	/* Unconsumed expression means that the input did not match */
	return *ep ? ENOENT : 0;
}