Optimizations and fixes for Friday
This commit is contained in:
parent
29a99ffa9d
commit
44f7981013
1 changed file with 36 additions and 34 deletions
70
regex.c
70
regex.c
|
@ -7,11 +7,13 @@
|
||||||
|
|
||||||
// Checks if the first char in needle matches on c
|
// Checks if the first char in needle matches on c
|
||||||
// needle may begin with a plain character or a character class
|
// needle may begin with a plain character or a character class
|
||||||
bool char_match(char c, char *needle)
|
bool char_match(char c, char *needle, char **nneedl)
|
||||||
{
|
{
|
||||||
// depth of brackets
|
// depth of brackets
|
||||||
int db;
|
int db;
|
||||||
bool neg;
|
bool neg;
|
||||||
|
bool m;
|
||||||
|
m = false;
|
||||||
if (*needle == '[')
|
if (*needle == '[')
|
||||||
{
|
{
|
||||||
needle++;
|
needle++;
|
||||||
|
@ -21,7 +23,7 @@ bool char_match(char c, char *needle)
|
||||||
for (db = 1; *needle && (*needle != ']' || db); needle++)
|
for (db = 1; *needle && (*needle != ']' || db); needle++)
|
||||||
{
|
{
|
||||||
if (db > 1 && ((*needle == c) != neg))
|
if (db > 1 && ((*needle == c) != neg))
|
||||||
return true;
|
m = true;
|
||||||
else if (db > 1)
|
else if (db > 1)
|
||||||
db--;
|
db--;
|
||||||
else if (*needle == '\\')
|
else if (*needle == '\\')
|
||||||
|
@ -29,15 +31,18 @@ bool char_match(char c, char *needle)
|
||||||
else if (*needle == ']')
|
else if (*needle == ']')
|
||||||
db--;
|
db--;
|
||||||
else if ((*needle == c) != neg)
|
else if ((*needle == c) != neg)
|
||||||
return true;
|
m = true;
|
||||||
}
|
}
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
if (*needle == '$')
|
else if (*needle == '$')
|
||||||
return c == 0;
|
m = (c == 0);
|
||||||
else if (*needle == '.')
|
else if (*needle == '.')
|
||||||
return true;
|
m = true;
|
||||||
return *needle == c;
|
else
|
||||||
|
m = (*needle == c);
|
||||||
|
if (*needle && nneedl)
|
||||||
|
*nneedl = needle + 1;
|
||||||
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find next variant, or closing parenthesis of open group, or end of string
|
// Find next variant, or closing parenthesis of open group, or end of string
|
||||||
|
@ -100,6 +105,8 @@ char *end_of_group(char *needle)
|
||||||
// Returns opening parenthesis, or needle0
|
// Returns opening parenthesis, or needle0
|
||||||
char *start_of_group(char *needle0, char *needle)
|
char *start_of_group(char *needle0, char *needle)
|
||||||
{
|
{
|
||||||
|
if (*needle == ')' && needle0 < needle)
|
||||||
|
needle--;
|
||||||
for (int dp = 0, db = 0;
|
for (int dp = 0, db = 0;
|
||||||
needle > needle0 && (*needle != '(' || dp > 0 || db > 0 || *needle == ')');
|
needle > needle0 && (*needle != '(' || dp > 0 || db > 0 || *needle == ')');
|
||||||
needle--)
|
needle--)
|
||||||
|
@ -118,20 +125,14 @@ char *start_of_group(char *needle0, char *needle)
|
||||||
return needle;
|
return needle;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine quantifier for group, character class, or character
|
char quant(char c)
|
||||||
// Returns quantifier character, or 0 if absent.
|
|
||||||
char quant(char *needle)
|
|
||||||
{
|
{
|
||||||
if (*needle == '(')
|
switch (c)
|
||||||
needle = end_of_group(needle + 1);
|
|
||||||
else
|
|
||||||
needle = next_char(needle);
|
|
||||||
switch (*needle)
|
|
||||||
{
|
{
|
||||||
case '*':
|
case '*':
|
||||||
case '+':
|
case '+':
|
||||||
case '?':
|
case '?':
|
||||||
return *needle;
|
return c;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -166,7 +167,7 @@ bool regex_match(char *haystack, char *needle)
|
||||||
}
|
}
|
||||||
|
|
||||||
// iterate over the haystack until accepting
|
// iterate over the haystack until accepting
|
||||||
for (; !bv[accept] && !na; haystack++) {
|
do {
|
||||||
// handle groups, variants, and quantifier
|
// handle groups, variants, and quantifier
|
||||||
for (size_t st = 0; st < states - 1; st++)
|
for (size_t st = 0; st < states - 1; st++)
|
||||||
{
|
{
|
||||||
|
@ -186,7 +187,7 @@ bool regex_match(char *haystack, char *needle)
|
||||||
{
|
{
|
||||||
// at end of group, look for quantifier
|
// at end of group, look for quantifier
|
||||||
bv[st] = false;
|
bv[st] = false;
|
||||||
q = quant(needle + st);
|
q = quant(needle[st+1]);
|
||||||
// first, activate subsequent state after group
|
// first, activate subsequent state after group
|
||||||
bv[q ? st + 2 : st + 1] = true;
|
bv[q ? st + 2 : st + 1] = true;
|
||||||
// then, handle quantifiers
|
// then, handle quantifiers
|
||||||
|
@ -205,23 +206,23 @@ bool regex_match(char *haystack, char *needle)
|
||||||
{
|
{
|
||||||
// at start of group, look at variants and quantifier
|
// at start of group, look at variants and quantifier
|
||||||
bv[st] = false;
|
bv[st] = false;
|
||||||
for (var = needle + 1; *var && *var != ')'; var = next_var(var))
|
for (var = needle + st + 1; *var && *var != ')'; var = next_var(var))
|
||||||
bv[var - needle] = true;
|
bv[var - needle] = true;
|
||||||
q = quant(var);
|
|
||||||
if (*var == ')')
|
if (*var == ')')
|
||||||
var++;
|
var++;
|
||||||
|
q = quant(*var);
|
||||||
if (q == '*' || q == '?')
|
if (q == '*' || q == '?')
|
||||||
bv[var + 1 - needle] = true;
|
bv[var + 1 - needle] = true;
|
||||||
}
|
}
|
||||||
else if ((q = quant(needle + st)) && (q == '*' || q == '?'))
|
else
|
||||||
{
|
{
|
||||||
// current needle has quantifier
|
// check if current needle is optional
|
||||||
bv[next_char(needle + st) + 1 - needle] = true;
|
nneedl = next_char(needle + st);
|
||||||
|
q = *nneedl;
|
||||||
|
if (q == '*' || q == '?')
|
||||||
|
bv[nneedl + 1 - needle] = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// check if we are only tidying up
|
|
||||||
if (!*haystack)
|
|
||||||
break;
|
|
||||||
memcpy(bvtmp, bv, sizeof(bv));
|
memcpy(bvtmp, bv, sizeof(bv));
|
||||||
// actual match, backwards
|
// actual match, backwards
|
||||||
na = true;
|
na = true;
|
||||||
|
@ -233,22 +234,23 @@ bool regex_match(char *haystack, char *needle)
|
||||||
// perform match for active state
|
// perform match for active state
|
||||||
fprintf(stderr, "state %lu: \"%s\" ~ \"%s\"\n", st-1, haystack, needle + st - 1);
|
fprintf(stderr, "state %lu: \"%s\" ~ \"%s\"\n", st-1, haystack, needle + st - 1);
|
||||||
bvtmp[st-1] = false;
|
bvtmp[st-1] = false;
|
||||||
char q = quant(needle + st-1);
|
m = char_match(*haystack, needle + st-1, &nneedl);
|
||||||
q = quant(needle + st-1);
|
q = quant(*nneedl);
|
||||||
m = char_match(*haystack, needle + st-1);
|
|
||||||
if (m || q == '*' || q == '?')
|
if (m || q == '*' || q == '?')
|
||||||
{
|
{
|
||||||
if (q == '*' || q == '+')
|
|
||||||
bvtmp[st-1] = true;
|
|
||||||
nneedl = next_char(needle + st-1);
|
|
||||||
if (q)
|
if (q)
|
||||||
nneedl++;
|
nneedl++;
|
||||||
|
if (q == '*' || q == '+')
|
||||||
|
bvtmp[st-1] = true;
|
||||||
bvtmp[nneedl - needle] = true;
|
bvtmp[nneedl - needle] = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
memcpy(bv, bvtmp, sizeof(bv));
|
memcpy(bv, bvtmp, sizeof(bv));
|
||||||
putc('\n', stderr);
|
putc('\n', stderr);
|
||||||
}
|
// check if we have reached the end of haystack, and advance
|
||||||
|
if (!*(haystack++))
|
||||||
|
break;
|
||||||
|
} while (!bv[accept] && !na);
|
||||||
|
|
||||||
return bv[accept];
|
return bv[accept];
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue