|
|
@@ -45,9 +45,9 @@ RegularExpression::RegularExpression(const RegularExpression& rxp)
|
|
|
this->program = new char[this->progsize]; // Allocate storage
|
|
|
for (ind = this->progsize; ind-- != 0;) // Copy regular expresion
|
|
|
this->program[ind] = rxp.program[ind];
|
|
|
- this->startp[0] = rxp.startp[0]; // Copy pointers into last
|
|
|
- this->endp[0] = rxp.endp[0]; // Successful "find" operation
|
|
|
- this->regmust = rxp.regmust; // Copy field
|
|
|
+ // Copy pointers into last successful "find" operation
|
|
|
+ this->regmatch = rxp.regmatch;
|
|
|
+ this->regmust = rxp.regmust; // Copy field
|
|
|
if (rxp.regmust != 0) {
|
|
|
char* dum = rxp.program;
|
|
|
ind = 0;
|
|
|
@@ -78,9 +78,9 @@ RegularExpression& RegularExpression::operator=(const RegularExpression& rxp)
|
|
|
this->program = new char[this->progsize]; // Allocate storage
|
|
|
for (ind = this->progsize; ind-- != 0;) // Copy regular expresion
|
|
|
this->program[ind] = rxp.program[ind];
|
|
|
- this->startp[0] = rxp.startp[0]; // Copy pointers into last
|
|
|
- this->endp[0] = rxp.endp[0]; // Successful "find" operation
|
|
|
- this->regmust = rxp.regmust; // Copy field
|
|
|
+ // Copy pointers into last successful "find" operation
|
|
|
+ this->regmatch = rxp.regmatch;
|
|
|
+ this->regmust = rxp.regmust; // Copy field
|
|
|
if (rxp.regmust != 0) {
|
|
|
char* dum = rxp.program;
|
|
|
ind = 0;
|
|
|
@@ -123,8 +123,9 @@ bool RegularExpression::deep_equal(const RegularExpression& rxp) const
|
|
|
while (ind-- != 0) // Else while still characters
|
|
|
if (this->program[ind] != rxp.program[ind]) // If regexp are different
|
|
|
return false; // Return failure
|
|
|
- return (this->startp[0] == rxp.startp[0] && // Else if same start/end ptrs,
|
|
|
- this->endp[0] == rxp.endp[0]); // Return true
|
|
|
+ // Else if same start/end ptrs, return true
|
|
|
+ return (this->regmatch.start() == rxp.regmatch.start() &&
|
|
|
+ this->regmatch.end() == rxp.regmatch.end());
|
|
|
}
|
|
|
|
|
|
// The remaining code in this file is derived from the regular expression code
|
|
|
@@ -276,31 +277,35 @@ const unsigned char MAGIC = 0234;
|
|
|
/////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
/*
|
|
|
- * Global work variables for compile().
|
|
|
+ * Read only utility variables.
|
|
|
*/
|
|
|
-static const char* regparse; // Input-scan pointer.
|
|
|
-static int regnpar; // () count.
|
|
|
static char regdummy;
|
|
|
-static char* regcode; // Code-emit pointer; ®dummy = don't.
|
|
|
-static long regsize; // Code size.
|
|
|
+static char* const regdummyptr = ®dummy;
|
|
|
|
|
|
/*
|
|
|
- * Forward declarations for compile()'s friends.
|
|
|
+ * Utility class for RegularExpression::compile().
|
|
|
*/
|
|
|
-// #ifndef static
|
|
|
-// #define static static
|
|
|
-// #endif
|
|
|
-static char* reg(int, int*);
|
|
|
-static char* regbranch(int*);
|
|
|
-static char* regpiece(int*);
|
|
|
-static char* regatom(int*);
|
|
|
-static char* regnode(char);
|
|
|
+class RegExpCompile
|
|
|
+{
|
|
|
+public:
|
|
|
+ const char* regparse; // Input-scan pointer.
|
|
|
+ int regnpar; // () count.
|
|
|
+ char* regcode; // Code-emit pointer; regdummyptr = don't.
|
|
|
+ long regsize; // Code size.
|
|
|
+
|
|
|
+ char* reg(int, int*);
|
|
|
+ char* regbranch(int*);
|
|
|
+ char* regpiece(int*);
|
|
|
+ char* regatom(int*);
|
|
|
+ char* regnode(char);
|
|
|
+ void regc(char);
|
|
|
+ void reginsert(char, char*);
|
|
|
+ static void regtail(char*, const char*);
|
|
|
+ static void regoptail(char*, const char*);
|
|
|
+};
|
|
|
+
|
|
|
static const char* regnext(const char*);
|
|
|
static char* regnext(char*);
|
|
|
-static void regc(char);
|
|
|
-static void reginsert(char, char*);
|
|
|
-static void regtail(char*, const char*);
|
|
|
-static void regoptail(char*, const char*);
|
|
|
|
|
|
#ifdef STRCSPN
|
|
|
static int strcspn();
|
|
|
@@ -337,19 +342,20 @@ bool RegularExpression::compile(const char* exp)
|
|
|
}
|
|
|
|
|
|
// First pass: determine size, legality.
|
|
|
- regparse = exp;
|
|
|
- regnpar = 1;
|
|
|
- regsize = 0L;
|
|
|
- regcode = ®dummy;
|
|
|
- regc(static_cast<char>(MAGIC));
|
|
|
- if (!reg(0, &flags)) {
|
|
|
+ RegExpCompile comp;
|
|
|
+ comp.regparse = exp;
|
|
|
+ comp.regnpar = 1;
|
|
|
+ comp.regsize = 0L;
|
|
|
+ comp.regcode = regdummyptr;
|
|
|
+ comp.regc(static_cast<char>(MAGIC));
|
|
|
+ if (!comp.reg(0, &flags)) {
|
|
|
printf("RegularExpression::compile(): Error in compile.\n");
|
|
|
return false;
|
|
|
}
|
|
|
- this->startp[0] = this->endp[0] = this->searchstring = 0;
|
|
|
+ this->regmatch.clear();
|
|
|
|
|
|
// Small enough for pointer-storage convention?
|
|
|
- if (regsize >= 32767L) { // Probably could be 65535L.
|
|
|
+ if (comp.regsize >= 32767L) { // Probably could be 65535L.
|
|
|
// RAISE Error, SYM(RegularExpression), SYM(Expr_Too_Big),
|
|
|
printf("RegularExpression::compile(): Expression too big.\n");
|
|
|
return false;
|
|
|
@@ -360,8 +366,8 @@ bool RegularExpression::compile(const char* exp)
|
|
|
if (this->program != 0)
|
|
|
delete[] this->program;
|
|
|
//#endif
|
|
|
- this->program = new char[regsize];
|
|
|
- this->progsize = static_cast<int>(regsize);
|
|
|
+ this->program = new char[comp.regsize];
|
|
|
+ this->progsize = static_cast<int>(comp.regsize);
|
|
|
|
|
|
if (this->program == 0) {
|
|
|
// RAISE Error, SYM(RegularExpression), SYM(Out_Of_Memory),
|
|
|
@@ -370,11 +376,11 @@ bool RegularExpression::compile(const char* exp)
|
|
|
}
|
|
|
|
|
|
// Second pass: emit code.
|
|
|
- regparse = exp;
|
|
|
- regnpar = 1;
|
|
|
- regcode = this->program;
|
|
|
- regc(static_cast<char>(MAGIC));
|
|
|
- reg(0, &flags);
|
|
|
+ comp.regparse = exp;
|
|
|
+ comp.regnpar = 1;
|
|
|
+ comp.regcode = this->program;
|
|
|
+ comp.regc(static_cast<char>(MAGIC));
|
|
|
+ comp.reg(0, &flags);
|
|
|
|
|
|
// Dig out information for optimizations.
|
|
|
this->regstart = '\0'; // Worst-case defaults.
|
|
|
@@ -423,7 +429,7 @@ bool RegularExpression::compile(const char* exp)
|
|
|
* is a trifle forced, but the need to tie the tails of the branches to what
|
|
|
* follows makes it hard to avoid.
|
|
|
*/
|
|
|
-static char* reg(int paren, int* flagp)
|
|
|
+char* RegExpCompile::reg(int paren, int* flagp)
|
|
|
{
|
|
|
char* ret;
|
|
|
char* br;
|
|
|
@@ -435,7 +441,7 @@ static char* reg(int paren, int* flagp)
|
|
|
|
|
|
// Make an OPEN node, if parenthesized.
|
|
|
if (paren) {
|
|
|
- if (regnpar >= RegularExpression::NSUBEXP) {
|
|
|
+ if (regnpar >= RegularExpressionMatch::NSUBEXP) {
|
|
|
// RAISE Error, SYM(RegularExpression), SYM(Too_Many_Parens),
|
|
|
printf("RegularExpression::compile(): Too many parentheses.\n");
|
|
|
return 0;
|
|
|
@@ -501,7 +507,7 @@ static char* reg(int paren, int* flagp)
|
|
|
*
|
|
|
* Implements the concatenation operator.
|
|
|
*/
|
|
|
-static char* regbranch(int* flagp)
|
|
|
+char* RegExpCompile::regbranch(int* flagp)
|
|
|
{
|
|
|
char* ret;
|
|
|
char* chain;
|
|
|
@@ -538,7 +544,7 @@ static char* regbranch(int* flagp)
|
|
|
* It might seem that this node could be dispensed with entirely, but the
|
|
|
* endmarker role is not redundant.
|
|
|
*/
|
|
|
-static char* regpiece(int* flagp)
|
|
|
+char* RegExpCompile::regpiece(int* flagp)
|
|
|
{
|
|
|
char* ret;
|
|
|
char op;
|
|
|
@@ -605,7 +611,7 @@ static char* regpiece(int* flagp)
|
|
|
* faster to run. Backslashed characters are exceptions, each becoming a
|
|
|
* separate node; the code is simpler that way and it's not worth fixing.
|
|
|
*/
|
|
|
-static char* regatom(int* flagp)
|
|
|
+char* RegExpCompile::regatom(int* flagp)
|
|
|
{
|
|
|
char* ret;
|
|
|
int flags;
|
|
|
@@ -724,13 +730,13 @@ static char* regatom(int* flagp)
|
|
|
- regnode - emit a node
|
|
|
Location.
|
|
|
*/
|
|
|
-static char* regnode(char op)
|
|
|
+char* RegExpCompile::regnode(char op)
|
|
|
{
|
|
|
char* ret;
|
|
|
char* ptr;
|
|
|
|
|
|
ret = regcode;
|
|
|
- if (ret == ®dummy) {
|
|
|
+ if (ret == regdummyptr) {
|
|
|
regsize += 3;
|
|
|
return (ret);
|
|
|
}
|
|
|
@@ -747,9 +753,9 @@ static char* regnode(char op)
|
|
|
/*
|
|
|
- regc - emit (if appropriate) a byte of code
|
|
|
*/
|
|
|
-static void regc(char b)
|
|
|
+void RegExpCompile::regc(char b)
|
|
|
{
|
|
|
- if (regcode != ®dummy)
|
|
|
+ if (regcode != regdummyptr)
|
|
|
*regcode++ = b;
|
|
|
else
|
|
|
regsize++;
|
|
|
@@ -760,13 +766,13 @@ static void regc(char b)
|
|
|
*
|
|
|
* Means relocating the operand.
|
|
|
*/
|
|
|
-static void reginsert(char op, char* opnd)
|
|
|
+void RegExpCompile::reginsert(char op, char* opnd)
|
|
|
{
|
|
|
char* src;
|
|
|
char* dst;
|
|
|
char* place;
|
|
|
|
|
|
- if (regcode == ®dummy) {
|
|
|
+ if (regcode == regdummyptr) {
|
|
|
regsize += 3;
|
|
|
return;
|
|
|
}
|
|
|
@@ -786,13 +792,13 @@ static void reginsert(char op, char* opnd)
|
|
|
/*
|
|
|
- regtail - set the next-pointer at the end of a node chain
|
|
|
*/
|
|
|
-static void regtail(char* p, const char* val)
|
|
|
+void RegExpCompile::regtail(char* p, const char* val)
|
|
|
{
|
|
|
char* scan;
|
|
|
char* temp;
|
|
|
int offset;
|
|
|
|
|
|
- if (p == ®dummy)
|
|
|
+ if (p == regdummyptr)
|
|
|
return;
|
|
|
|
|
|
// Find last node.
|
|
|
@@ -815,10 +821,10 @@ static void regtail(char* p, const char* val)
|
|
|
/*
|
|
|
- regoptail - regtail on operand of first argument; nop if operandless
|
|
|
*/
|
|
|
-static void regoptail(char* p, const char* val)
|
|
|
+void RegExpCompile::regoptail(char* p, const char* val)
|
|
|
{
|
|
|
// "Operandless" and "op != BRANCH" are synonymous in practice.
|
|
|
- if (p == 0 || p == ®dummy || OP(p) != BRANCH)
|
|
|
+ if (p == 0 || p == regdummyptr || OP(p) != BRANCH)
|
|
|
return;
|
|
|
regtail(OPERAND(p), val);
|
|
|
}
|
|
|
@@ -830,34 +836,30 @@ static void regoptail(char* p, const char* val)
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
/*
|
|
|
- * Global work variables for find().
|
|
|
+ * Utility class for RegularExpression::find().
|
|
|
*/
|
|
|
-static const char* reginput; // String-input pointer.
|
|
|
-static const char* regbol; // Beginning of input, for ^ check.
|
|
|
-static const char** regstartp; // Pointer to startp array.
|
|
|
-static const char** regendp; // Ditto for endp.
|
|
|
+class RegExpFind
|
|
|
+{
|
|
|
+public:
|
|
|
+ const char* reginput; // String-input pointer.
|
|
|
+ const char* regbol; // Beginning of input, for ^ check.
|
|
|
+ const char** regstartp; // Pointer to startp array.
|
|
|
+ const char** regendp; // Ditto for endp.
|
|
|
|
|
|
-/*
|
|
|
- * Forwards.
|
|
|
- */
|
|
|
-static int regtry(const char*, const char**, const char**, const char*);
|
|
|
-static int regmatch(const char*);
|
|
|
-static int regrepeat(const char*);
|
|
|
-
|
|
|
-#ifdef DEBUG
|
|
|
-int regnarrate = 0;
|
|
|
-void regdump();
|
|
|
-static char* regprop();
|
|
|
-#endif
|
|
|
+ int regtry(const char*, const char**, const char**, const char*);
|
|
|
+ int regmatch(const char*);
|
|
|
+ int regrepeat(const char*);
|
|
|
+};
|
|
|
|
|
|
// find -- Matches the regular expression to the given string.
|
|
|
// Returns true if found, and sets start and end indexes accordingly.
|
|
|
-
|
|
|
-bool RegularExpression::find(const char* string)
|
|
|
+bool RegularExpression::find(char const* string,
|
|
|
+ RegularExpressionMatch& rmatch) const
|
|
|
{
|
|
|
const char* s;
|
|
|
|
|
|
- this->searchstring = string;
|
|
|
+ rmatch.clear();
|
|
|
+ rmatch.searchstring = string;
|
|
|
|
|
|
if (!this->program) {
|
|
|
return false;
|
|
|
@@ -868,7 +870,7 @@ bool RegularExpression::find(const char* string)
|
|
|
// RAISE Error, SYM(RegularExpression), SYM(Internal_Error),
|
|
|
printf(
|
|
|
"RegularExpression::find(): Compiled regular expression corrupted.\n");
|
|
|
- return 0;
|
|
|
+ return false;
|
|
|
}
|
|
|
|
|
|
// If there is a "must appear" string, look for it.
|
|
|
@@ -880,42 +882,45 @@ bool RegularExpression::find(const char* string)
|
|
|
s++;
|
|
|
}
|
|
|
if (s == 0) // Not present.
|
|
|
- return (0);
|
|
|
+ return false;
|
|
|
}
|
|
|
|
|
|
+ RegExpFind regFind;
|
|
|
+
|
|
|
// Mark beginning of line for ^ .
|
|
|
- regbol = string;
|
|
|
+ regFind.regbol = string;
|
|
|
|
|
|
// Simplest case: anchored match need be tried only once.
|
|
|
if (this->reganch)
|
|
|
- return (regtry(string, this->startp, this->endp, this->program) != 0);
|
|
|
+ return (
|
|
|
+ regFind.regtry(string, rmatch.startp, rmatch.endp, this->program) != 0);
|
|
|
|
|
|
// Messy cases: unanchored match.
|
|
|
s = string;
|
|
|
if (this->regstart != '\0')
|
|
|
// We know what char it must start with.
|
|
|
while ((s = strchr(s, this->regstart)) != 0) {
|
|
|
- if (regtry(s, this->startp, this->endp, this->program))
|
|
|
- return (1);
|
|
|
+ if (regFind.regtry(s, rmatch.startp, rmatch.endp, this->program))
|
|
|
+ return true;
|
|
|
s++;
|
|
|
}
|
|
|
else
|
|
|
// We don't -- general case.
|
|
|
do {
|
|
|
- if (regtry(s, this->startp, this->endp, this->program))
|
|
|
- return (1);
|
|
|
+ if (regFind.regtry(s, rmatch.startp, rmatch.endp, this->program))
|
|
|
+ return true;
|
|
|
} while (*s++ != '\0');
|
|
|
|
|
|
// Failure.
|
|
|
- return (0);
|
|
|
+ return false;
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
- regtry - try match at specific point
|
|
|
0 failure, 1 success
|
|
|
*/
|
|
|
-static int regtry(const char* string, const char** start, const char** end,
|
|
|
- const char* prog)
|
|
|
+int RegExpFind::regtry(const char* string, const char** start,
|
|
|
+ const char** end, const char* prog)
|
|
|
{
|
|
|
int i;
|
|
|
const char** sp1;
|
|
|
@@ -927,7 +932,7 @@ static int regtry(const char* string, const char** start, const char** end,
|
|
|
|
|
|
sp1 = start;
|
|
|
ep = end;
|
|
|
- for (i = RegularExpression::NSUBEXP; i > 0; i--) {
|
|
|
+ for (i = RegularExpressionMatch::NSUBEXP; i > 0; i--) {
|
|
|
*sp1++ = 0;
|
|
|
*ep++ = 0;
|
|
|
}
|
|
|
@@ -950,7 +955,7 @@ static int regtry(const char* string, const char** start, const char** end,
|
|
|
* by recursion.
|
|
|
* 0 failure, 1 success
|
|
|
*/
|
|
|
-static int regmatch(const char* prog)
|
|
|
+int RegExpFind::regmatch(const char* prog)
|
|
|
{
|
|
|
const char* scan; // Current node.
|
|
|
const char* next; // Next node.
|
|
|
@@ -1129,7 +1134,7 @@ static int regmatch(const char* prog)
|
|
|
/*
|
|
|
- regrepeat - repeatedly match something simple, report how many
|
|
|
*/
|
|
|
-static int regrepeat(const char* p)
|
|
|
+int RegExpFind::regrepeat(const char* p)
|
|
|
{
|
|
|
int count = 0;
|
|
|
const char* scan;
|
|
|
@@ -1176,7 +1181,7 @@ static const char* regnext(const char* p)
|
|
|
{
|
|
|
int offset;
|
|
|
|
|
|
- if (p == ®dummy)
|
|
|
+ if (p == regdummyptr)
|
|
|
return (0);
|
|
|
|
|
|
offset = NEXT(p);
|
|
|
@@ -1193,7 +1198,7 @@ static char* regnext(char* p)
|
|
|
{
|
|
|
int offset;
|
|
|
|
|
|
- if (p == ®dummy)
|
|
|
+ if (p == regdummyptr)
|
|
|
return (0);
|
|
|
|
|
|
offset = NEXT(p);
|