Regex: optimize matching characters

Instead of having a dedicated opcode 'CHAR' followed by the character as
its operand, let every opcode value that falls within the range of a char
(0 to 0xffff) itself *be* the instruction 'match this character'.

While at it, renumber the remaining opcodes into separate (negative)
ranges, one per type of opcode, so that related operations are clustered.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
Johannes Schindelin 2013-11-09 14:13:19 -06:00
parent b03283033e
commit 63b06ebde8
3 changed files with 19 additions and 17 deletions

View File

@ -52,13 +52,13 @@ public abstract class Pattern implements PikeVMOpcodes {
if ("a(bb)?a".equals(regex)) {
int[] program = new int[] {
SAVE_OFFSET, 0,
CHAR, 'a',
SPLIT, 14,
'a',
SPLIT, 11,
SAVE_OFFSET, 2,
CHAR, 'b',
CHAR, 'b',
'b',
'b',
SAVE_OFFSET, 3,
/* 14 */ CHAR, 'a',
/* 11 */ 'a',
SAVE_OFFSET, 1
};
return new RegexPattern(regex, flags, new PikeVM(program, 1));

View File

@ -256,12 +256,6 @@ class PikeVM implements PikeVMOpcodes {
int opcode = program[pc];
switch (opcode) {
/* Possible optimization: make all opcodes <= 0xffff implicit chars */
case CHAR:
if (c == (char)program[pc + 1]) {
current.queueNext(pc, pc + 2, next);
}
break;
case DOT:
if (c != '\0' && c != '\r' && c != '\n') {
current.queueNext(pc, pc + 1, next);
@ -270,6 +264,7 @@ class PikeVM implements PikeVMOpcodes {
case DOTALL:
current.queueNext(pc, pc + 1, next);
break;
/* immediate opcodes, i.e. thread continues within the same step */
case SAVE_OFFSET:
int index = program[pc + 1];
current.saveOffset(pc, index, i);
@ -283,6 +278,12 @@ class PikeVM implements PikeVMOpcodes {
current.queueImmediately(pc, program[pc + 1], false);
break;
default:
if (program[pc] >= 0 && program[pc] <= 0xffff) {
if (c == (char)program[pc]) {
current.queueNext(pc, pc + 1, next);
}
break;
}
throw new RuntimeException("Invalid opcode: " + opcode
+ " at pc " + pc);
}

View File

@ -19,10 +19,11 @@ package regex;
* @author Johannes Schindelin
*/
interface PikeVMOpcodes {
final static int CHAR = 1;
final static int DOT = 2;
final static int DOTALL = 3;
final static int SAVE_OFFSET = 4;
final static int SPLIT = 5;
final static int JMP = 6;
final static int DOT = -1;
final static int DOTALL = -2;
final static int SAVE_OFFSET = -40;
final static int SPLIT = -50;
final static int JMP = -51;
}