mirror of
https://github.com/corda/corda.git
synced 2025-01-23 04:48:09 +00:00
Regex: optimize matching characters
Instead of having a dedicated opcode 'CHAR', let's have every opcode that falls within the range of a char *be* the instruction 'match this character'. While at it, move the different types of opcodes into separate numeric ranges so that related operations are clustered. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
parent
b03283033e
commit
63b06ebde8
@ -52,13 +52,13 @@ public abstract class Pattern implements PikeVMOpcodes {
|
||||
if ("a(bb)?a".equals(regex)) {
|
||||
int[] program = new int[] {
|
||||
SAVE_OFFSET, 0,
|
||||
CHAR, 'a',
|
||||
SPLIT, 14,
|
||||
'a',
|
||||
SPLIT, 11,
|
||||
SAVE_OFFSET, 2,
|
||||
CHAR, 'b',
|
||||
CHAR, 'b',
|
||||
'b',
|
||||
'b',
|
||||
SAVE_OFFSET, 3,
|
||||
/* 14 */ CHAR, 'a',
|
||||
/* 11 */ 'a',
|
||||
SAVE_OFFSET, 1
|
||||
};
|
||||
return new RegexPattern(regex, flags, new PikeVM(program, 1));
|
||||
|
@ -256,12 +256,6 @@ class PikeVM implements PikeVMOpcodes {
|
||||
|
||||
int opcode = program[pc];
|
||||
switch (opcode) {
|
||||
/* Possible optimization: make all opcodes <= 0xffff implicit chars */
|
||||
case CHAR:
|
||||
if (c == (char)program[pc + 1]) {
|
||||
current.queueNext(pc, pc + 2, next);
|
||||
}
|
||||
break;
|
||||
case DOT:
|
||||
if (c != '\0' && c != '\r' && c != '\n') {
|
||||
current.queueNext(pc, pc + 1, next);
|
||||
@ -270,6 +264,7 @@ class PikeVM implements PikeVMOpcodes {
|
||||
case DOTALL:
|
||||
current.queueNext(pc, pc + 1, next);
|
||||
break;
|
||||
/* immediate opcodes, i.e. thread continues within the same step */
|
||||
case SAVE_OFFSET:
|
||||
int index = program[pc + 1];
|
||||
current.saveOffset(pc, index, i);
|
||||
@ -283,6 +278,12 @@ class PikeVM implements PikeVMOpcodes {
|
||||
current.queueImmediately(pc, program[pc + 1], false);
|
||||
break;
|
||||
default:
|
||||
if (program[pc] >= 0 && program[pc] <= 0xffff) {
|
||||
if (c == (char)program[pc]) {
|
||||
current.queueNext(pc, pc + 1, next);
|
||||
}
|
||||
break;
|
||||
}
|
||||
throw new RuntimeException("Invalid opcode: " + opcode
|
||||
+ " at pc " + pc);
|
||||
}
|
||||
|
@ -19,10 +19,11 @@ package regex;
|
||||
* @author Johannes Schindelin
|
||||
*/
|
||||
interface PikeVMOpcodes {
|
||||
final static int CHAR = 1;
|
||||
final static int DOT = 2;
|
||||
final static int DOTALL = 3;
|
||||
final static int SAVE_OFFSET = 4;
|
||||
final static int SPLIT = 5;
|
||||
final static int JMP = 6;
|
||||
final static int DOT = -1;
|
||||
final static int DOTALL = -2;
|
||||
|
||||
final static int SAVE_OFFSET = -40;
|
||||
|
||||
final static int SPLIT = -50;
|
||||
final static int JMP = -51;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user