mirror of
https://github.com/corda/corda.git
synced 2025-01-08 14:03:06 +00:00
Regex: optimize matching characters
Instead of having a separate opcode 'CHAR' followed by the character to match, let every opcode whose value falls within the range of a char *be* the instruction 'match this character'. While at it, split the opcode values into distinct ranges, one per type of opcode, so that related operations are clustered. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
parent
b03283033e
commit
63b06ebde8
@ -52,13 +52,13 @@ public abstract class Pattern implements PikeVMOpcodes {
|
|||||||
if ("a(bb)?a".equals(regex)) {
|
if ("a(bb)?a".equals(regex)) {
|
||||||
int[] program = new int[] {
|
int[] program = new int[] {
|
||||||
SAVE_OFFSET, 0,
|
SAVE_OFFSET, 0,
|
||||||
CHAR, 'a',
|
'a',
|
||||||
SPLIT, 14,
|
SPLIT, 11,
|
||||||
SAVE_OFFSET, 2,
|
SAVE_OFFSET, 2,
|
||||||
CHAR, 'b',
|
'b',
|
||||||
CHAR, 'b',
|
'b',
|
||||||
SAVE_OFFSET, 3,
|
SAVE_OFFSET, 3,
|
||||||
/* 14 */ CHAR, 'a',
|
/* 11 */ 'a',
|
||||||
SAVE_OFFSET, 1
|
SAVE_OFFSET, 1
|
||||||
};
|
};
|
||||||
return new RegexPattern(regex, flags, new PikeVM(program, 1));
|
return new RegexPattern(regex, flags, new PikeVM(program, 1));
|
||||||
|
@ -256,12 +256,6 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
|
|
||||||
int opcode = program[pc];
|
int opcode = program[pc];
|
||||||
switch (opcode) {
|
switch (opcode) {
|
||||||
/* Possible optimization: make all opcodes <= 0xffff implicit chars */
|
|
||||||
case CHAR:
|
|
||||||
if (c == (char)program[pc + 1]) {
|
|
||||||
current.queueNext(pc, pc + 2, next);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case DOT:
|
case DOT:
|
||||||
if (c != '\0' && c != '\r' && c != '\n') {
|
if (c != '\0' && c != '\r' && c != '\n') {
|
||||||
current.queueNext(pc, pc + 1, next);
|
current.queueNext(pc, pc + 1, next);
|
||||||
@ -270,6 +264,7 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
case DOTALL:
|
case DOTALL:
|
||||||
current.queueNext(pc, pc + 1, next);
|
current.queueNext(pc, pc + 1, next);
|
||||||
break;
|
break;
|
||||||
|
/* immediate opcodes, i.e. thread continues within the same step */
|
||||||
case SAVE_OFFSET:
|
case SAVE_OFFSET:
|
||||||
int index = program[pc + 1];
|
int index = program[pc + 1];
|
||||||
current.saveOffset(pc, index, i);
|
current.saveOffset(pc, index, i);
|
||||||
@ -283,6 +278,12 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
current.queueImmediately(pc, program[pc + 1], false);
|
current.queueImmediately(pc, program[pc + 1], false);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
if (program[pc] >= 0 && program[pc] <= 0xffff) {
|
||||||
|
if (c == (char)program[pc]) {
|
||||||
|
current.queueNext(pc, pc + 1, next);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
throw new RuntimeException("Invalid opcode: " + opcode
|
throw new RuntimeException("Invalid opcode: " + opcode
|
||||||
+ " at pc " + pc);
|
+ " at pc " + pc);
|
||||||
}
|
}
|
||||||
|
@ -19,10 +19,11 @@ package regex;
|
|||||||
* @author Johannes Schindelin
|
* @author Johannes Schindelin
|
||||||
*/
|
*/
|
||||||
interface PikeVMOpcodes {
|
interface PikeVMOpcodes {
|
||||||
final static int CHAR = 1;
|
final static int DOT = -1;
|
||||||
final static int DOT = 2;
|
final static int DOTALL = -2;
|
||||||
final static int DOTALL = 3;
|
|
||||||
final static int SAVE_OFFSET = 4;
|
final static int SAVE_OFFSET = -40;
|
||||||
final static int SPLIT = 5;
|
|
||||||
final static int JMP = 6;
|
final static int SPLIT = -50;
|
||||||
|
final static int JMP = -51;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user