From 63b06ebde8b016f811b906f50d45af53516ab287 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 9 Nov 2013 14:13:19 -0600 Subject: [PATCH] Regex: optimize matching characters Instead of having a dedicated opcode 'CHAR' followed by an operand, let every opcode value that falls within the range of a char (0..0xffff) itself *be* the opcode 'match this character'. While at it, move the remaining opcodes to negative values, split into separate ranges by opcode type so that related operations are clustered. Signed-off-by: Johannes Schindelin --- test/regex/Pattern.java | 10 +++++----- test/regex/PikeVM.java | 13 +++++++------ test/regex/PikeVMOpcodes.java | 13 +++++++------ 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/test/regex/Pattern.java b/test/regex/Pattern.java index 0089b45564..d10fe8fa3b 100644 --- a/test/regex/Pattern.java +++ b/test/regex/Pattern.java @@ -52,13 +52,13 @@ public abstract class Pattern implements PikeVMOpcodes { if ("a(bb)?a".equals(regex)) { int[] program = new int[] { SAVE_OFFSET, 0, - CHAR, 'a', - SPLIT, 14, + 'a', + SPLIT, 11, SAVE_OFFSET, 2, - CHAR, 'b', - CHAR, 'b', + 'b', + 'b', SAVE_OFFSET, 3, - /* 14 */ CHAR, 'a', + /* 11 */ 'a', SAVE_OFFSET, 1 }; return new RegexPattern(regex, flags, new PikeVM(program, 1)); diff --git a/test/regex/PikeVM.java b/test/regex/PikeVM.java index c4b14e07b7..9219280c68 100644 --- a/test/regex/PikeVM.java +++ b/test/regex/PikeVM.java @@ -256,12 +256,6 @@ class PikeVM implements PikeVMOpcodes { int opcode = program[pc]; switch (opcode) { - /* Possible optimization: make all opcodes <= 0xffff implicit chars */ - case CHAR: - if (c == (char)program[pc + 1]) { - current.queueNext(pc, pc + 2, next); - } - break; case DOT: if (c != '\0' && c != '\r' && c != '\n') { current.queueNext(pc, pc + 1, next); @@ -270,6 +264,7 @@ class PikeVM implements PikeVMOpcodes { case DOTALL: current.queueNext(pc, pc + 1, next); break; + /* immediate opcodes, i.e. 
thread continues within the same step */ case SAVE_OFFSET: int index = program[pc + 1]; current.saveOffset(pc, index, i); @@ -283,6 +278,12 @@ class PikeVM implements PikeVMOpcodes { current.queueImmediately(pc, program[pc + 1], false); break; default: + if (program[pc] >= 0 && program[pc] <= 0xffff) { + if (c == (char)program[pc]) { + current.queueNext(pc, pc + 1, next); + } + break; + } throw new RuntimeException("Invalid opcode: " + opcode + " at pc " + pc); } diff --git a/test/regex/PikeVMOpcodes.java b/test/regex/PikeVMOpcodes.java index 4518130700..c12ad99427 100644 --- a/test/regex/PikeVMOpcodes.java +++ b/test/regex/PikeVMOpcodes.java @@ -19,10 +19,11 @@ package regex; * @author Johannes Schindelin */ interface PikeVMOpcodes { - final static int CHAR = 1; - final static int DOT = 2; - final static int DOTALL = 3; - final static int SAVE_OFFSET = 4; - final static int SPLIT = 5; - final static int JMP = 6; + final static int DOT = -1; + final static int DOTALL = -2; + + final static int SAVE_OFFSET = -40; + + final static int SPLIT = -50; + final static int JMP = -51; }