mirror of
https://github.com/corda/corda.git
synced 2025-01-22 12:28:11 +00:00
Regex: support prioritized threads
If we want to match greedy or reluctant regular expressions, we have to make sure that certain threads are split off with a higher priority than others. We will use the ThreadQueue's natural order as priority order: high to low. To support splitting into different-priority threads, let's introduce a second SPLIT opcode: SPLIT_JMP. The latter prefers to jump while the former prefers to execute the opcode directly after the SPLIT opcode. There is a subtle challenge here, though: let's assume that there are two current threads and the higher-priority one wants to jump to where the lower-priority one already is. In the PikeVM implementation before this change, queueImmediately() would see that there is already a thread queued for that program counter and *not* queue the higher-priority one. Example: when matching the pattern '(a?)(a??)(a?)' against the string 'aa', after the first character, the first (high-priority) thread will have matched the first group while the second thread will have matched the second group. In the following step, therefore, the first thread will want to SPLIT_JMP to match the final 'a' to the third group, but the second thread has already queued that program counter. The proposed solution is to introduce a third thread queue: 'queued'. When queuing threads to be executed after reading the next character from the string to match, they are not directly queued into 'next' but into 'queued'. Every thread requiring immediate execution (i.e. before reading the next character) will be queued into 'current'. Whenever 'current' is drained, the next thread from 'queued' that has not been queued to 'current' yet will be executed. That way, we can guarantee that 1) no lower-priority thread can override a higher-priority thread and 2) infinite loops are prevented. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
parent
63b06ebde8
commit
edb48ffec2
@ -70,6 +70,34 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
offsets = new int[program.length + 1][];
|
offsets = new int[program.length + 1][];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Creates a queue that initially holds exactly one thread, positioned at
 * {@code startPC}.
 * <p>
 * The single entry is both head and tail of the queue and is given a fresh,
 * zero-filled offsets array of {@code offsetsCount} entries.
 * </p>
 *
 * @param startPC the program counter of the initial thread; used directly as
 *                an index into {@code offsets}, so it is presumably expected
 *                to lie within {@code 0..program.length} (not checked here)
 */
public ThreadQueue(int startPC) {
|
||||||
|
// a one-element queue: head and tail coincide
head = tail = startPC;
|
||||||
|
// successor links; elsewhere in this class they are stored as (pc + 1)
// so that the default value 0 denotes "no successor"
next = new int[program.length + 1];
|
||||||
|
offsets = new int[program.length + 1][];
|
||||||
|
// the initial thread starts out with a fresh offsets array
offsets[head] = new int[offsetsCount];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Removes threads from the head of this queue, one at a time, until one of
 * them is accepted by {@code into}, or until this queue is exhausted.
 * <p>
 * Every thread taken from the head is removed from this queue regardless of
 * whether {@code into} accepted it; a thread that is not accepted is dropped
 * and the next one is tried.
 * </p>
 *
 * @param into the queue that should receive the thread for immediate
 *             execution
 * @return the program counter of the thread that was handed over to
 *         {@code into}, or {@code -1} if this queue ran empty before any
 *         thread was accepted
 */
public int queueOneImmediately(ThreadQueue into) {
|
||||||
|
for (;;) {
|
||||||
|
// a negative head means that this queue is empty
if (head < 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// try to hand the head thread (including its offsets) over to 'into';
// presumably this returns false when 'into' already has a thread at
// that program counter -- TODO confirm against queueNext()
boolean wasQueued = queueNext(head, head, into);
|
||||||
|
int pc = head;
|
||||||
|
// unlink the head entry from this queue
if (head == tail) {
|
||||||
|
// that was the last entry: the queue is empty now
head = tail = -1;
|
||||||
|
} else {
|
||||||
|
// links are stored as (pc + 1), hence the decrement
head = next[pc] - 1;
|
||||||
|
// 0 marks "no successor"
next[pc] = 0;
|
||||||
|
}
|
||||||
|
// this queue no longer holds a thread at pc
offsets[pc] = null;
|
||||||
|
if (wasQueued) {
|
||||||
|
// NOTE(review): the visible part of queueNext() already sets the
// target queue's tail to the queued pc, so this assignment looks
// redundant -- confirm before removing
into.tail = pc;
|
||||||
|
return pc;
|
||||||
|
}
|
||||||
|
// not accepted by 'into': drop this thread and try the next one
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Schedules the instruction at {@code nextPC} to be executed immediately.
|
* Schedules the instruction at {@code nextPC} to be executed immediately.
|
||||||
* <p>
|
* <p>
|
||||||
@ -141,8 +169,7 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
} else {
|
} else {
|
||||||
next.next[next.tail] = nextPC + 1;
|
next.next[next.tail] = nextPC + 1;
|
||||||
}
|
}
|
||||||
next.offsets[nextPC] =
|
next.offsets[nextPC] = offsets[currentPC];
|
||||||
currentPC < 0 ? new int[offsetsCount] : offsets[currentPC];
|
|
||||||
next.tail = nextPC;
|
next.tail = nextPC;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -223,7 +250,7 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
ThreadQueue next = new ThreadQueue();
|
ThreadQueue next = new ThreadQueue();
|
||||||
|
|
||||||
// initialize the first thread
|
// initialize the first thread
|
||||||
current.queueNext(-1, 0, current);
|
ThreadQueue queued = new ThreadQueue(0);
|
||||||
if (!anchorStart) {
|
if (!anchorStart) {
|
||||||
// this requires non-greedy matching
|
// this requires non-greedy matching
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
@ -231,7 +258,7 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
|
|
||||||
boolean foundMatch = false;
|
boolean foundMatch = false;
|
||||||
for (int i = start; i <= end; ++i) {
|
for (int i = start; i <= end; ++i) {
|
||||||
if (current.isEmpty()) {
|
if (queued.isEmpty()) {
|
||||||
// no threads left
|
// no threads left
|
||||||
return foundMatch;
|
return foundMatch;
|
||||||
}
|
}
|
||||||
@ -240,6 +267,9 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
int pc = -1;
|
int pc = -1;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
pc = current.next(pc);
|
pc = current.next(pc);
|
||||||
|
if (pc < 0) {
|
||||||
|
pc = queued.queueOneImmediately(current);
|
||||||
|
}
|
||||||
if (pc < 0) {
|
if (pc < 0) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -274,6 +304,10 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
current.queueImmediately(pc, program[pc + 1], true);
|
current.queueImmediately(pc, program[pc + 1], true);
|
||||||
current.queueImmediately(pc, pc + 2, false);
|
current.queueImmediately(pc, pc + 2, false);
|
||||||
break;
|
break;
|
||||||
|
case SPLIT_JMP:
|
||||||
|
current.queueImmediately(pc, pc + 2, true);
|
||||||
|
current.queueImmediately(pc, program[pc + 1], false);
|
||||||
|
break;
|
||||||
case JMP:
|
case JMP:
|
||||||
current.queueImmediately(pc, program[pc + 1], false);
|
current.queueImmediately(pc, program[pc + 1], false);
|
||||||
break;
|
break;
|
||||||
@ -292,8 +326,8 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
current.clean();
|
current.clean();
|
||||||
|
|
||||||
// prepare for next step
|
// prepare for next step
|
||||||
ThreadQueue swap = current;
|
ThreadQueue swap = queued;
|
||||||
current = next;
|
queued = next;
|
||||||
next = swap;
|
next = swap;
|
||||||
}
|
}
|
||||||
return foundMatch;
|
return foundMatch;
|
||||||
|
@ -25,5 +25,6 @@ interface PikeVMOpcodes {
|
|||||||
final static int SAVE_OFFSET = -40;
|
final static int SAVE_OFFSET = -40;
|
||||||
|
|
||||||
final static int SPLIT = -50;
|
final static int SPLIT = -50;
|
||||||
final static int JMP = -51;
|
final static int SPLIT_JMP = -51; // this split prefers to jump
|
||||||
|
final static int JMP = -52;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user