mirror of
https://github.com/corda/corda.git
synced 2025-01-19 11:16:54 +00:00
Regex: support lookaheads
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
parent
d4a2f58eb5
commit
85af36ef90
@ -59,5 +59,6 @@ public class Regex {
|
||||
expectGroups("(?:a)", "a");
|
||||
expectGroups("a|(b|c)", "a", (String)null);
|
||||
expectGroups("a|(b|c)", "c", "c");
|
||||
expectGroups("(?=a)a", "a");
|
||||
}
|
||||
}
|
||||
|
@ -28,6 +28,7 @@ class Compiler implements PikeVMOpcodes {
|
||||
private int groupCount = -1;
|
||||
private int findPreambleSize;
|
||||
private ArrayList<CharacterMatcher> classes;
|
||||
private ArrayList<PikeVM> lookaheads;
|
||||
|
||||
public Output(Expression expr) {
|
||||
// try-run to determine the code size
|
||||
@ -36,6 +37,7 @@ class Compiler implements PikeVMOpcodes {
|
||||
offset = 0;
|
||||
groupCount = -1;
|
||||
classes = new ArrayList<CharacterMatcher>();
|
||||
lookaheads = new ArrayList<PikeVM>();
|
||||
// write it out!
|
||||
expr.writeCode(this);
|
||||
}
|
||||
@ -64,7 +66,10 @@ class Compiler implements PikeVMOpcodes {
|
||||
/**
 * Builds a PikeVM from what this Output has collected: the program, the
 * find preamble size, the group count, the character classes and the
 * lookaheads. Both lists are copied into arrays before being handed over.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers. The
 * four-argument return below appears to be the pre-change line that the
 * five-argument return replaces -- confirm against the repository.
 *
 * @return a new PikeVM constructed from the collected state
 */
public PikeVM toVM() {
|
||||
CharacterMatcher[] classes = new CharacterMatcher[this.classes.size()];
|
||||
this.classes.toArray(classes);
|
||||
return new PikeVM(program, findPreambleSize, groupCount, classes);
|
||||
// copy the registered lookahead VMs into an array, same as the classes
PikeVM[] lookaheads = new PikeVM[this.lookaheads.size()];
|
||||
this.lookaheads.toArray(lookaheads);
|
||||
return new PikeVM(program, findPreambleSize, groupCount, classes,
|
||||
lookaheads);
|
||||
}
|
||||
|
||||
public int addClass(CharacterMatcher characterClass) {
|
||||
@ -75,6 +80,15 @@ class Compiler implements PikeVMOpcodes {
|
||||
classes.add(characterClass);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Registers a lookahead VM and returns its position in the lookaheads
 * list.
 *
 * @param lookahead the lookahead VM to append
 * @return the index of the appended lookahead, or -1 if program is null
 *         (in which case nothing is stored)
 */
public int addLookahead(PikeVM lookahead) {
|
||||
// NOTE(review): program is presumably still null during the try-run that
// only determines the code size -- confirm against the constructor
if (program == null) {
|
||||
return -1;
|
||||
}
|
||||
// the new entry lands at the end, so its index is the current size
int result = lookaheads.size();
|
||||
lookaheads.add(lookahead);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
private abstract class Expression {
|
||||
@ -212,6 +226,17 @@ class Compiler implements PikeVMOpcodes {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Expression for a lookahead; the parser creates one when it encounters
 * "(?=" in the pattern.
 *
 * The body of the lookahead is collected in its own Group and compiled
 * into a separate PikeVM rather than inlined into the enclosing program.
 */
private class Lookahead extends Expression {
|
||||
// holds the lookahead's body
// NOTE(review): (false, null) presumably means non-capturing with no
// parent -- confirm against Group's constructor
private final Group group = new Group(false, null);
|
||||

||||
/**
 * Compiles the group into its own PikeVM, then emits the LOOKAHEAD opcode
 * followed by the index returned by output.addLookahead(vm).
 */
@Override
|
||||
protected void writeCode(Output output) {
|
||||
// the sub-VM is compiled on every call, even when addLookahead()
// returns -1 and does not store it
PikeVM vm = new Output(group).toVM();
|
||||
output.add(LOOKAHEAD);
|
||||
output.add(output.addLookahead(vm));
|
||||
}
|
||||
}
|
||||
|
||||
private class Group0 extends Expression {
|
||||
private final Group group;
|
||||
|
||||
@ -271,10 +296,24 @@ class Compiler implements PikeVMOpcodes {
|
||||
case '(': {
|
||||
boolean capturing = true;
|
||||
if (index + 1 < array.length && array[index + 1] == '?') {
|
||||
if (index + 2 < array.length && array[index + 2] == ':') {
|
||||
index += 2;
|
||||
index += 2;
|
||||
if (index >= array.length) {
|
||||
throw new RuntimeException("Short pattern @" + index + ": "
|
||||
+ regex);
|
||||
}
|
||||
c = array[index];
|
||||
switch (c) {
|
||||
case ':':
|
||||
capturing = false;
|
||||
} else {
|
||||
break;
|
||||
case '=': {
|
||||
capturing = false;
|
||||
Lookahead lookahead = new Lookahead();
|
||||
current.push(lookahead);
|
||||
groups.push(lookahead.group);
|
||||
continue;
|
||||
}
|
||||
default:
|
||||
throw new UnsupportedOperationException("Not yet supported: "
|
||||
+ regex.substring(index));
|
||||
}
|
||||
|
@ -27,19 +27,21 @@ class PikeVM implements PikeVMOpcodes {
|
||||
*/
|
||||
private final int findPrefixLength;
|
||||
private final CharacterMatcher[] classes;
|
||||
private final PikeVM[] lookaheads;
|
||||
|
||||
public interface Result {
|
||||
void set(int[] start, int[] end);
|
||||
}
|
||||
|
||||
/**
 * Stores the compiled program and its supporting tables.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers. The
 * parameter line ending in "classes)" appears to be the pre-change line
 * that the line adding "lookaheads" replaces -- confirm against the
 * repository.
 *
 * @param program the program to execute
 * @param findPrefixLength stored as-is in the findPrefixLength field
 * @param groupCount the group count; also used to derive offsetsCount
 * @param classes the character matchers used by the program
 * @param lookaheads the lookahead VMs, indexed by the operand that follows
 *        a LOOKAHEAD opcode in the program
 */
protected PikeVM(int[] program, int findPrefixLength, int groupCount,
|
||||
CharacterMatcher[] classes)
|
||||
CharacterMatcher[] classes, PikeVM[] lookaheads)
|
||||
{
|
||||
this.program = program;
|
||||
this.findPrefixLength = findPrefixLength;
|
||||
this.groupCount = groupCount;
|
||||
// NOTE(review): presumably a start and an end offset per group, plus one
// more pair for the overall match -- confirm
offsetsCount = 2 * groupCount + 2;
|
||||
this.classes = classes;
|
||||
this.lookaheads = lookaheads;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -313,7 +315,12 @@ class PikeVM implements PikeVMOpcodes {
|
||||
if (anchorEnd && i < end) {
|
||||
continue;
|
||||
}
|
||||
if (result == null) {
|
||||
// only interested in a match, no need to go on
|
||||
return true;
|
||||
}
|
||||
current.setResult(result);
|
||||
|
||||
// now that we found a match, even higher-priority matches must match
|
||||
// at the same start offset
|
||||
if (!anchorStart) {
|
||||
@ -338,10 +345,18 @@ class PikeVM implements PikeVMOpcodes {
|
||||
current.queueNext(pc, pc + 2, next);
|
||||
}
|
||||
break;
|
||||
case LOOKAHEAD:
|
||||
if (lookaheads[program[pc + 1]].matches(characters,
|
||||
i, characters.length, true, false, null)) {
|
||||
current.queueImmediately(pc, pc + 2, false);
|
||||
}
|
||||
break;
|
||||
/* immediate opcodes, i.e. thread continues within the same step */
|
||||
case SAVE_OFFSET:
|
||||
int index = program[pc + 1];
|
||||
current.saveOffset(pc, index, i);
|
||||
if (result != null) {
|
||||
int index = program[pc + 1];
|
||||
current.saveOffset(pc, index, i);
|
||||
}
|
||||
current.queueImmediately(pc, pc + 2, false);
|
||||
break;
|
||||
case SPLIT:
|
||||
|
@ -24,6 +24,8 @@ interface PikeVMOpcodes {
|
||||
|
||||
final static int CHARACTER_CLASS = -20;
|
||||
|
||||
final static int LOOKAHEAD = -30;
|
||||
|
||||
final static int SAVE_OFFSET = -40;
|
||||
|
||||
final static int SPLIT = -50;
|
||||
|
Loading…
Reference in New Issue
Block a user