Regex: Implement reluctant '?', '*' and '+'

Now that the regex compiler handles the reluctant quantifiers ('??', '*?'
and '+?'), we can get rid of the hardcoded PikeVM program for the
challenging regular expression pattern "a(a*?)(a?)(a??)(a+)(a*)a".

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
Johannes Schindelin 2013-11-10 10:23:01 -06:00
parent f979505b3d
commit 7da03b0f19
2 changed files with 17 additions and 37 deletions

View File

@ -66,8 +66,9 @@ class Compiler implements PikeVMOpcodes {
private class Repeat extends Expression {
private Expression expr;
private int minCount, maxCount;
private boolean greedy;
public Repeat(Expression expr, int minCount, int maxCount) {
public Repeat(Expression expr, int minCount, int maxCount, boolean greedy) {
if (minCount != 0 && minCount != 1) {
throw new RuntimeException("Unexpected min count: " + minCount);
}
@ -77,23 +78,26 @@ class Compiler implements PikeVMOpcodes {
this.expr = expr;
this.minCount = minCount;
this.maxCount = maxCount;
this.greedy = greedy;
}
protected void writeCode(Output output) {
int start = output.offset;
int splitJmp = greedy ? SPLIT_JMP : SPLIT;
int split = greedy ? SPLIT : SPLIT_JMP;
if (minCount == 1 && maxCount == -1) {
expr.writeCode(output);
output.add(SPLIT_JMP);
output.add(splitJmp);
output.add(start);
} else if (minCount == 0 && maxCount == -1) {
output.add(SPLIT);
output.add(split);
int jump = output.markJump();
expr.writeCode(output);
output.add(SPLIT_JMP);
output.add(splitJmp);
output.add(start + 2);
output.setJump(jump);
} else if (minCount == 0 && maxCount == 1) {
output.add(SPLIT);
output.add(split);
int jump = output.markJump();
expr.writeCode(output);
output.setJump(jump);
@ -172,10 +176,16 @@ class Compiler implements PikeVMOpcodes {
continue;
case '?':
case '*':
case '+':
case '+': {
boolean greedy = true;
if (index + 1 < array.length && array[index + 1] == '?') {
greedy = false;
++ index;
}
current.push(new Repeat(current.pop(),
c == '+' ? 1 : 0, c == '?' ? 1 : -1));
c == '+' ? 1 : 0, c == '?' ? 1 : -1, greedy));
continue;
}
case '(':
if (index + 1 < array.length && array[index + 1] == '?') {
throw new UnsupportedOperationException("Not yet supported: "

View File

@ -46,36 +46,6 @@ public abstract class Pattern implements PikeVMOpcodes {
if (flags != 0) {
throw new UnsupportedOperationException("TODO");
}
if ("a(a*?)(a?)(a??)(a+)(a*)a".equals(regex)) {
return new RegexPattern(regex, flags, new PikeVM(new int[] {
SAVE_OFFSET, 0,
'a',
SAVE_OFFSET, 2,
SPLIT_JMP, 10,
/* 7 */ 'a',
SPLIT, 7,
/* 10 */ SAVE_OFFSET, 3,
SAVE_OFFSET, 4,
SPLIT, 17,
'a',
/* 17 */ SAVE_OFFSET, 5,
SAVE_OFFSET, 6,
SPLIT_JMP, 24,
'a',
/* 24 */ SAVE_OFFSET, 7,
SAVE_OFFSET, 8,
/* 28 */ 'a',
SPLIT_JMP, 28,
SAVE_OFFSET, 9,
SAVE_OFFSET, 10,
SPLIT, 40,
/* 37 */ 'a',
SPLIT_JMP, 37,
/* 40 */ SAVE_OFFSET, 11,
'a',
SAVE_OFFSET, 1
}, 5));
}
return new Compiler().compile(regex);
}