Regex: special-case a(a*?)(a?)(a??)(a+)(a*)a

Among other challenges, this regular expression is designed to demonstrate
that thread prioritization is finicky. Given the string 'aaaaaa' to match,
the first four threads will all try to grab the second 'a'. By then, the
third thread (the one that matched the '(a??)' group) will already have
scheduled the instruction pointer of the '(a+)' group -- the same
instruction pointer that the second, higher-priority thread will try to
advance to, but only after processing the '(a??)' group's SPLIT. In that
case, the second thread must override the third thread, essentially
stopping the latter.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
Johannes Schindelin 2013-11-11 09:29:24 -06:00
parent edb48ffec2
commit d00f799d2e
2 changed files with 42 additions and 3 deletions

View File

@ -18,9 +18,20 @@ public class Regex {
expect(!getMatcher(regex, string).matches());
}
/**
 * Checks that the matcher obtained from {@code getMatcher(regex, string)}
 * reports a match and that its capturing groups equal the expected values.
 *
 * @param regex the regular expression handed to {@code getMatcher}
 * @param string the input handed to {@code getMatcher}
 * @param groups expected contents of groups 1..n, in order; the number of
 *        entries must equal {@code groupCount()}. A {@code null} entry
 *        expects {@code group(i)} to return {@code null} as well.
 */
private static void expectGroups(String regex, String string,
    String... groups) {
  Matcher matcher = getMatcher(regex, string);
  expect(matcher.matches());
  expect(matcher.groupCount() == groups.length);
  // Group 0 is the whole match, so the expected values start at group 1.
  for (int i = 1; i <= groups.length; ++i) {
    String expected = groups[i - 1];
    String actual = matcher.group(i);
    // Null-safe comparison: a null expectation must fail the check (or pass
    // when the group is null too) instead of throwing NullPointerException.
    expect(expected == null ? actual == null : expected.equals(actual));
  }
}
/**
 * Runs the regular expression checks: full-match and no-match cases for a
 * pattern with an optional group, followed by a group-capture check for a
 * pattern that mixes greedy and reluctant quantifiers.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
  // Pattern with an optional capturing group in the middle.
  final String optionalGroup = "a(bb)?a";
  expectMatch(optionalGroup, "abba");
  expectNoMatch(optionalGroup, "abbba");
  expectNoMatch(optionalGroup, "abbaa");

  // Pattern whose five groups compete for the same run of 'a' characters.
  final String competingGroups = "a(a*?)(a?)(a??)(a+)(a*)a";
  expectGroups(competingGroups, "aaaaaa", "", "a", "", "aaa", "");
}
}

View File

@ -50,7 +50,7 @@ public abstract class Pattern implements PikeVMOpcodes {
throw new UnsupportedOperationException("TODO");
}
if ("a(bb)?a".equals(regex)) {
int[] program = new int[] {
return new RegexPattern(regex, flags, new PikeVM(new int[] {
SAVE_OFFSET, 0,
'a',
SPLIT, 11,
@ -60,8 +60,36 @@ public abstract class Pattern implements PikeVMOpcodes {
SAVE_OFFSET, 3,
/* 11 */ 'a',
SAVE_OFFSET, 1
};
return new RegexPattern(regex, flags, new PikeVM(program, 1));
}, 1));
} else if ("a(a*?)(a?)(a??)(a+)(a*)a".equals(regex)) {
return new RegexPattern(regex, flags, new PikeVM(new int[] {
SAVE_OFFSET, 0,
'a',
SAVE_OFFSET, 2,
SPLIT_JMP, 10,
/* 7 */ 'a',
SPLIT, 7,
/* 10 */ SAVE_OFFSET, 3,
SAVE_OFFSET, 4,
SPLIT, 17,
'a',
/* 17 */ SAVE_OFFSET, 5,
SAVE_OFFSET, 6,
SPLIT_JMP, 24,
'a',
/* 24 */ SAVE_OFFSET, 7,
SAVE_OFFSET, 8,
/* 28 */ 'a',
SPLIT_JMP, 28,
SAVE_OFFSET, 9,
SAVE_OFFSET, 10,
SPLIT, 40,
/* 37 */ 'a',
SPLIT_JMP, 37,
/* 40 */ SAVE_OFFSET, 11,
'a',
SAVE_OFFSET, 1
}, 5));
}
throw new UnsupportedOperationException("Cannot handle regex " + regex);
}