From 7da03b0f1999235cdb382c8295e5b8f6daa4a4de Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 10 Nov 2013 10:23:01 -0600 Subject: [PATCH] Regex: Implement reluctant '?', '*' and '+' Now that we have reluctant quantifiers, we can get rid of the hardcoded program for the challenging regular expression pattern. Signed-off-by: Johannes Schindelin --- test/regex/Compiler.java | 24 +++++++++++++++++------- test/regex/Pattern.java | 30 ------------------------------ 2 files changed, 17 insertions(+), 37 deletions(-) diff --git a/test/regex/Compiler.java b/test/regex/Compiler.java index 75c9d2d9df..6f65e6b4d9 100644 --- a/test/regex/Compiler.java +++ b/test/regex/Compiler.java @@ -66,8 +66,9 @@ class Compiler implements PikeVMOpcodes { private class Repeat extends Expression { private Expression expr; private int minCount, maxCount; + private boolean greedy; - public Repeat(Expression expr, int minCount, int maxCount) { + public Repeat(Expression expr, int minCount, int maxCount, boolean greedy) { if (minCount != 0 && minCount != 1) { throw new RuntimeException("Unexpected min count: " + minCount); } @@ -77,23 +78,26 @@ class Compiler implements PikeVMOpcodes { this.expr = expr; this.minCount = minCount; this.maxCount = maxCount; + this.greedy = greedy; } protected void writeCode(Output output) { int start = output.offset; + int splitJmp = greedy ? SPLIT_JMP : SPLIT; + int split = greedy ? SPLIT : SPLIT_JMP; if (minCount == 1 && maxCount == -1) { expr.writeCode(output); - output.add(SPLIT_JMP); + output.add(splitJmp); output.add(start); } else if (minCount == 0 && maxCount == -1) { - output.add(SPLIT); + output.add(split); int jump = output.markJump(); expr.writeCode(output); - output.add(SPLIT_JMP); + output.add(splitJmp); output.add(start + 2); output.setJump(jump); } else if (minCount == 0 && maxCount == 1) { - output.add(SPLIT); + output.add(split); int jump = output.markJump(); expr.writeCode(output); output.setJump(jump); @@ -172,10 +176,16 @@ class Compiler implements PikeVMOpcodes { continue; case '?': case '*': - case '+': + case '+': { + boolean greedy = true; + if (index + 1 < array.length && array[index + 1] == '?') { + greedy = false; + ++ index; + } current.push(new Repeat(current.pop(), - c == '+' ? 1 : 0, c == '?' ? 1 : -1)); + c == '+' ? 1 : 0, c == '?' ? 1 : -1, greedy)); continue; + } case '(': if (index + 1 < array.length && array[index + 1] == '?') { throw new UnsupportedOperationException("Not yet supported: " diff --git a/test/regex/Pattern.java b/test/regex/Pattern.java index 49ac289ac3..f0d5596e2f 100644 --- a/test/regex/Pattern.java +++ b/test/regex/Pattern.java @@ -46,36 +46,6 @@ public abstract class Pattern implements PikeVMOpcodes { if (flags != 0) { throw new UnsupportedOperationException("TODO"); } - if ("a(a*?)(a?)(a??)(a+)(a*)a".equals(regex)) { - return new RegexPattern(regex, flags, new PikeVM(new int[] { - SAVE_OFFSET, 0, - 'a', - SAVE_OFFSET, 2, - SPLIT_JMP, 10, - /* 7 */ 'a', - SPLIT, 7, - /* 10 */ SAVE_OFFSET, 3, - SAVE_OFFSET, 4, - SPLIT, 17, - 'a', - /* 17 */ SAVE_OFFSET, 5, - SAVE_OFFSET, 6, - SPLIT_JMP, 24, - 'a', - /* 24 */ SAVE_OFFSET, 7, - SAVE_OFFSET, 8, - /* 28 */ 'a', - SPLIT_JMP, 28, - SAVE_OFFSET, 9, - SAVE_OFFSET, 10, - SPLIT, 40, - /* 37 */ 'a', - SPLIT_JMP, 37, - /* 40 */ SAVE_OFFSET, 11, - 'a', - SAVE_OFFSET, 1 - }, 5)); - } return new Compiler().compile(regex); }