Regex: implement ^, $, \b and \B

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
Johannes Schindelin 2013-11-19 22:58:01 -06:00
parent fe32cce2ad
commit fb6486e276
4 changed files with 70 additions and 0 deletions

View File

@ -83,5 +83,9 @@ public class Regex {
"a", " + ", "b", " * ", "x");
expectMatch("[0-9[def]]", "f");
expectNoMatch("[a-z&&[^d-f]]", "f");
// Anchors and word boundaries.
// NOTE(review): the arguments look like (pattern, input, expected pieces...)
// -- confirm against the expectSplit helper, which is not visible here.
// '^': only the leading "H" is expected to act as a delimiter; the "H" after
// the newline stays in the second piece, and the first piece is empty.
expectSplit("^H", "Hello\nHobbes!", "", "ello\nHobbes!");
// '$': the delimiter is expected to run from the second 'o' to the end of the
// input, leaving only the text before it.
expectSplit("o.*?$", "Hello\r\nHobbes!", "Hello\r\nH");
// '\b': zero-width split at every transition between word and non-word
// characters, including the one at the very start (hence the leading "").
expectSplit("\\b", "a+ b + c\nd", "", "a", "+ ", "b", " + ", "c", "\n", "d");
// '\B': zero-width split at every interior position that is NOT a word
// boundary (between two word characters).
expectSplit("\\B", "Hi Cal!", "H", "i C", "a", "l!");
}
}

View File

@ -305,6 +305,16 @@ class Compiler implements PikeVMOpcodes {
current.push(new CharacterRange(characterClass));
continue;
}
// Escapes that assert a position rather than consume a character:
// \b pushes WORD_BOUNDARY, \B pushes NON_WORD_BOUNDARY. In both cases the
// escaped letter is skipped by advancing index before continuing the scan.
// Any other escape falls through to the statement that follows.
if (array[index + 1] == 'b') {
  index++;
  current.push(WORD_BOUNDARY);
  continue;
}
if (array[index + 1] == 'B') {
  index++;
  current.push(NON_WORD_BOUNDARY);
  continue;
}
throw new RuntimeException("Parse error @" + index + ": " + regex);
case '?':
case '*':
@ -379,6 +389,12 @@ class Compiler implements PikeVMOpcodes {
case '|':
current.startAlternative();
continue;
// '^' compiles to the zero-width LINE_START opcode.
case '^':
current.push(LINE_START);
continue;
// '$' compiles to the zero-width LINE_END opcode.
case '$':
current.push(LINE_END);
continue;
default:
throw new RuntimeException("Parse error @" + index + ": " + regex);
}

View File

@ -28,6 +28,11 @@ class PikeVM implements PikeVMOpcodes {
private final int findPrefixLength;
private final CharacterMatcher[] classes;
private final PikeVM[] lookarounds;
// Matcher for the \w class; used to classify the characters on either side
// of a position when evaluating WORD_BOUNDARY / NON_WORD_BOUNDARY.
private final static CharacterMatcher wordCharacter =
CharacterMatcher.parse("\\w");
// Matcher for the characters treated as line terminators by LINE_START /
// LINE_END: LF, CR, NEL (U+0085), LINE SEPARATOR (U+2028) and
// PARAGRAPH SEPARATOR (U+2029).
private final static CharacterMatcher lineTerminator =
CharacterMatcher.parse("[\n\r\u0085\u2028\u2029]");
// When true, LINE_START / LINE_END also match next to line terminators inside
// the input instead of only at its two ends.
// NOTE(review): no assignment is visible in this section, so the field keeps
// its default of false unless it is set elsewhere -- confirm.
private boolean multiLine;
public interface Result {
void set(int[] start, int[] end);
@ -341,6 +346,46 @@ class PikeVM implements PikeVMOpcodes {
case DOTALL:
current.queueNext(pc, pc + 1, next);
break;
case WORD_BOUNDARY:
case NON_WORD_BOUNDARY: {
  // Zero-width assertion: classify the character at the current position
  // and the one at i - step (presumably the position visited just before i
  // in the scan direction -- confirm) as word / non-word and compare.
  //
  // A position outside the input counts as "non-word" on BOTH sides. The
  // previous code applied the range check to only some branches, so \b
  // failed at the end of the input after a word character, and \B failed
  // at the end of the input after a non-word character.
  final int i2 = i - step;
  final boolean previousIsWord = i2 >= 0 && i2 < characters.length
      && wordCharacter.matches(characters[i2]);
  final boolean currentIsWord = i >= 0 && i < characters.length
      && wordCharacter.matches(c);
  final boolean atBoundary = previousIsWord != currentIsWord;
  // WORD_BOUNDARY proceeds only at a boundary, NON_WORD_BOUNDARY only
  // where there is none; neither consumes a character.
  if (atBoundary == (opcode == WORD_BOUNDARY)) {
    current.queueImmediately(pc, pc + 1, false);
  }
  break;
}
case LINE_START:
if (i == 0 || (multiLine &&
lineTerminator.matches(characters[i - 1]))) {
current.queueImmediately(pc, pc + 1, false);
}
break;
case LINE_END:
if (i == characters.length || (multiLine &&
lineTerminator.matches(c))) {
current.queueImmediately(pc, pc + 1, false);
}
break;
case CHARACTER_CLASS:
if (classes[program[pc + 1]].matches(c)) {
current.queueNext(pc, pc + 2, next);

View File

@ -22,6 +22,11 @@ interface PikeVMOpcodes {
final static int DOT = -1;
final static int DOTALL = -2;
// Zero-width assertions: emitted by the compiler for \b, \B, ^ and $.
// \b -- position between a word and a non-word character.
final static int WORD_BOUNDARY = -10;
// \B -- position that is not a word boundary.
final static int NON_WORD_BOUNDARY = -11;
// ^ -- start of input (or of a line, in multi-line mode).
final static int LINE_START = -12;
// $ -- end of input (or of a line, in multi-line mode).
final static int LINE_END = -13;
final static int CHARACTER_CLASS = -20;
final static int LOOKAHEAD = -30;