mirror of
https://github.com/corda/corda.git
synced 2025-03-17 17:45:17 +00:00
Regex: implement ^,$,\b and \B
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
parent
fe32cce2ad
commit
fb6486e276
@ -83,5 +83,9 @@ public class Regex {
|
||||
"a", " + ", "b", " * ", "x");
|
||||
expectMatch("[0-9[def]]", "f");
|
||||
expectNoMatch("[a-z&&[^d-f]]", "f");
|
||||
expectSplit("^H", "Hello\nHobbes!", "", "ello\nHobbes!");
|
||||
expectSplit("o.*?$", "Hello\r\nHobbes!", "Hello\r\nH");
|
||||
expectSplit("\\b", "a+ b + c\nd", "", "a", "+ ", "b", " + ", "c", "\n", "d");
|
||||
expectSplit("\\B", "Hi Cal!", "H", "i C", "a", "l!");
|
||||
}
|
||||
}
|
||||
|
@ -305,6 +305,16 @@ class Compiler implements PikeVMOpcodes {
|
||||
current.push(new CharacterRange(characterClass));
|
||||
continue;
|
||||
}
|
||||
switch (array[index + 1]) {
|
||||
case 'b':
|
||||
index++;
|
||||
current.push(WORD_BOUNDARY);
|
||||
continue;
|
||||
case 'B':
|
||||
index++;
|
||||
current.push(NON_WORD_BOUNDARY);
|
||||
continue;
|
||||
}
|
||||
throw new RuntimeException("Parse error @" + index + ": " + regex);
|
||||
case '?':
|
||||
case '*':
|
||||
@ -379,6 +389,12 @@ class Compiler implements PikeVMOpcodes {
|
||||
case '|':
|
||||
current.startAlternative();
|
||||
continue;
|
||||
case '^':
|
||||
current.push(LINE_START);
|
||||
continue;
|
||||
case '$':
|
||||
current.push(LINE_END);
|
||||
continue;
|
||||
default:
|
||||
throw new RuntimeException("Parse error @" + index + ": " + regex);
|
||||
}
|
||||
|
@ -28,6 +28,11 @@ class PikeVM implements PikeVMOpcodes {
|
||||
private final int findPrefixLength;
|
||||
private final CharacterMatcher[] classes;
|
||||
private final PikeVM[] lookarounds;
|
||||
private final static CharacterMatcher wordCharacter =
|
||||
CharacterMatcher.parse("\\w");
|
||||
private final static CharacterMatcher lineTerminator =
|
||||
CharacterMatcher.parse("[\n\r\u0085\u2028\u2029]");
|
||||
private boolean multiLine;
|
||||
|
||||
public interface Result {
|
||||
void set(int[] start, int[] end);
|
||||
@ -341,6 +346,46 @@ class PikeVM implements PikeVMOpcodes {
|
||||
case DOTALL:
|
||||
current.queueNext(pc, pc + 1, next);
|
||||
break;
|
||||
case WORD_BOUNDARY:
|
||||
case NON_WORD_BOUNDARY: {
|
||||
int i2 = i - step;
|
||||
int c2 = i2 < 0 || i2 >= characters.length ? -1 : characters[i2];
|
||||
switch (opcode) {
|
||||
case WORD_BOUNDARY:
|
||||
if ((c2 < 0 || !wordCharacter.matches((char)c2))) {
|
||||
if (wordCharacter.matches(c)) {
|
||||
current.queueImmediately(pc, pc + 1, false);
|
||||
}
|
||||
} else if (i >= 0 && i < characters.length &&
|
||||
!wordCharacter.matches(c)) {
|
||||
current.queueImmediately(pc, pc + 1, false);
|
||||
}
|
||||
break;
|
||||
case NON_WORD_BOUNDARY:
|
||||
if ((c2 < 0 || !wordCharacter.matches((char)c2))) {
|
||||
if (i >= 0 && i < characters.length &&
|
||||
!wordCharacter.matches(c)) {
|
||||
current.queueImmediately(pc, pc + 1, false);
|
||||
}
|
||||
} else if (wordCharacter.matches(c)) {
|
||||
current.queueImmediately(pc, pc + 1, false);
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case LINE_START:
|
||||
if (i == 0 || (multiLine &&
|
||||
lineTerminator.matches(characters[i - 1]))) {
|
||||
current.queueImmediately(pc, pc + 1, false);
|
||||
}
|
||||
break;
|
||||
case LINE_END:
|
||||
if (i == characters.length || (multiLine &&
|
||||
lineTerminator.matches(c))) {
|
||||
current.queueImmediately(pc, pc + 1, false);
|
||||
}
|
||||
break;
|
||||
case CHARACTER_CLASS:
|
||||
if (classes[program[pc + 1]].matches(c)) {
|
||||
current.queueNext(pc, pc + 2, next);
|
||||
|
@ -22,6 +22,11 @@ interface PikeVMOpcodes {
|
||||
final static int DOT = -1;
|
||||
final static int DOTALL = -2;
|
||||
|
||||
final static int WORD_BOUNDARY = -10;
|
||||
final static int NON_WORD_BOUNDARY = -11;
|
||||
final static int LINE_START = -12;
|
||||
final static int LINE_END = -13;
|
||||
|
||||
final static int CHARACTER_CLASS = -20;
|
||||
|
||||
final static int LOOKAHEAD = -30;
|
||||
|
Loading…
x
Reference in New Issue
Block a user