mirror of
https://github.com/corda/corda.git
synced 2025-01-22 12:28:11 +00:00
Regex: support look-behind patterns
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
parent
62d1964779
commit
8b611c8075
@ -60,5 +60,6 @@ public class Regex {
|
|||||||
expectGroups("a|(b|c)", "a", (String)null);
|
expectGroups("a|(b|c)", "a", (String)null);
|
||||||
expectGroups("a|(b|c)", "c", "c");
|
expectGroups("a|(b|c)", "c", "c");
|
||||||
expectGroups("(?=a)a", "a");
|
expectGroups("(?=a)a", "a");
|
||||||
|
expectGroups(".*(o)(?<=[A-Z][a-z]*)", "Hello", "o");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -28,7 +28,7 @@ class Compiler implements PikeVMOpcodes {
|
|||||||
private int groupCount = -1;
|
private int groupCount = -1;
|
||||||
private int findPreambleSize;
|
private int findPreambleSize;
|
||||||
private ArrayList<CharacterMatcher> classes;
|
private ArrayList<CharacterMatcher> classes;
|
||||||
private ArrayList<PikeVM> lookaheads;
|
private ArrayList<PikeVM> lookarounds;
|
||||||
|
|
||||||
public Output(Expression expr) {
|
public Output(Expression expr) {
|
||||||
// try-run to determine the code size
|
// try-run to determine the code size
|
||||||
@ -37,7 +37,7 @@ class Compiler implements PikeVMOpcodes {
|
|||||||
offset = 0;
|
offset = 0;
|
||||||
groupCount = -1;
|
groupCount = -1;
|
||||||
classes = new ArrayList<CharacterMatcher>();
|
classes = new ArrayList<CharacterMatcher>();
|
||||||
lookaheads = new ArrayList<PikeVM>();
|
lookarounds = new ArrayList<PikeVM>();
|
||||||
// write it out!
|
// write it out!
|
||||||
expr.writeCode(this);
|
expr.writeCode(this);
|
||||||
}
|
}
|
||||||
@ -66,10 +66,10 @@ class Compiler implements PikeVMOpcodes {
|
|||||||
public PikeVM toVM() {
|
public PikeVM toVM() {
|
||||||
CharacterMatcher[] classes = new CharacterMatcher[this.classes.size()];
|
CharacterMatcher[] classes = new CharacterMatcher[this.classes.size()];
|
||||||
this.classes.toArray(classes);
|
this.classes.toArray(classes);
|
||||||
PikeVM[] lookaheads = new PikeVM[this.lookaheads.size()];
|
PikeVM[] lookarounds = new PikeVM[this.lookarounds.size()];
|
||||||
this.lookaheads.toArray(lookaheads);
|
this.lookarounds.toArray(lookarounds);
|
||||||
return new PikeVM(program, findPreambleSize, groupCount, classes,
|
return new PikeVM(program, findPreambleSize, groupCount, classes,
|
||||||
lookaheads);
|
lookarounds);
|
||||||
}
|
}
|
||||||
|
|
||||||
public int addClass(CharacterMatcher characterClass) {
|
public int addClass(CharacterMatcher characterClass) {
|
||||||
@ -81,12 +81,12 @@ class Compiler implements PikeVMOpcodes {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int addLookahead(PikeVM lookahead) {
|
public int addLookaround(PikeVM lookaround) {
|
||||||
if (program == null) {
|
if (program == null) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
int result = lookaheads.size();
|
int result = lookarounds.size();
|
||||||
lookaheads.add(lookahead);
|
lookarounds.add(lookaround);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -226,14 +226,22 @@ class Compiler implements PikeVMOpcodes {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class Lookahead extends Expression {
|
private class Lookaround extends Expression {
|
||||||
private final Group group = new Group(false, null);
|
private final Group group = new Group(false, null);
|
||||||
|
private final boolean forward;
|
||||||
|
|
||||||
|
public Lookaround(boolean forward) {
|
||||||
|
this.forward = forward;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void writeCode(Output output) {
|
protected void writeCode(Output output) {
|
||||||
PikeVM vm = new Output(group).toVM();
|
PikeVM vm = new Output(group).toVM();
|
||||||
output.add(LOOKAHEAD);
|
if (!forward) {
|
||||||
output.add(output.addLookahead(vm));
|
vm.reverse();
|
||||||
|
}
|
||||||
|
output.add(forward ? LOOKAHEAD : LOOKBEHIND);
|
||||||
|
output.add(output.addLookaround(vm));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -302,15 +310,28 @@ class Compiler implements PikeVMOpcodes {
|
|||||||
+ regex);
|
+ regex);
|
||||||
}
|
}
|
||||||
c = array[index];
|
c = array[index];
|
||||||
|
boolean lookAhead = true;
|
||||||
|
if (c == '<') {
|
||||||
|
if (++ index >= array.length) {
|
||||||
|
throw new RuntimeException("Short pattern @" + index + ": "
|
||||||
|
+ regex);
|
||||||
|
}
|
||||||
|
lookAhead = false;
|
||||||
|
c = array[index];
|
||||||
|
if (c != '=' && c != '!') {
|
||||||
|
throw new IllegalArgumentException("Named groups not supported @"
|
||||||
|
+ index + ": " + regex);
|
||||||
|
}
|
||||||
|
}
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case ':':
|
case ':':
|
||||||
capturing = false;
|
capturing = false;
|
||||||
break;
|
break;
|
||||||
case '=': {
|
case '=': {
|
||||||
capturing = false;
|
capturing = false;
|
||||||
Lookahead lookahead = new Lookahead();
|
Lookaround lookaround = new Lookaround(lookAhead);
|
||||||
current.push(lookahead);
|
current.push(lookaround);
|
||||||
groups.push(lookahead.group);
|
groups.push(lookaround.group);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
|
@ -27,21 +27,21 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
*/
|
*/
|
||||||
private final int findPrefixLength;
|
private final int findPrefixLength;
|
||||||
private final CharacterMatcher[] classes;
|
private final CharacterMatcher[] classes;
|
||||||
private final PikeVM[] lookaheads;
|
private final PikeVM[] lookarounds;
|
||||||
|
|
||||||
public interface Result {
|
public interface Result {
|
||||||
void set(int[] start, int[] end);
|
void set(int[] start, int[] end);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected PikeVM(int[] program, int findPrefixLength, int groupCount,
|
protected PikeVM(int[] program, int findPrefixLength, int groupCount,
|
||||||
CharacterMatcher[] classes, PikeVM[] lookaheads)
|
CharacterMatcher[] classes, PikeVM[] lookarounds)
|
||||||
{
|
{
|
||||||
this.program = program;
|
this.program = program;
|
||||||
this.findPrefixLength = findPrefixLength;
|
this.findPrefixLength = findPrefixLength;
|
||||||
this.groupCount = groupCount;
|
this.groupCount = groupCount;
|
||||||
offsetsCount = 2 * groupCount + 2;
|
offsetsCount = 2 * groupCount + 2;
|
||||||
this.classes = classes;
|
this.classes = classes;
|
||||||
this.lookaheads = lookaheads;
|
this.lookarounds = lookarounds;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -293,13 +293,14 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
ThreadQueue queued = new ThreadQueue(startPC);
|
ThreadQueue queued = new ThreadQueue(startPC);
|
||||||
|
|
||||||
boolean foundMatch = false;
|
boolean foundMatch = false;
|
||||||
for (int i = start; i <= end; ++i) {
|
int step = end > start ? +1 : -1;
|
||||||
|
for (int i = start; i != end + step; i += step) {
|
||||||
if (queued.isEmpty()) {
|
if (queued.isEmpty()) {
|
||||||
// no threads left
|
// no threads left
|
||||||
return foundMatch;
|
return foundMatch;
|
||||||
}
|
}
|
||||||
|
|
||||||
char c = i < end ? characters[i] : 0;
|
char c = i != end ? characters[i] : 0;
|
||||||
int pc = -1;
|
int pc = -1;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
pc = current.next(pc);
|
pc = current.next(pc);
|
||||||
@ -312,7 +313,7 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
|
|
||||||
// pc == program.length is a match!
|
// pc == program.length is a match!
|
||||||
if (pc == program.length) {
|
if (pc == program.length) {
|
||||||
if (anchorEnd && i < end) {
|
if (anchorEnd && i != end) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (result == null) {
|
if (result == null) {
|
||||||
@ -346,11 +347,17 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case LOOKAHEAD:
|
case LOOKAHEAD:
|
||||||
if (lookaheads[program[pc + 1]].matches(characters,
|
if (lookarounds[program[pc + 1]].matches(characters,
|
||||||
i, characters.length, true, false, null)) {
|
i, characters.length, true, false, null)) {
|
||||||
current.queueImmediately(pc, pc + 2, false);
|
current.queueImmediately(pc, pc + 2, false);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case LOOKBEHIND:
|
||||||
|
if (lookarounds[program[pc + 1]].matches(characters,
|
||||||
|
i - 1, -1, true, false, null)) {
|
||||||
|
current.queueImmediately(pc, pc + 2, false);
|
||||||
|
}
|
||||||
|
break;
|
||||||
/* immediate opcodes, i.e. thread continues within the same step */
|
/* immediate opcodes, i.e. thread continues within the same step */
|
||||||
case SAVE_OFFSET:
|
case SAVE_OFFSET:
|
||||||
if (result != null) {
|
if (result != null) {
|
||||||
|
@ -25,6 +25,7 @@ interface PikeVMOpcodes {
|
|||||||
final static int CHARACTER_CLASS = -20;
|
final static int CHARACTER_CLASS = -20;
|
||||||
|
|
||||||
final static int LOOKAHEAD = -30;
|
final static int LOOKAHEAD = -30;
|
||||||
|
final static int LOOKBEHIND = -31;
|
||||||
|
|
||||||
final static int SAVE_OFFSET = -40;
|
final static int SAVE_OFFSET = -40;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user