mirror of
https://github.com/corda/corda.git
synced 2025-01-21 03:55:00 +00:00
Regex: implement find()
Now that we have non-greedy repeats, we can implement find() (which essentially prefixes the regular expression pattern with '.*?'). Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
parent
7da03b0f19
commit
ca428c406c
@ -32,6 +32,18 @@ public class Regex {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Test helper: checks that repeated {@code find()} calls on {@code string}
 * yield exactly the expected sequence of matches for {@code regex}.
 *
 * A matcher is obtained via {@code getMatcher(regex, string)}. For each entry
 * of {@code matches}, in order, {@code find()} must succeed and
 * {@code group()} must equal that entry. After the last expected match, one
 * more {@code find()} must fail. Failures are reported through
 * {@code expect(...)}, whose behavior is defined elsewhere in this class.
 *
 * @param regex the pattern to search for
 * @param string the input to search in
 * @param matches the expected matched substrings, in the order found
 */
private static void expectFind(String regex, String string,
|
||||||
|
String... matches)
|
||||||
|
{
|
||||||
|
Matcher matcher = getMatcher(regex, string);
|
||||||
|
int i = 0;
|
||||||
|
while (i < matches.length) {
|
||||||
|
expect(matcher.find());
|
||||||
|
expect(matches[i++].equals(matcher.group()));
|
||||||
|
}
|
||||||
|
// no further match may be found once all expected ones were seen
expect(!matcher.find());
|
||||||
|
}
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
expectMatch("a(bb)?a", "abba");
|
expectMatch("a(bb)?a", "abba");
|
||||||
expectNoMatch("a(bb)?a", "abbba");
|
expectNoMatch("a(bb)?a", "abbba");
|
||||||
@ -41,5 +53,6 @@ public class Regex {
|
|||||||
expectNoMatch(".", "\n");
|
expectNoMatch(".", "\n");
|
||||||
expectGroups("a(bb)*a", "abbbba", "bb");
|
expectGroups("a(bb)*a", "abbbba", "bb");
|
||||||
expectGroups("a(bb)?(bb)+a", "abba", null, "bb");
|
expectGroups("a(bb)?(bb)+a", "abba", null, "bb");
|
||||||
|
expectFind(" +", "Hello , world! ", " ", " ", " ");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -26,6 +26,7 @@ class Compiler implements PikeVMOpcodes {
|
|||||||
private int[] program;
|
private int[] program;
|
||||||
private int offset;
|
private int offset;
|
||||||
private int groupCount = -1;
|
private int groupCount = -1;
|
||||||
|
private int findPreambleSize;
|
||||||
|
|
||||||
public Output(Expression expr) {
|
public Output(Expression expr) {
|
||||||
// try-run to determine the code size
|
// try-run to determine the code size
|
||||||
@ -54,9 +55,14 @@ class Compiler implements PikeVMOpcodes {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public PikeVM toVM() {
|
public void markFindPreambleEnd() {
|
||||||
return new PikeVM(program, groupCount);
|
findPreambleSize = offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Creates the virtual machine for the compiled program.
 *
 * Passes the program, the recorded size of the find() preamble
 * ({@code findPreambleSize}, set by {@code markFindPreambleEnd()}) and the
 * group count to the {@code PikeVM} constructor.
 *
 * @return a new {@code PikeVM} for this output
 */
public PikeVM toVM() {
|
||||||
|
return new PikeVM(program, findPreambleSize, groupCount);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private abstract class Expression {
|
private abstract class Expression {
|
||||||
@ -148,6 +154,14 @@ class Compiler implements PikeVMOpcodes {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void writeCode(Output output) {
|
public void writeCode(Output output) {
|
||||||
|
// find() preamble
|
||||||
|
int start = output.offset;
|
||||||
|
output.add(SPLIT_JMP);
|
||||||
|
output.add(start + 5);
|
||||||
|
output.add(DOTALL);
|
||||||
|
output.add(SPLIT);
|
||||||
|
output.add(start + 2);
|
||||||
|
output.markFindPreambleEnd();
|
||||||
group.writeCode(output);
|
group.writeCode(output);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -19,13 +19,21 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
private final int[] program;
|
private final int[] program;
|
||||||
private final int groupCount;
|
private final int groupCount;
|
||||||
private final int offsetsCount;
|
private final int offsetsCount;
|
||||||
|
/*
|
||||||
|
* For find(), we do not want to anchor the match at the start offset. Our
|
||||||
|
* compiler allows this by prefixing the code with an implicit '(?:.*?)'. For
|
||||||
|
* regular matches() calls, we want to skip that code and start at {@code
|
||||||
|
* findPrefixLength} instead.
|
||||||
|
*/
|
||||||
|
private final int findPrefixLength;
|
||||||
|
|
||||||
public interface Result {
|
public interface Result {
|
||||||
void set(int[] start, int[] end);
|
void set(int[] start, int[] end);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected PikeVM(int[] program, int groupCount) {
|
protected PikeVM(int[] program, int findPrefixLength, int groupCount) {
|
||||||
this.program = program;
|
this.program = program;
|
||||||
|
this.findPrefixLength = findPrefixLength;
|
||||||
this.groupCount = groupCount;
|
this.groupCount = groupCount;
|
||||||
offsetsCount = 2 * groupCount + 2;
|
offsetsCount = 2 * groupCount + 2;
|
||||||
}
|
}
|
||||||
@ -190,6 +198,31 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
result.set(groupStart, groupEnd);
|
result.set(groupStart, groupEnd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Removes from this queue every thread whose match does not start at the
 * given offset.
 *
 * The queue is a linked list threaded through {@code next}: {@code next[pc]}
 * holds 1 + the pc of the successor, and the walk ends when that value
 * minus one is negative. {@code offsets[pc][0]} holds 1 + the start offset
 * of the thread at {@code pc} (see {@code startOffset()}), hence the
 * comparison against {@code start + 1}.
 *
 * NOTE(review): when the removed entry is the tail, head and tail are both
 * reset to -1, which empties the queue even if earlier entries were kept
 * ({@code previous >= 0}). Presumably the queue order guarantees that kept
 * entries never precede a removed tail -- confirm against the callers.
 *
 * @param start the start offset that all remaining threads must share
 */
private void mustStartMatchAt(int start) {
|
||||||
|
// pc of the most recent entry that was kept, or -1 if none so far
int previous = -1;
|
||||||
|
for (int pc = head; pc >= 0; ) {
|
||||||
|
// read the successor before the link is possibly cleared below
int nextPC = next[pc] - 1;
|
||||||
|
if (start + 1 == offsets[pc][0]) {
|
||||||
|
previous = pc;
|
||||||
|
} else {
|
||||||
|
// unlink this entry: clear its link and drop its offsets
next[pc] = 0;
|
||||||
|
offsets[pc] = null;
|
||||||
|
if (pc == tail) {
|
||||||
|
head = tail = -1;
|
||||||
|
} else if (previous < 0) {
|
||||||
|
// nothing kept yet: the list now starts at the successor
head = nextPC;
|
||||||
|
} else {
|
||||||
|
// bypass the removed entry from the last kept one
next[previous] = 1 + nextPC;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pc = nextPC;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the start offset recorded for the thread at the given pc.
 *
 * {@code offsets[pc][0]} is stored with a bias of one, so one is subtracted
 * here.
 *
 * @param pc the program counter identifying the thread
 * @return the value of {@code offsets[pc][0]} minus one
 */
private int startOffset(int pc) {
|
||||||
|
return offsets[pc][0] - 1;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isEmpty() {
|
public boolean isEmpty() {
|
||||||
return head < 0;
|
return head < 0;
|
||||||
}
|
}
|
||||||
@ -250,11 +283,8 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
ThreadQueue next = new ThreadQueue();
|
ThreadQueue next = new ThreadQueue();
|
||||||
|
|
||||||
// initialize the first thread
|
// initialize the first thread
|
||||||
ThreadQueue queued = new ThreadQueue(0);
|
int startPC = anchorStart ? findPrefixLength : 0;
|
||||||
if (!anchorStart) {
|
ThreadQueue queued = new ThreadQueue(startPC);
|
||||||
// this requires non-greedy matching
|
|
||||||
throw new UnsupportedOperationException();
|
|
||||||
}
|
|
||||||
|
|
||||||
boolean foundMatch = false;
|
boolean foundMatch = false;
|
||||||
for (int i = start; i <= end; ++i) {
|
for (int i = start; i <= end; ++i) {
|
||||||
@ -280,6 +310,11 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
current.setResult(result);
|
current.setResult(result);
|
||||||
|
// now that we found a match, even higher-priority matches must match
|
||||||
|
// at the same start offset
|
||||||
|
if (!anchorStart) {
|
||||||
|
next.mustStartMatchAt(current.startOffset(pc));
|
||||||
|
}
|
||||||
foundMatch = true;
|
foundMatch = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -346,9 +381,9 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
* non-trivial pattern
|
* non-trivial pattern
|
||||||
*/
|
*/
|
||||||
public String isPlainString() {
|
public String isPlainString() {
|
||||||
// we expect the machine to start with SAVE_OFFSET 0 and
|
// we expect the machine to start with the find preamble and SAVE_OFFSET 0, and
|
||||||
// end with SAVE_OFFSET 1
|
// end with SAVE_OFFSET 1
|
||||||
int start = 0;
|
int start = findPrefixLength;
|
||||||
if (start + 1 < program.length &&
|
if (start + 1 < program.length &&
|
||||||
program[start] == SAVE_OFFSET && program[start + 1] == 0) {
|
program[start] == SAVE_OFFSET && program[start + 1] == 0) {
|
||||||
start += 2;
|
start += 2;
|
||||||
|
@ -49,8 +49,12 @@ public class RegexMatcher extends Matcher {
|
|||||||
return vm.matches(array, 0, array.length, true, true, adapter);
|
return vm.matches(array, 0, array.length, true, true, adapter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Searches for the next match by delegating to {@code find(int)}.
 *
 * The search begins at {@code end}, or at {@code end + 1} when
 * {@code start == end}. Presumably {@code start} and {@code end} delimit the
 * previous match, so the extra step keeps an empty match from being found
 * again at the same position -- confirm against the superclass.
 *
 * NOTE(review): if {@code start} and {@code end} are equal before the first
 * call (e.g. both 0), the first search begins at index 1 and would miss a
 * match at index 0 -- confirm the initial values.
 *
 * @return the result of {@code find(int)} for the computed offset
 */
public boolean find() {
|
||||||
|
return find(end + (start == end ? 1 : 0));
|
||||||
|
}
|
||||||
|
|
||||||
public boolean find(int offset) {
|
public boolean find(int offset) {
|
||||||
throw new UnsupportedOperationException("TODO");
|
return vm.matches(array, offset, array.length, false, false, adapter);
|
||||||
}
|
}
|
||||||
|
|
||||||
public int start(int group) {
|
public int start(int group) {
|
||||||
|
Loading…
Reference in New Issue
Block a user