mirror of
https://github.com/corda/corda.git
synced 2025-01-21 03:55:00 +00:00
Regex: add support for character classes
Now we support regular expression patterns a la '[0-9]'. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
parent
ca428c406c
commit
53563c4f8e
@ -54,5 +54,7 @@ public class Regex {
|
|||||||
expectGroups("a(bb)*a", "abbbba", "bb");
|
expectGroups("a(bb)*a", "abbbba", "bb");
|
||||||
expectGroups("a(bb)?(bb)+a", "abba", null, "bb");
|
expectGroups("a(bb)?(bb)+a", "abba", null, "bb");
|
||||||
expectFind(" +", "Hello , world! ", " ", " ", " ");
|
expectFind(" +", "Hello , world! ", " ", " ", " ");
|
||||||
|
expectMatch("[0-9A-Fa-f]+", "08ef");
|
||||||
|
expectNoMatch("[0-9A-Fa-f]+", "08@ef");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -27,6 +27,7 @@ class Compiler implements PikeVMOpcodes {
|
|||||||
private int offset;
|
private int offset;
|
||||||
private int groupCount = -1;
|
private int groupCount = -1;
|
||||||
private int findPreambleSize;
|
private int findPreambleSize;
|
||||||
|
private ArrayList<CharacterMatcher> classes;
|
||||||
|
|
||||||
public Output(Expression expr) {
|
public Output(Expression expr) {
|
||||||
// try-run to determine the code size
|
// try-run to determine the code size
|
||||||
@ -34,6 +35,7 @@ class Compiler implements PikeVMOpcodes {
|
|||||||
program = new int[offset];
|
program = new int[offset];
|
||||||
offset = 0;
|
offset = 0;
|
||||||
groupCount = -1;
|
groupCount = -1;
|
||||||
|
classes = new ArrayList<CharacterMatcher>();
|
||||||
// write it out!
|
// write it out!
|
||||||
expr.writeCode(this);
|
expr.writeCode(this);
|
||||||
}
|
}
|
||||||
@ -60,15 +62,38 @@ class Compiler implements PikeVMOpcodes {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public PikeVM toVM() {
|
public PikeVM toVM() {
|
||||||
return new PikeVM(program, findPreambleSize, groupCount);
|
CharacterMatcher[] classes = new CharacterMatcher[this.classes.size()];
|
||||||
|
this.classes.toArray(classes);
|
||||||
|
return new PikeVM(program, findPreambleSize, groupCount, classes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int addClass(CharacterMatcher characterClass) {
|
||||||
|
if (program == null) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
int result = classes.size();
|
||||||
|
classes.add(characterClass);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private abstract class Expression {
|
private abstract class Expression {
|
||||||
protected abstract void writeCode(Output output);
|
protected abstract void writeCode(Output output);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private class CharacterRange extends Expression {
|
||||||
|
private final CharacterMatcher characterClass;
|
||||||
|
|
||||||
|
public CharacterRange(CharacterMatcher characterClass) {
|
||||||
|
this.characterClass = characterClass;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void writeCode(Output output) {
|
||||||
|
output.add(CHARACTER_CLASS);
|
||||||
|
output.add(output.addClass(characterClass));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private class Repeat extends Expression {
|
private class Repeat extends Expression {
|
||||||
private Expression expr;
|
private Expression expr;
|
||||||
private int minCount, maxCount;
|
private int minCount, maxCount;
|
||||||
@ -177,6 +202,8 @@ class Compiler implements PikeVMOpcodes {
|
|||||||
|
|
||||||
public Pattern compile(String regex) {
|
public Pattern compile(String regex) {
|
||||||
char[] array = regex.toCharArray();
|
char[] array = regex.toCharArray();
|
||||||
|
CharacterMatcher.Parser characterClassParser =
|
||||||
|
new CharacterMatcher.Parser(array);
|
||||||
for (int index = 0; index < array.length; ++ index) {
|
for (int index = 0; index < array.length; ++ index) {
|
||||||
char c = array[index];
|
char c = array[index];
|
||||||
Group current = groups.peek();
|
Group current = groups.peek();
|
||||||
@ -214,6 +241,15 @@ class Compiler implements PikeVMOpcodes {
|
|||||||
}
|
}
|
||||||
groups.pop();
|
groups.pop();
|
||||||
continue;
|
continue;
|
||||||
|
case '[': {
|
||||||
|
CharacterMatcher matcher = characterClassParser.parseClass(index);
|
||||||
|
if (matcher == null) {
|
||||||
|
throw new RuntimeException("Invalid range @" + index + ": " + regex);
|
||||||
|
}
|
||||||
|
current.push(new CharacterRange(matcher));
|
||||||
|
index = characterClassParser.getEndOffset() - 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
throw new RuntimeException("Parse error @" + index + ": " + regex);
|
throw new RuntimeException("Parse error @" + index + ": " + regex);
|
||||||
}
|
}
|
||||||
|
@ -26,16 +26,20 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
* findPrefixLength} instead.
|
* findPrefixLength} instead.
|
||||||
*/
|
*/
|
||||||
private final int findPrefixLength;
|
private final int findPrefixLength;
|
||||||
|
private final CharacterMatcher[] classes;
|
||||||
|
|
||||||
public interface Result {
|
public interface Result {
|
||||||
void set(int[] start, int[] end);
|
void set(int[] start, int[] end);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Creates a VM around an already compiled program.
 *
 * @param program the opcode array to execute
 * @param findPrefixLength stored as-is in the field of the same name
 * @param groupCount the group count; the number of offsets is derived
 *        from it as {@code 2 * groupCount + 2}
 * @param classes character class matchers, looked up by the operand of
 *        the CHARACTER_CLASS opcode
 */
protected PikeVM(int[] program, int findPrefixLength, int groupCount,
  CharacterMatcher[] classes)
{
  this.program = program;
  this.classes = classes;
  this.groupCount = groupCount;
  this.findPrefixLength = findPrefixLength;
  // a start and an end offset per group, plus one extra pair
  this.offsetsCount = 2 + 2 * groupCount;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -329,6 +333,11 @@ class PikeVM implements PikeVMOpcodes {
|
|||||||
case DOTALL:
|
case DOTALL:
|
||||||
current.queueNext(pc, pc + 1, next);
|
current.queueNext(pc, pc + 1, next);
|
||||||
break;
|
break;
|
||||||
|
case CHARACTER_CLASS:
|
||||||
|
if (classes[program[pc + 1]].matches(c)) {
|
||||||
|
current.queueNext(pc, pc + 2, next);
|
||||||
|
}
|
||||||
|
break;
|
||||||
/* immediate opcodes, i.e. thread continues within the same step */
|
/* immediate opcodes, i.e. thread continues within the same step */
|
||||||
case SAVE_OFFSET:
|
case SAVE_OFFSET:
|
||||||
int index = program[pc + 1];
|
int index = program[pc + 1];
|
||||||
|
@ -22,6 +22,8 @@ interface PikeVMOpcodes {
|
|||||||
final static int DOT = -1;
|
final static int DOT = -1;
|
||||||
final static int DOTALL = -2;
|
final static int DOTALL = -2;
|
||||||
|
|
||||||
|
final static int CHARACTER_CLASS = -20;
|
||||||
|
|
||||||
final static int SAVE_OFFSET = -40;
|
final static int SAVE_OFFSET = -40;
|
||||||
|
|
||||||
final static int SPLIT = -50;
|
final static int SPLIT = -50;
|
||||||
|
Loading…
Reference in New Issue
Block a user