mirror of
https://github.com/corda/corda.git
synced 2025-03-17 17:45:17 +00:00
Regex: support special character classes
This adds support for predefined character classes such as \d or \W, leaving \p{...}-style character classes as an exercise for later.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
parent
098f688cd8
commit
8ab10a6953
@ -62,5 +62,12 @@ public class Regex {
|
||||
expectGroups("(?=a)a", "a");
|
||||
expectGroups(".*(o)(?<=[A-Z][a-z]*)", "Hello", "o");
|
||||
expectNoMatch("(?!a).", "a");
|
||||
expectMatch("[\\d]", "0");
|
||||
expectMatch("\\0777", "?7");
|
||||
expectMatch("\\a", "\007");
|
||||
expectMatch("\\\\", "\\");
|
||||
expectMatch("\\x4A", "J");
|
||||
expectMatch("\\x61", "a");
|
||||
expectMatch("\\078", "\0078");
|
||||
}
|
||||
}
|
||||
|
@ -41,6 +41,28 @@ class CharacterMatcher {
|
||||
return (map.length > index && map[index]) ^ inversePattern;
|
||||
}
|
||||
|
||||
private static String specialClass(int c) {
|
||||
if ('d' == c) {
|
||||
return "[0-9]";
|
||||
}
|
||||
if ('D' == c) {
|
||||
return "[^0-9]";
|
||||
}
|
||||
if ('s' == c) {
|
||||
return "[ \\t\\n\\x0B\\f\\r]";
|
||||
}
|
||||
if ('S' == c) {
|
||||
return "[^ \\t\\n\\x0B\\f\\r]";
|
||||
}
|
||||
if ('w' == c) {
|
||||
return "[a-zA-Z_0-9]";
|
||||
}
|
||||
if ('W' == c) {
|
||||
return "[^a-zA-Z_0-9]";
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private CharacterMatcher(boolean[] map, boolean inversePattern) {
|
||||
this.map = map;
|
||||
this.inversePattern = inversePattern;
|
||||
@ -65,6 +87,17 @@ class CharacterMatcher {
|
||||
map = java.util.Arrays.copyOf(map, size);
|
||||
}
|
||||
|
||||
private void merge(CharacterMatcher other) {
|
||||
boolean inversePattern = this.inversePattern || other.inversePattern;
|
||||
if ((map.length < other.map.length) ^ inversePattern) {
|
||||
map = java.util.Arrays.copyOf(map, other.map.length);
|
||||
}
|
||||
for (int i = 0; i < map.length; ++ i) {
|
||||
map[i] = (matches((char)i) || other.matches((char)i)) ^ inversePattern;
|
||||
}
|
||||
this.inversePattern = inversePattern;
|
||||
}
|
||||
|
||||
static class Parser {
|
||||
private final char[] description;
|
||||
private int offset;
|
||||
@ -165,6 +198,13 @@ class CharacterMatcher {
|
||||
|
||||
public CharacterMatcher parseClass() {
|
||||
if (description[offset] != '[') {
|
||||
if (description[offset] == '\\') {
|
||||
String range = specialClass(description[++ offset]);
|
||||
if (range != null) {
|
||||
++ offset;
|
||||
return CharacterMatcher.parse(range);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
CharacterMatcher matcher = new CharacterMatcher(new boolean[0],
|
||||
@ -196,9 +236,15 @@ class CharacterMatcher {
|
||||
matcher.map[j] = true;
|
||||
}
|
||||
} else if (c == '\\') {
|
||||
int saved = offset;
|
||||
previous = parseEscapedCharacter();
|
||||
if (previous < 0) {
|
||||
unsupported("escape");
|
||||
offset = saved - 1;
|
||||
CharacterMatcher clazz = parseClass();
|
||||
if (clazz == null) {
|
||||
unsupported("escape");
|
||||
}
|
||||
matcher.merge(clazz);
|
||||
} else {
|
||||
matcher.setMatch(previous);
|
||||
}
|
||||
|
@ -292,6 +292,20 @@ class Compiler implements PikeVMOpcodes {
|
||||
case '.':
|
||||
current.push(DOT);
|
||||
continue;
|
||||
case '\\':
|
||||
int unescaped = characterClassParser.parseEscapedCharacter(index + 1);
|
||||
if (unescaped >= 0) {
|
||||
index = characterClassParser.getEndOffset() - 1;
|
||||
current.push((char)unescaped);
|
||||
continue;
|
||||
}
|
||||
CharacterMatcher characterClass = characterClassParser.parseClass(index);
|
||||
if (characterClass != null) {
|
||||
index = characterClassParser.getEndOffset() - 1;
|
||||
current.push(new CharacterRange(characterClass));
|
||||
continue;
|
||||
}
|
||||
throw new RuntimeException("Parse error @" + index + ": " + regex);
|
||||
case '?':
|
||||
case '*':
|
||||
case '+': {
|
||||
|
Loading…
x
Reference in New Issue
Block a user