Regex: support intersection/union of character classes

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
Johannes Schindelin 2013-11-12 13:52:20 -06:00
parent b4c768b101
commit fe32cce2ad
2 changed files with 35 additions and 2 deletions

View File

@ -81,5 +81,7 @@ public class Regex {
expectMatch("\\078", "\0078"); expectMatch("\\078", "\0078");
expectSplit("(?<=\\w)(?=\\W)|(?<=\\W)(?=\\w)", "a + b * x", expectSplit("(?<=\\w)(?=\\W)|(?<=\\W)(?=\\w)", "a + b * x",
"a", " + ", "b", " * ", "x"); "a", " + ", "b", " * ", "x");
expectMatch("[0-9[def]]", "f");
expectNoMatch("[a-z&&[^d-f]]", "f");
} }
} }

View File

@ -98,6 +98,17 @@ class CharacterMatcher {
this.inversePattern = inversePattern; this.inversePattern = inversePattern;
} }
/**
 * Restricts this matcher to the characters accepted by both this matcher
 * and {@code other} (the {@code &&} operator inside a character class).
 *
 * <p>The combined matcher is stored inverted only when both operands are
 * inverted; in every other case the intersection is stored as a plain
 * (non-inverted) map.
 *
 * @param other the matcher to intersect with; it is only read, never modified
 */
private void intersect(CharacterMatcher other) {
  boolean inversePattern = this.inversePattern && other.inversePattern;

  // Decide how many map entries the result needs. An inverted operand
  // accepts every character beyond the end of its own map, so it must never
  // be the one that limits the length of a non-inverted result.
  int length;
  if (this.inversePattern == other.inversePattern) {
    // Both inverted: the result lists the characters rejected by either
    // operand, so it needs the longer map. Neither inverted: only
    // characters present in both maps can survive, so the shorter suffices.
    length = this.inversePattern
      ? Math.max(map.length, other.map.length)
      : Math.min(map.length, other.map.length);
  } else {
    // Exactly one operand is inverted: the non-inverted operand bounds the
    // set of characters that can still match.
    length = this.inversePattern ? other.map.length : map.length;
  }
  if (length != map.length) {
    // copyOf pads new entries with false, which keeps matches() unchanged
    // for an inverted matcher that is being extended.
    map = java.util.Arrays.copyOf(map, length);
  }

  // this.inversePattern still holds the old flag here, so matches() keeps
  // evaluating the original operand; map[i] is read before it is overwritten.
  // NOTE(review): like the original loop, this relies on other.matches()
  // handling indices beyond other.map's length -- confirm against matches().
  for (int i = 0; i < map.length; ++ i) {
    map[i] = (matches((char)i) && other.matches((char)i)) ^ inversePattern;
  }
  this.inversePattern = inversePattern;
}
static class Parser { static class Parser {
private final char[] description; private final char[] description;
private int offset; private int offset;
@ -248,8 +259,28 @@ class CharacterMatcher {
} else { } else {
matcher.setMatch(previous); matcher.setMatch(previous);
} }
} else if (c == '&' || c == '[') { } else if (c == '[') {
Parser parser = new Parser(description);
CharacterMatcher other = parser.parseClass(offset - 1);
if (other == null) {
unsupported("invalid merge");
}
matcher.merge(other);
offset = parser.getEndOffset();
previous = -1;
} else if (c == '&') {
if (offset + 2 > description.length || description[offset] != '&'
|| description[offset + 1] != '[') {
unsupported("operation"); unsupported("operation");
}
Parser parser = new Parser(description);
CharacterMatcher other = parser.parseClass(offset + 1);
if (other == null) {
unsupported("invalid intersection");
}
matcher.intersect(other);
offset = parser.getEndOffset();
previous = -1;
} else if (c == ']') { } else if (c == ']') {
break; break;
} else { } else {