diff --git a/test/Regex.java b/test/Regex.java
index 48836cfe38..6a5e0909ad 100644
--- a/test/Regex.java
+++ b/test/Regex.java
@@ -62,5 +62,15 @@ public class Regex {
     expectGroups("(?=a)a", "a");
     expectGroups(".*(o)(?<=[A-Z][a-z]*)", "Hello", "o");
     expectNoMatch("(?!a).", "a");
+    expectMatch("[\\d]", "0");
+    expectMatch("\\0777", "?7");
+    expectMatch("\\a", "\007");
+    expectMatch("\\\\", "\\");
+    expectMatch("\\x4A", "J");
+    expectMatch("\\x61", "a");
+    expectMatch("\\078", "\0078");
+    // A literal merged with an inverted predefined class must keep both.
+    expectMatch("[0\\D]", "0");
+    expectNoMatch("[0\\D]", "5");
   }
 }
diff --git a/test/regex/CharacterMatcher.java b/test/regex/CharacterMatcher.java
index 8e5d5318b8..b5b99a75cb 100644
--- a/test/regex/CharacterMatcher.java
+++ b/test/regex/CharacterMatcher.java
@@ -41,6 +41,37 @@ class CharacterMatcher {
     return (map.length > index && map[index]) ^ inversePattern;
   }
 
+  /**
+   * Maps the letter of a predefined character class escape to the
+   * equivalent bracket expression.
+   *
+   * @param c the character following the backslash
+   * @return the bracket expression for {@code \d}, {@code \D}, {@code \s},
+   *         {@code \S}, {@code \w} or {@code \W}; {@code null} for any
+   *         other character
+   */
+  private static String specialClass(int c) {
+    if ('d' == c) {
+      return "[0-9]";
+    }
+    if ('D' == c) {
+      return "[^0-9]";
+    }
+    if ('s' == c) {
+      return "[ \\t\\n\\x0B\\f\\r]";
+    }
+    if ('S' == c) {
+      return "[^ \\t\\n\\x0B\\f\\r]";
+    }
+    if ('w' == c) {
+      return "[a-zA-Z_0-9]";
+    }
+    if ('W' == c) {
+      return "[^a-zA-Z_0-9]";
+    }
+    return null;
+  }
+
   private CharacterMatcher(boolean[] map, boolean inversePattern) {
     this.map = map;
     this.inversePattern = inversePattern;
@@ -65,6 +96,32 @@ class CharacterMatcher {
     map = java.util.Arrays.copyOf(map, size);
   }
 
+  /**
+   * Extends this matcher so that it also matches every character matched by
+   * {@code other}; the result is the union of both matchers.
+   *
+   * NOTE(review): if the caller merges into a matcher whose inversion flag
+   * is already set (presumably a negated class such as {@code [^a\d]}), the
+   * union taken here may not be what java.util.regex means by that class --
+   * confirm before relying on it.
+   *
+   * @param other the matcher to fold into this one; it is not modified
+   */
+  private void merge(CharacterMatcher other) {
+    boolean inversePattern = this.inversePattern || other.inversePattern;
+    // Always grow to the longer map and never shrink it: every index either
+    // map covers has to be recomputed below, whatever the inversion flags.
+    if (map.length < other.map.length) {
+      map = java.util.Arrays.copyOf(map, other.map.length);
+    }
+    for (int i = 0; i < map.length; ++ i) {
+      // matches() still reads the old this.inversePattern, which is only
+      // replaced once the whole map has been rewritten.
+      map[i] = (matches((char)i) || other.matches((char)i)) ^ inversePattern;
+    }
+    this.inversePattern = inversePattern;
+  }
+
   static class Parser {
     private final char[] description;
     private int offset;
@@ -165,6 +222,15 @@ class CharacterMatcher {
 
     public CharacterMatcher parseClass() {
       if (description[offset] != '[') {
+        // Not a bracket expression: accept a predefined class escape such
+        // as \d by parsing its bracket-expression equivalent instead.
+        if (description[offset] == '\\' && offset + 1 < description.length) {
+          String range = specialClass(description[++ offset]);
+          if (range != null) {
+            ++ offset;
+            return CharacterMatcher.parse(range);
+          }
+        }
         return null;
       }
       CharacterMatcher matcher = new CharacterMatcher(new boolean[0],
@@ -196,9 +262,17 @@ class CharacterMatcher {
             matcher.map[j] = true;
           }
         } else if (c == '\\') {
+          int saved = offset;
           previous = parseEscapedCharacter();
           if (previous < 0) {
-            unsupported("escape");
+            // Not a single escaped character: rewind so that parseClass()
+            // sees the backslash again and retry as a predefined class.
+            offset = saved - 1;
+            CharacterMatcher clazz = parseClass();
+            if (clazz == null) {
+              unsupported("escape");
+            }
+            matcher.merge(clazz);
           } else {
             matcher.setMatch(previous);
           }
diff --git a/test/regex/Compiler.java b/test/regex/Compiler.java
index e109be63bf..05242e0d0e 100644
--- a/test/regex/Compiler.java
+++ b/test/regex/Compiler.java
@@ -292,6 +292,22 @@ class Compiler implements PikeVMOpcodes {
         case '.':
           current.push(DOT);
           continue;
+        case '\\':
+          // Try a single escaped character first, then a predefined
+          // character class; anything else is a parse error.
+          int unescaped = characterClassParser.parseEscapedCharacter(index + 1);
+          if (unescaped >= 0) {
+            index = characterClassParser.getEndOffset() - 1;
+            current.push((char)unescaped);
+            continue;
+          }
+          CharacterMatcher characterClass = characterClassParser.parseClass(index);
+          if (characterClass != null) {
+            index = characterClassParser.getEndOffset() - 1;
+            current.push(new CharacterRange(characterClass));
+            continue;
+          }
+          throw new RuntimeException("Parse error @" + index + ": " + regex);
         case '?':
         case '*':
         case '+': {