From 728473e9ad6fdaaa71ef7c366680c32bc81d5490 Mon Sep 17 00:00:00 2001
From: Johannes Schindelin
Date: Mon, 21 Oct 2013 12:00:42 -0500
Subject: [PATCH] Support escaped octal and hexadecimal characters in regular expressions

In the previous commit, we did not support characters in regular
expressions specified via \0..., \x... or \u... yet. This is a bit more
involved, therefore support for them is added in its own commit.

Signed-off-by: Johannes Schindelin
---
Reviewer notes (text between "---" and the diffstat is dropped by git am):

 * digits() used to count ':', ';', '<', '=', '>', '?' and '@' as
   hexadecimal digits, so that e.g. \x41: failed in Integer.parseInt().
   It now classifies characters by explicit ASCII ranges.
 * Line breaks and indentation were reconstructed from a copy of this
   patch that had lost them, and the blob id after ".." in the index line
   was not recomputed; try "git apply --ignore-whitespace" if a hunk is
   rejected.

 classpath/java/util/regex/Pattern.java | 61 +++++++++++++++++++++++++-
 1 file changed, 60 insertions(+), 1 deletion(-)

diff --git a/classpath/java/util/regex/Pattern.java b/classpath/java/util/regex/Pattern.java
index 6c3da68628..b9c84eb6f3 100644
--- a/classpath/java/util/regex/Pattern.java
+++ b/classpath/java/util/regex/Pattern.java
@@ -45,7 +45,30 @@ public class Pattern {
       char c = pattern.charAt(i);
       switch (c) {
       case '\\':
-        if (++i == pattern.length() || (c = unescape(pattern.charAt(i))) != -1) {
+        if (++i == pattern.length()) {
+          break;
+        }
+        c = pattern.charAt(i);
+        if (c == '0') {
+          // Octal escape: \0 followed by up to three octal digits.
+          int len = digits(pattern, ++i, 3, 8);
+          if (len == 3 && pattern.charAt(i) > '3') {
+            // Three digits are only taken when the first one is 0-3.
+            --len;
+          }
+          c = (char)Integer.parseInt(pattern.substring(i, i + len), 8);
+          // Move i to the last character of the escape sequence.
+          i += len - 1;
+        } else if (c == 'x' || c == 'u') {
+          // Hexadecimal escape: \x or \u followed by up to four hex digits.
+          int len = digits(pattern, ++i, 4, 16);
+          c = (char)Integer.parseInt(pattern.substring(i, i + len), 16);
+          i += len - 1;
+        } else {
+          c = unescape(pattern.charAt(i));
+        }
+        // NOTE(review): c is a char, so comparing it with the int -1 is always true.
+        if (c != -1) {
           break;
         }
         // fallthru
@@ -70,6 +93,42 @@ public class Pattern {
     return buffer.toString();
   }
 
+  /**
+   * Counts how many consecutive characters of {@code s}, starting at
+   * {@code offset}, are digits in the given base.
+   *
+   * Only the ASCII characters 0-9, a-z and A-Z are recognized as digits.
+   *
+   * @param s the string to scan
+   * @param offset the index of the first character to look at
+   * @param maxLength the maximal number of digits to count
+   * @param base the radix; a character is a digit if its value is below it
+   * @return the number of digits found, between 0 and {@code maxLength}
+   */
+  private static int digits(String s, int offset, int maxLength, int base) {
+    for (int i = 0; ; ++i) {
+      if (i == maxLength || offset + i >= s.length()) {
+        return i;
+      }
+      char c = s.charAt(offset + i);
+      int value;
+      if (c >= '0' && c <= '9') {
+        value = c - '0';
+      } else if (c >= 'a' && c <= 'z') {
+        value = c - 'a' + 10;
+      } else if (c >= 'A' && c <= 'Z') {
+        value = c - 'A' + 10;
+      } else {
+        // Anything else, e.g. ':' or '@' which sit between '9' and 'A' in
+        // ASCII, is not a digit in any base.
+        return i;
+      }
+      if (value >= base) {
+        return i;
+      }
+    }
+  }
+
   private static char unescape(char c) {
     switch (c) {
     case '\\':