From 4c14a9ab665b8d798bb91bfe3a399202d768e821 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Thu, 20 Aug 2009 11:14:05 -0600 Subject: [PATCH] implement enough of java.util.regex to ensure String methods work for trivial regular expressions --- classpath/java/util/regex/Matcher.java | 82 ++++++++++++++---- classpath/java/util/regex/Pattern.java | 113 ++++++++++++++++++++++--- test/Strings.java | 17 +++- 3 files changed, 184 insertions(+), 28 deletions(-) diff --git a/classpath/java/util/regex/Matcher.java b/classpath/java/util/regex/Matcher.java index 58af1a707d..2296b4bf9d 100644 --- a/classpath/java/util/regex/Matcher.java +++ b/classpath/java/util/regex/Matcher.java @@ -1,4 +1,4 @@ -/* Copyright (c) 2008, Avian Contributors +/* Copyright (c) 2008-2009, Avian Contributors Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided @@ -11,17 +11,29 @@ package java.util.regex; /** - * This implementation is a skeleton, useful only for compilation. At runtime it - * is need to be replaced by a working implementation, for example one from the - * Apache Harmony project. - * - * @author zsombor + * This is a work in progress. * + * @author zsombor and others */ public class Matcher { + private final Pattern pattern; + private CharSequence input; + private int start; + private int end; + + Matcher(Pattern pattern, CharSequence input) { + this.pattern = pattern; + this.input = input; + } public boolean matches() { - throw new UnsupportedOperationException(); + if (pattern.pattern().equals(input.toString())) { + start = 0; + end = input.length(); + return true; + } else { + return false; + } } public boolean requireEnd() { @@ -37,15 +49,18 @@ public class Matcher { } public Matcher reset() { - throw new UnsupportedOperationException(); + return reset(input); } public Matcher reset(CharSequence input) { - throw new UnsupportedOperationException(); + this.input = input; + start = 0; + end = 0; + return this; } public int start() { - throw new UnsupportedOperationException(); + return start; } public int start(int group) { @@ -69,15 +84,44 @@ public class Matcher { } public String replaceAll(String replacement) { - throw new UnsupportedOperationException(); + return replace(replacement, Integer.MAX_VALUE); } public String replaceFirst(String replacement) { - throw new UnsupportedOperationException(); + return replace(replacement, 1); + } + + private String replace(String replacement, int limit) { + reset(); + + StringBuilder sb = null; + int index = 0; + int count = 0; + while (count < limit && index < input.length()) { + if (find(index)) { + if (sb == null) { + sb = new StringBuilder(); + } + if (start > index) { + sb.append(input.subSequence(index, start)); + } + sb.append(replacement); + index = end; + ++ count; + } else if (index == 0) { + return input.toString(); + } else { + break; + } + } + if (index < input.length()) { + sb.append(input.subSequence(index, input.length())); + } + return sb.toString(); } public int end() { - throw new UnsupportedOperationException(); + return end; } public int end(int group) { @@ -85,11 +129,19 @@ public class Matcher { } public boolean find() { - throw new UnsupportedOperationException(); + return find(end); } public boolean find(int start) { - throw new UnsupportedOperationException(); + String p = pattern.pattern(); + int i = Pattern.indexOf(input, p, start); + if (i >= 0) { + this.start = i; + this.end = i + p.length(); + return true; + } else { + return false; + } } public int groupCount() { diff --git a/classpath/java/util/regex/Pattern.java b/classpath/java/util/regex/Pattern.java index 972f93de37..a3ec90c676 100644 --- a/classpath/java/util/regex/Pattern.java +++ b/classpath/java/util/regex/Pattern.java @@ -1,4 +1,4 @@ -/* Copyright (c) 2008, Avian Contributors +/* Copyright (c) 2008-2009, Avian Contributors Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided @@ -10,12 +10,14 @@ package java.util.regex; +import java.util.Iterator; +import java.util.List; +import java.util.LinkedList; + /** - * This implementation is a skeleton, useful only for compilation. At runtime it - * is need to be replaced by a working implementation, for example one from the - * Apache Harmony project. + * This is a work in progress. * - * @author zsombor + * @author zsombor and others * */ public class Pattern { @@ -29,12 +31,41 @@ public class Pattern { public static final int UNICODE_CASE = 64; public static final int CANON_EQ = 128; - private int patternFlags; - private String pattern; + private final int patternFlags; + private final String pattern; protected Pattern(String pattern, int flags) { this.pattern = pattern; this.patternFlags = flags; + + if (! trivial(pattern)) { + throw new UnsupportedOperationException + ("only trivial regular expressions are supported so far"); + } + } + + private static boolean trivial(String pattern) { + for (int i = 0; i < pattern.length(); ++i) { + char c = pattern.charAt(i); + switch (c) { + case '\\': + case '.': + case '*': + case '+': + case '?': + case '|': + case '[': + case ']': + case '{': + case '}': + case '(': + case ')': + case '^': + case '$': + return false; + } + } + return true; } public static Pattern compile(String regex) { @@ -50,7 +81,7 @@ public class Pattern { } public Matcher matcher(CharSequence input) { - throw new UnsupportedOperationException(); + return new Matcher(this, input); } public static boolean matches(String regex, CharSequence input) { @@ -66,10 +97,72 @@ public class Pattern { } public String[] split(CharSequence input) { - throw new UnsupportedOperationException(); + return split(input, 0); } public String[] split(CharSequence input, int limit) { - throw new UnsupportedOperationException(); + boolean strip; + if (limit < 0) { + strip = false; + limit = Integer.MAX_VALUE; + } else if (limit == 0) { + strip = true; + limit = Integer.MAX_VALUE; + } else { + strip = false; + } + + List list = new LinkedList(); + int index = 0; + int trailing = 0; + while (index < input.length() && list.size() < limit) { + int i = indexOf(input, pattern, index); + if (i >= 0) { + if (i == index) { + ++ trailing; + } else { + trailing = 0; + } + + list.add(input.subSequence(index, i)); + index = i + pattern.length(); + } else { + break; + } + } + + if (strip && index == input.length()) { + ++ trailing; + } else { + trailing = 0; + } + list.add(input.subSequence(index, input.length())); + + String[] result = new String[list.size() - trailing]; + int i = 0; + for (Iterator it = list.iterator(); + it.hasNext() && i < result.length; ++ i) + { + result[i] = it.next().toString(); + } + return result; + } + + static int indexOf(CharSequence haystack, CharSequence needle, int start) { + if (needle.length() == 0) return start; + + for (int i = start; i < haystack.length() - needle.length() + 1; ++i) { + int j = 0; + for (; j < needle.length(); ++j) { + if (haystack.charAt(i + j) != needle.charAt(j)) { + break; + } + } + if (j == needle.length()) { + return i; + } + } + + return -1; } } diff --git a/test/Strings.java b/test/Strings.java index c84f93e996..9927ba2983 100644 --- a/test/Strings.java +++ b/test/Strings.java @@ -9,9 +9,20 @@ public class Strings { 115, 46, 83, 121, 109, 98, 111, 108 }) .equals("com.ecovate.nat.bus.Symbol")); - // We don't yet have a regex implementation, so this test will fail: -// final String months = "Jan\u00aeFeb\u00aeMar\u00ae"; -// expect(months.split("\u00ae").length == 3); + final String months = "Jan\u00aeFeb\u00aeMar\u00ae"; + expect(months.split("\u00ae").length == 3); + expect(months.replaceAll("\u00ae", ".").equals("Jan.Feb.Mar.")); + + expect("foo_foofoo__foo".replaceAll("_", "__") + .equals("foo__foofoo____foo")); + + expect("foo_foofoo__foo".replaceFirst("_", "__") + .equals("foo__foofoo__foo")); + + expect("stereomime".matches("stereomime")); + expect(! "stereomime".matches("stereomim")); + expect(! "stereomime".matches("tereomime")); + expect(! "stereomime".matches("sterEomime")); StringBuilder sb = new StringBuilder(); sb.append('$');