Replace java.util.regex.* with the new regular expression engine

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
Johannes Schindelin 2013-11-22 17:42:12 -06:00
parent e96379ee19
commit 6626b477ad
14 changed files with 72 additions and 415 deletions

View File

@ -8,7 +8,7 @@
There is NO WARRANTY for this software. See license.txt for There is NO WARRANTY for this software. See license.txt for
details. */ details. */
package regex; package java.util.regex;
/** /**
* A class to match classes of characters. * A class to match classes of characters.

View File

@ -8,7 +8,7 @@
There is NO WARRANTY for this software. See license.txt for There is NO WARRANTY for this software. See license.txt for
details. */ details. */
package regex; package java.util.regex;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Stack; import java.util.Stack;

View File

@ -15,27 +15,23 @@ package java.util.regex;
* *
* @author zsombor and others * @author zsombor and others
*/ */
public class Matcher { public abstract class Matcher {
private final Pattern pattern; protected CharSequence input;
private CharSequence input; protected int start;
private int start; protected int end;
private int end;
Matcher(Pattern pattern, CharSequence input) { public Matcher(CharSequence input) {
this.pattern = pattern; reset(input);
this.input = input;
} }
public boolean matches() { public abstract boolean matches();
if (pattern.pattern().equals(input.toString())) {
start = 0; public boolean find() {
end = input.length(); return find(end);
return true;
} else {
return false;
}
} }
public abstract boolean find(int start);
public Matcher reset() { public Matcher reset() {
return reset(input); return reset(input);
} }
@ -47,10 +43,6 @@ public class Matcher {
return this; return this;
} }
public int start() {
return start;
}
public String replaceAll(String replacement) { public String replaceAll(String replacement) {
return replace(replacement, Integer.MAX_VALUE); return replace(replacement, Integer.MAX_VALUE);
} }
@ -59,7 +51,7 @@ public class Matcher {
return replace(replacement, 1); return replace(replacement, 1);
} }
private String replace(String replacement, int limit) { protected String replace(String replacement, int limit) {
reset(); reset();
StringBuilder sb = null; StringBuilder sb = null;
@ -88,23 +80,40 @@ public class Matcher {
return sb.toString(); return sb.toString();
} }
public int start() {
return start;
}
public int end() { public int end() {
return end; return end;
} }
public boolean find() { public String group() {
return find(end); return input.subSequence(start, end).toString();
} }
public boolean find(int start) { public int start(int group) {
String p = pattern.pattern(); if (group == 0) {
int i = Pattern.indexOf(input, p, start); return start();
if (i >= 0) {
this.start = i;
this.end = i + p.length();
return true;
} else {
return false;
} }
throw new UnsupportedOperationException();
}
public int end(int group) {
if (group == 0) {
return end();
}
throw new UnsupportedOperationException();
}
public String group(int group) {
if (group == 0) {
return group();
}
throw new UnsupportedOperationException();
}
public int groupCount() {
return 0;
} }
} }

View File

@ -10,9 +10,8 @@
package java.util.regex; package java.util.regex;
import java.util.Iterator; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.LinkedList;
/** /**
* This is a work in progress. * This is a work in progress.
@ -20,7 +19,7 @@ import java.util.LinkedList;
* @author zsombor and others * @author zsombor and others
* *
*/ */
public class Pattern { public abstract class Pattern implements PikeVMOpcodes {
public static final int UNIX_LINES = 1; public static final int UNIX_LINES = 1;
public static final int CASE_INSENSITIVE = 2; public static final int CASE_INSENSITIVE = 2;
@ -35,112 +34,26 @@ public class Pattern {
private final String pattern; private final String pattern;
protected Pattern(String pattern, int flags) { protected Pattern(String pattern, int flags) {
this.pattern = trivial(pattern); this.pattern = pattern;
this.patternFlags = flags; this.patternFlags = flags;
} }
private static String trivial(String pattern) {
StringBuffer buffer = new StringBuffer();
for (int i = 0; i < pattern.length(); ++i) {
char c = pattern.charAt(i);
switch (c) {
case '\\':
if (++i == pattern.length()) {
break;
}
c = pattern.charAt(i);
if (c == '0') {
int len = digits(pattern, ++i, 3, 8);
if (len == 3 && pattern.charAt(i) > '3') {
--len;
}
c = (char)Integer.parseInt(pattern.substring(i, i + len), 8);
i += len - 1;
} else if (c == 'x' || c == 'u') {
int len = digits(pattern, ++i, 4, 16);
c = (char)Integer.parseInt(pattern.substring(i, i + len), 16);
i += len - 1;
} else {
c = unescape(pattern.charAt(i));
}
if (c != -1) {
break;
}
// fallthru
case '.':
case '*':
case '+':
case '?':
case '|':
case '[':
case ']':
case '{':
case '}':
case '(':
case ')':
case '^':
case '$':
throw new UnsupportedOperationException
("only trivial regular expressions are supported so far (" + pattern + ")");
}
buffer.append(c);
}
return buffer.toString();
}
private static int digits(String s, int offset, int maxLength, int base) {
for (int i = 0; ; ++i) {
if (i == maxLength || offset + i >= s.length()) {
return i;
}
int value = s.charAt(offset + i) - '0';
if (value < 0) {
return i;
}
if (base > 10 && value >= 10) {
value += 10 - (value >= 'a' - '0' ? 'a' - '0' : 'A' - '0');
}
if (value >= base) {
return i;
}
}
}
private static char unescape(char c) {
switch (c) {
case '\\':
return c;
case 'a':
return 0x0007;
case 'e':
return 0x001B;
case 'f':
return 0x000C;
case 'n':
return 0x000A;
case 'r':
return 0x000D;
case 't':
return 0x0009;
}
return (char)-1;
}
public static Pattern compile(String regex) { public static Pattern compile(String regex) {
return new Pattern(regex, 0); return compile(regex, 0);
} }
public static Pattern compile(String regex, int flags) { public static Pattern compile(String regex, int flags) {
return new Pattern(regex, flags); if (flags != 0) {
throw new UnsupportedOperationException("TODO");
}
return new Compiler().compile(regex);
} }
public int flags() { public int flags() {
return patternFlags; return patternFlags;
} }
public Matcher matcher(CharSequence input) { public abstract Matcher matcher(CharSequence input);
return new Matcher(this, input);
}
public static boolean matches(String regex, CharSequence input) { public static boolean matches(String regex, CharSequence input) {
return Pattern.compile(regex).matcher(input).matches(); return Pattern.compile(regex).matcher(input).matches();
@ -155,79 +68,22 @@ public class Pattern {
} }
public String[] split(CharSequence input, int limit) { public String[] split(CharSequence input, int limit) {
boolean strip; if (limit <= 0) {
if (limit < 0) {
strip = false;
limit = Integer.MAX_VALUE; limit = Integer.MAX_VALUE;
} else if (limit == 0) {
strip = true;
limit = Integer.MAX_VALUE;
} else {
strip = false;
} }
Matcher matcher = matcher(input);
List<CharSequence> list = new LinkedList(); List<String> result = new ArrayList<String>();
int index = 0; int offset = 0;
int trailing = 0; for (;;) {
int patternLength = pattern.length(); if (result.size() >= limit || !matcher.find()) {
while (index < input.length() && list.size() < limit - 1) {
int i;
if (patternLength == 0) {
if (list.size() == 0) {
i = 0;
} else {
i = index + 1;
}
} else {
i = indexOf(input, pattern, index);
}
if (i >= 0) {
if (patternLength != 0 && i == index) {
++ trailing;
} else {
trailing = 0;
}
list.add(input.subSequence(index, i));
index = i + patternLength;
} else {
break; break;
} }
result.add(input.subSequence(offset, matcher.start()).toString());
offset = matcher.end();
} }
if (offset == 0 || offset < input.length()) {
if (strip && index > 0 && index == input.length()) { result.add(input.subSequence(offset, input.length()).toString());
++ trailing;
} else {
trailing = 0;
} }
list.add(input.subSequence(index, input.length())); return result.toArray(new String[result.size()]);
String[] result = new String[list.size() - trailing];
int i = 0;
for (Iterator<CharSequence> it = list.iterator();
it.hasNext() && i < result.length; ++ i)
{
result[i] = it.next().toString();
}
return result;
}
static int indexOf(CharSequence haystack, CharSequence needle, int start) {
if (needle.length() == 0) return start;
for (int i = start; i < haystack.length() - needle.length() + 1; ++i) {
int j = 0;
for (; j < needle.length(); ++j) {
if (haystack.charAt(i + j) != needle.charAt(j)) {
break;
}
}
if (j == needle.length()) {
return i;
}
}
return -1;
} }
} }

View File

@ -8,7 +8,7 @@
There is NO WARRANTY for this software. See license.txt for There is NO WARRANTY for this software. See license.txt for
details. */ details. */
package regex; package java.util.regex;
/** /**
* A minimal implementation of a regular expression engine. * A minimal implementation of a regular expression engine.

View File

@ -8,7 +8,7 @@
There is NO WARRANTY for this software. See license.txt for There is NO WARRANTY for this software. See license.txt for
details. */ details. */
package regex; package java.util.regex;
/** /**
* Opcodes for the Pike VM. * Opcodes for the Pike VM.

View File

@ -8,7 +8,7 @@
There is NO WARRANTY for this software. See license.txt for There is NO WARRANTY for this software. See license.txt for
details. */ details. */
package regex; package java.util.regex;
/** /**
* A minimal implementation of a regular expression matcher. * A minimal implementation of a regular expression matcher.

View File

@ -8,7 +8,7 @@
There is NO WARRANTY for this software. See license.txt for There is NO WARRANTY for this software. See license.txt for
details. */ details. */
package regex; package java.util.regex;
/** /**
* A minimal implementation of a regular expression engine. * A minimal implementation of a regular expression engine.

View File

@ -8,7 +8,7 @@
There is NO WARRANTY for this software. See license.txt for There is NO WARRANTY for this software. See license.txt for
details. */ details. */
package regex; package java.util.regex;
/** /**
* This is a work in progress. * This is a work in progress.

View File

@ -8,7 +8,7 @@
There is NO WARRANTY for this software. See license.txt for There is NO WARRANTY for this software. See license.txt for
details. */ details. */
package regex; package java.util.regex;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;

View File

@ -1344,7 +1344,7 @@ vm-classes = \
avian/*.class \ avian/*.class \
avian/resource/*.class avian/resource/*.class
test-support-sources = $(shell find $(test)/avian $(test)/regex -name '*.java') test-support-sources = $(shell find $(test)/avian/ -name '*.java')
test-sources = $(wildcard $(test)/*.java) test-sources = $(wildcard $(test)/*.java)
test-cpp-sources = $(wildcard $(test)/*.cpp) test-cpp-sources = $(wildcard $(test)/*.cpp)
test-sources += $(test-support-sources) test-sources += $(test-support-sources)

View File

@ -1,5 +1,5 @@
import regex.Matcher; import java.util.regex.Matcher;
import regex.Pattern; import java.util.regex.Pattern;
public class Regex { public class Regex {
private static void expect(boolean v) { private static void expect(boolean v) {

View File

@ -1,119 +0,0 @@
/* Copyright (c) 2008-2013, Avian Contributors
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
There is NO WARRANTY for this software. See license.txt for
details. */
package regex;
/**
* This is a work in progress.
*
* @author zsombor and others
*/
/**
 * Base class for regular expression matchers operating on one input
 * sequence.
 *
 * Concrete subclasses supply the actual matching via {@link #matches()}
 * and {@link #find(int)}; this class only keeps the bounds of the most
 * recent match and builds the convenience operations (replace, group
 * accessors) on top of them. Only group 0 (the whole match) is supported
 * here.
 *
 * @author zsombor and others
 */
public abstract class Matcher {
  /** The sequence being matched against. */
  protected CharSequence input;
  /**
   * Offset of the first character of the most recent match. The replace
   * logic below reads this after a successful {@link #find(int)}, so
   * implementations are expected to update it there.
   */
  protected int start;
  /** Offset just past the most recent match; see {@link #start}. */
  protected int end;

  /**
   * Creates a matcher positioned at the beginning of the given input.
   *
   * @param input the sequence to match against
   */
  public Matcher(CharSequence input) {
    reset(input);
  }

  /**
   * Attempts a match; the exact semantics are defined by the subclass.
   *
   * @return whether the match succeeded
   */
  public abstract boolean matches();

  /**
   * Searches for the next match, starting at the end of the previous one
   * (or at offset 0 after a reset).
   *
   * Note that after a zero-width match this resumes at the very same
   * offset, since the search simply starts at {@link #end()}.
   *
   * @return whether a match was found
   */
  public boolean find() {
    return find(end);
  }

  /**
   * Searches for a match beginning at or after the given offset.
   *
   * @param start the offset at which to start searching
   * @return whether a match was found
   */
  public abstract boolean find(int start);

  /**
   * Forgets the previous match and rewinds to the start of the current
   * input.
   *
   * @return this matcher
   */
  public Matcher reset() {
    return reset(input);
  }

  /**
   * Switches to a new input and rewinds the match bounds to offset 0.
   *
   * @param input the new sequence to match against
   * @return this matcher
   */
  public Matcher reset(CharSequence input) {
    this.input = input;
    start = 0;
    end = 0;
    return this;
  }

  /**
   * Replaces every match in the input with the given text.
   *
   * @param replacement the literal text to insert for each match
   * @return the input with all matches replaced
   */
  public String replaceAll(String replacement) {
    return replace(replacement, Integer.MAX_VALUE);
  }

  /**
   * Replaces only the first match in the input with the given text.
   *
   * @param replacement the literal text to insert for the match
   * @return the input with the first match replaced
   */
  public String replaceFirst(String replacement) {
    return replace(replacement, 1);
  }

  /**
   * Replaces up to {@code limit} matches with the given literal text.
   *
   * The matcher is reset first. When nothing is replaced (no match, empty
   * input without a match, or a non-positive limit) the input is returned
   * unchanged. The replacement is inserted verbatim; group references are
   * not interpreted.
   *
   * @param replacement the literal text to insert for each match
   * @param limit the maximum number of matches to replace
   * @return the resulting string
   */
  protected String replace(String replacement, int limit) {
    reset();

    final int length = input.length();
    StringBuilder sb = null;
    int index = 0;
    int count = 0;
    // "<=" rather than "<": a zero-width match may sit at the very end of
    // the input (and an empty input may still match).
    while (count < limit && index <= length && find(index)) {
      if (sb == null) {
        sb = new StringBuilder();
      }
      if (start > index) {
        sb.append(input.subSequence(index, start));
      }
      sb.append(replacement);
      ++ count;
      if (end > start) {
        index = end;
      } else {
        // Zero-width match: copy the next character through and step past
        // it, otherwise the same match would be found over and over.
        if (end < length) {
          sb.append(input.subSequence(end, end + 1));
        }
        index = end + 1;
      }
    }
    if (sb == null) {
      // Nothing was replaced; previously this path could dereference null.
      return input.toString();
    }
    if (index < length) {
      sb.append(input.subSequence(index, length));
    }
    return sb.toString();
  }

  /**
   * @return the offset of the first character of the most recent match
   */
  public int start() {
    return start;
  }

  /**
   * @return the offset just past the last character of the most recent
   *         match
   */
  public int end() {
    return end;
  }

  /**
   * @return the text of the most recent match
   */
  public String group() {
    return input.subSequence(start, end).toString();
  }

  /**
   * Returns the start offset of the given group.
   *
   * @param group the group index; only 0 (the whole match) is supported
   * @return the start offset of the whole match
   * @throws UnsupportedOperationException for any group other than 0
   */
  public int start(int group) {
    if (group == 0) {
      return start();
    }
    throw new UnsupportedOperationException();
  }

  /**
   * Returns the end offset of the given group.
   *
   * @param group the group index; only 0 (the whole match) is supported
   * @return the end offset of the whole match
   * @throws UnsupportedOperationException for any group other than 0
   */
  public int end(int group) {
    if (group == 0) {
      return end();
    }
    throw new UnsupportedOperationException();
  }

  /**
   * Returns the text of the given group.
   *
   * @param group the group index; only 0 (the whole match) is supported
   * @return the text of the whole match
   * @throws UnsupportedOperationException for any group other than 0
   */
  public String group(int group) {
    if (group == 0) {
      return group();
    }
    throw new UnsupportedOperationException();
  }

  /**
   * @return the number of capturing groups; always 0 in this base class
   */
  public int groupCount() {
    return 0;
  }
}

View File

@ -1,89 +0,0 @@
/* Copyright (c) 2008-2013, Avian Contributors
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
There is NO WARRANTY for this software. See license.txt for
details. */
package regex;
import java.util.ArrayList;
import java.util.List;
/**
* This is a work in progress.
*
* @author zsombor and others
*
*/
/**
 * Abstract base of compiled regular expressions.
 *
 * Instances are obtained through {@link #compile(String)}, which delegates
 * to the regular expression compiler; the concrete subclass provides
 * {@link #matcher(CharSequence)}.
 *
 * This is a work in progress.
 *
 * @author zsombor and others
 *
 */
public abstract class Pattern implements PikeVMOpcodes {

  // Flag bits. compile(String, int) currently rejects any non-zero flags.
  public static final int UNIX_LINES       = 1;
  public static final int CASE_INSENSITIVE = 2;
  public static final int COMMENTS         = 4;
  public static final int MULTILINE        = 8;
  public static final int LITERAL          = 16;
  public static final int DOTALL           = 32;
  public static final int UNICODE_CASE     = 64;
  public static final int CANON_EQ         = 128;

  private final int patternFlags;
  private final String pattern;

  /**
   * Records the source text and flags of the pattern.
   *
   * @param pattern the regular expression as written
   * @param flags the flag bits reported by {@link #flags()}
   */
  protected Pattern(String pattern, int flags) {
    this.pattern = pattern;
    this.patternFlags = flags;
  }

  /**
   * Compiles a regular expression without any flags.
   *
   * @param regex the regular expression
   * @return the compiled pattern
   */
  public static Pattern compile(String regex) {
    return compile(regex, 0);
  }

  /**
   * Compiles a regular expression.
   *
   * @param regex the regular expression
   * @param flags must be 0; flags are not implemented yet
   * @return the compiled pattern
   * @throws UnsupportedOperationException if {@code flags} is non-zero
   */
  public static Pattern compile(String regex, int flags) {
    if (flags != 0) {
      throw new UnsupportedOperationException("TODO");
    }
    return new Compiler().compile(regex);
  }

  /**
   * @return the flag bits this pattern was constructed with
   */
  public int flags() {
    return patternFlags;
  }

  /**
   * Creates a matcher for the given input.
   *
   * @param input the sequence to match against
   * @return a new matcher
   */
  public abstract Matcher matcher(CharSequence input);

  /**
   * Compiles the expression and matches it against the input in one go.
   *
   * @param regex the regular expression
   * @param input the sequence to match against
   * @return the result of {@link Matcher#matches()}
   */
  public static boolean matches(String regex, CharSequence input) {
    return Pattern.compile(regex).matcher(input).matches();
  }

  /**
   * @return the regular expression this pattern was constructed with
   */
  public String pattern() {
    return pattern;
  }

  /**
   * Splits the input around matches of this pattern, dropping trailing
   * empty strings (same as {@code split(input, 0)}).
   *
   * @param input the sequence to split
   * @return the pieces between the matches
   */
  public String[] split(CharSequence input) {
    return split(input, 0);
  }

  /**
   * Splits the input around matches of this pattern, following the
   * contract of {@code java.util.regex.Pattern.split}:
   * <ul>
   * <li>{@code limit > 0}: at most {@code limit} pieces are returned; the
   *     last piece holds the unsplit remainder of the input;</li>
   * <li>{@code limit == 0}: unlimited pieces, trailing empty strings are
   *     removed;</li>
   * <li>{@code limit < 0}: unlimited pieces, trailing empty strings are
   *     kept.</li>
   * </ul>
   * If the pattern does not match at all, the result is the whole input as
   * the single element. A zero-width match at the very beginning does not
   * produce a leading empty piece.
   *
   * @param input the sequence to split
   * @param limit the result threshold, as described above
   * @return the pieces between the matches
   */
  public String[] split(CharSequence input, int limit) {
    final int length = input.length();
    final boolean limited = limit > 0;
    Matcher matcher = matcher(input);
    List<String> result = new ArrayList<String>();
    int offset = 0;      // start of the piece currently being collected
    int searchFrom = 0;  // where the next search begins
    boolean found = false;
    // The pattern is applied at most limit - 1 times so that the remainder
    // still fits into the limit-th piece.
    while (searchFrom <= length
           && (!limited || result.size() < limit - 1)
           && matcher.find(searchFrom)) {
      int matchStart = matcher.start();
      int matchEnd = matcher.end();
      // Step past a zero-width match; resuming at the same offset would
      // find the very same match again, forever.
      searchFrom = matchEnd > matchStart ? matchEnd : matchEnd + 1;
      if (matchEnd == 0) {
        // zero-width match at offset 0: no leading empty piece
        continue;
      }
      found = true;
      result.add(input.subSequence(offset, matchStart).toString());
      offset = matchEnd;
    }
    if (!found) {
      return new String[] { input.toString() };
    }
    // The remainder is always a piece, even when empty; whether trailing
    // empty pieces survive is decided by the limit below.
    result.add(input.subSequence(offset, length).toString());
    if (limit == 0) {
      while (result.size() > 0
             && result.get(result.size() - 1).length() == 0) {
        result.remove(result.size() - 1);
      }
    }
    return result.toArray(new String[result.size()]);
  }
}