Replace java.util.regex.* with the new regular expression engine

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
Johannes Schindelin 2013-11-22 17:42:12 -06:00
parent e96379ee19
commit 6626b477ad
14 changed files with 72 additions and 415 deletions

View File

@ -8,7 +8,7 @@
There is NO WARRANTY for this software. See license.txt for
details. */
package regex;
package java.util.regex;
/**
* A class to match classes of characters.

View File

@ -8,7 +8,7 @@
There is NO WARRANTY for this software. See license.txt for
details. */
package regex;
package java.util.regex;
import java.util.ArrayList;
import java.util.Stack;

View File

@ -15,27 +15,23 @@ package java.util.regex;
*
* @author zsombor and others
*/
public class Matcher {
private final Pattern pattern;
private CharSequence input;
private int start;
private int end;
public abstract class Matcher {
protected CharSequence input;
protected int start;
protected int end;
Matcher(Pattern pattern, CharSequence input) {
this.pattern = pattern;
this.input = input;
public Matcher(CharSequence input) {
reset(input);
}
public boolean matches() {
if (pattern.pattern().equals(input.toString())) {
start = 0;
end = input.length();
return true;
} else {
return false;
}
public abstract boolean matches();
public boolean find() {
return find(end);
}
public abstract boolean find(int start);
public Matcher reset() {
return reset(input);
}
@ -47,10 +43,6 @@ public class Matcher {
return this;
}
public int start() {
return start;
}
public String replaceAll(String replacement) {
return replace(replacement, Integer.MAX_VALUE);
}
@ -59,7 +51,7 @@ public class Matcher {
return replace(replacement, 1);
}
private String replace(String replacement, int limit) {
protected String replace(String replacement, int limit) {
reset();
StringBuilder sb = null;
@ -88,23 +80,40 @@ public class Matcher {
return sb.toString();
}
public int start() {
return start;
}
public int end() {
return end;
}
public boolean find() {
return find(end);
public String group() {
return input.subSequence(start, end).toString();
}
public boolean find(int start) {
String p = pattern.pattern();
int i = Pattern.indexOf(input, p, start);
if (i >= 0) {
this.start = i;
this.end = i + p.length();
return true;
} else {
return false;
public int start(int group) {
if (group == 0) {
return start();
}
throw new UnsupportedOperationException();
}
public int end(int group) {
if (group == 0) {
return end();
}
throw new UnsupportedOperationException();
}
public String group(int group) {
if (group == 0) {
return group();
}
throw new UnsupportedOperationException();
}
public int groupCount() {
return 0;
}
}

View File

@ -10,9 +10,8 @@
package java.util.regex;
import java.util.Iterator;
import java.util.ArrayList;
import java.util.List;
import java.util.LinkedList;
/**
* This is a work in progress.
@ -20,7 +19,7 @@ import java.util.LinkedList;
* @author zsombor and others
*
*/
public class Pattern {
public abstract class Pattern implements PikeVMOpcodes {
public static final int UNIX_LINES = 1;
public static final int CASE_INSENSITIVE = 2;
@ -35,112 +34,26 @@ public class Pattern {
private final String pattern;
protected Pattern(String pattern, int flags) {
this.pattern = trivial(pattern);
this.pattern = pattern;
this.patternFlags = flags;
}
private static String trivial(String pattern) {
StringBuffer buffer = new StringBuffer();
for (int i = 0; i < pattern.length(); ++i) {
char c = pattern.charAt(i);
switch (c) {
case '\\':
if (++i == pattern.length()) {
break;
}
c = pattern.charAt(i);
if (c == '0') {
int len = digits(pattern, ++i, 3, 8);
if (len == 3 && pattern.charAt(i) > '3') {
--len;
}
c = (char)Integer.parseInt(pattern.substring(i, i + len), 8);
i += len - 1;
} else if (c == 'x' || c == 'u') {
int len = digits(pattern, ++i, 4, 16);
c = (char)Integer.parseInt(pattern.substring(i, i + len), 16);
i += len - 1;
} else {
c = unescape(pattern.charAt(i));
}
if (c != -1) {
break;
}
// fallthru
case '.':
case '*':
case '+':
case '?':
case '|':
case '[':
case ']':
case '{':
case '}':
case '(':
case ')':
case '^':
case '$':
throw new UnsupportedOperationException
("only trivial regular expressions are supported so far (" + pattern + ")");
}
buffer.append(c);
}
return buffer.toString();
}
private static int digits(String s, int offset, int maxLength, int base) {
for (int i = 0; ; ++i) {
if (i == maxLength || offset + i >= s.length()) {
return i;
}
int value = s.charAt(offset + i) - '0';
if (value < 0) {
return i;
}
if (base > 10 && value >= 10) {
value += 10 - (value >= 'a' - '0' ? 'a' - '0' : 'A' - '0');
}
if (value >= base) {
return i;
}
}
}
private static char unescape(char c) {
switch (c) {
case '\\':
return c;
case 'a':
return 0x0007;
case 'e':
return 0x001B;
case 'f':
return 0x000C;
case 'n':
return 0x000A;
case 'r':
return 0x000D;
case 't':
return 0x0009;
}
return (char)-1;
}
public static Pattern compile(String regex) {
return new Pattern(regex, 0);
return compile(regex, 0);
}
public static Pattern compile(String regex, int flags) {
return new Pattern(regex, flags);
if (flags != 0) {
throw new UnsupportedOperationException("TODO");
}
return new Compiler().compile(regex);
}
public int flags() {
return patternFlags;
}
public Matcher matcher(CharSequence input) {
return new Matcher(this, input);
}
public abstract Matcher matcher(CharSequence input);
public static boolean matches(String regex, CharSequence input) {
return Pattern.compile(regex).matcher(input).matches();
@ -155,79 +68,22 @@ public class Pattern {
}
public String[] split(CharSequence input, int limit) {
boolean strip;
if (limit < 0) {
strip = false;
if (limit <= 0) {
limit = Integer.MAX_VALUE;
} else if (limit == 0) {
strip = true;
limit = Integer.MAX_VALUE;
} else {
strip = false;
}
List<CharSequence> list = new LinkedList();
int index = 0;
int trailing = 0;
int patternLength = pattern.length();
while (index < input.length() && list.size() < limit - 1) {
int i;
if (patternLength == 0) {
if (list.size() == 0) {
i = 0;
} else {
i = index + 1;
}
} else {
i = indexOf(input, pattern, index);
}
if (i >= 0) {
if (patternLength != 0 && i == index) {
++ trailing;
} else {
trailing = 0;
}
list.add(input.subSequence(index, i));
index = i + patternLength;
} else {
Matcher matcher = matcher(input);
List<String> result = new ArrayList<String>();
int offset = 0;
for (;;) {
if (result.size() >= limit || !matcher.find()) {
break;
}
result.add(input.subSequence(offset, matcher.start()).toString());
offset = matcher.end();
}
if (strip && index > 0 && index == input.length()) {
++ trailing;
} else {
trailing = 0;
if (offset == 0 || offset < input.length()) {
result.add(input.subSequence(offset, input.length()).toString());
}
list.add(input.subSequence(index, input.length()));
String[] result = new String[list.size() - trailing];
int i = 0;
for (Iterator<CharSequence> it = list.iterator();
it.hasNext() && i < result.length; ++ i)
{
result[i] = it.next().toString();
}
return result;
}
static int indexOf(CharSequence haystack, CharSequence needle, int start) {
if (needle.length() == 0) return start;
for (int i = start; i < haystack.length() - needle.length() + 1; ++i) {
int j = 0;
for (; j < needle.length(); ++j) {
if (haystack.charAt(i + j) != needle.charAt(j)) {
break;
}
}
if (j == needle.length()) {
return i;
}
}
return -1;
return result.toArray(new String[result.size()]);
}
}

View File

@ -8,7 +8,7 @@
There is NO WARRANTY for this software. See license.txt for
details. */
package regex;
package java.util.regex;
/**
* A minimal implementation of a regular expression engine.

View File

@ -8,7 +8,7 @@
There is NO WARRANTY for this software. See license.txt for
details. */
package regex;
package java.util.regex;
/**
* Opcodes for the Pike VM.

View File

@ -8,7 +8,7 @@
There is NO WARRANTY for this software. See license.txt for
details. */
package regex;
package java.util.regex;
/**
* A minimal implementation of a regular expression matcher.

View File

@ -8,7 +8,7 @@
There is NO WARRANTY for this software. See license.txt for
details. */
package regex;
package java.util.regex;
/**
* A minimal implementation of a regular expression engine.

View File

@ -8,7 +8,7 @@
There is NO WARRANTY for this software. See license.txt for
details. */
package regex;
package java.util.regex;
/**
* This is a work in progress.

View File

@ -8,7 +8,7 @@
There is NO WARRANTY for this software. See license.txt for
details. */
package regex;
package java.util.regex;
import java.util.Iterator;
import java.util.List;

View File

@ -1344,7 +1344,7 @@ vm-classes = \
avian/*.class \
avian/resource/*.class
test-support-sources = $(shell find $(test)/avian $(test)/regex -name '*.java')
test-support-sources = $(shell find $(test)/avian/ -name '*.java')
test-sources = $(wildcard $(test)/*.java)
test-cpp-sources = $(wildcard $(test)/*.cpp)
test-sources += $(test-support-sources)

View File

@ -1,5 +1,5 @@
import regex.Matcher;
import regex.Pattern;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Regex {
private static void expect(boolean v) {

View File

@ -1,119 +0,0 @@
/* Copyright (c) 2008-2013, Avian Contributors
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
There is NO WARRANTY for this software. See license.txt for
details. */
package regex;
/**
 * Base class for regular expression matchers.
 *
 * This class keeps track of the input and of the bounds of the most recent
 * match, and implements the replace and group accessors on top of the two
 * abstract primitives {@link #matches()} and {@link #find(int)}. Only the
 * whole match (group 0) is supported here; subclasses that support capturing
 * groups are expected to override the group-related methods.
 *
 * This is a work in progress.
 *
 * @author zsombor and others
 */
public abstract class Matcher {
  /** The character sequence being matched against. */
  protected CharSequence input;
  /** Offset of the first character of the most recent match. */
  protected int start;
  /** Offset just past the last character of the most recent match. */
  protected int end;

  /**
   * Creates a matcher for the given input.
   *
   * @param input the character sequence to match against
   */
  public Matcher(CharSequence input) {
    reset(input);
  }

  /**
   * Tries to match the complete input.
   *
   * @return whether the complete input matched
   */
  public abstract boolean matches();

  /**
   * Looks for the next match, starting at the end of the previous one (or at
   * the beginning of the input after a reset).
   *
   * @return whether a match was found
   */
  public boolean find() {
    return find(end);
  }

  /**
   * Looks for the next match at or after the given offset. Implementations
   * are expected to record the bounds of the match in {@link #start} and
   * {@link #end}.
   *
   * @param start the offset at which to start looking
   * @return whether a match was found
   */
  public abstract boolean find(int start);

  /**
   * Forgets the most recent match, keeping the current input.
   *
   * @return this matcher
   */
  public Matcher reset() {
    return reset(input);
  }

  /**
   * Replaces the input and forgets the most recent match.
   *
   * @param input the new character sequence to match against
   * @return this matcher
   */
  public Matcher reset(CharSequence input) {
    this.input = input;
    start = 0;
    end = 0;
    return this;
  }

  /**
   * Replaces every match in the input with the given literal replacement.
   *
   * @param replacement the text to insert in place of each match
   * @return the input with all matches replaced
   */
  public String replaceAll(String replacement) {
    return replace(replacement, Integer.MAX_VALUE);
  }

  /**
   * Replaces the first match in the input with the given literal
   * replacement.
   *
   * @param replacement the text to insert in place of the match
   * @return the input with the first match replaced
   */
  public String replaceFirst(String replacement) {
    return replace(replacement, 1);
  }

  /**
   * Replaces up to {@code limit} matches with the given replacement.
   *
   * The matcher is reset first. The replacement is inserted verbatim (group
   * references are not interpreted). If nothing is replaced -- because the
   * input is empty, there is no match, or {@code limit} is not positive --
   * the input is returned unchanged. As matching is only attempted at
   * offsets before the end of the input, an empty match at the very end of
   * a non-empty input is not replaced.
   *
   * @param replacement the text to insert in place of each match
   * @param limit the maximal number of matches to replace
   * @return the input with the matches replaced
   */
  protected String replace(String replacement, int limit) {
    reset();

    final int length = input.length();
    StringBuilder sb = null;
    int index = 0;
    int count = 0;
    while (count < limit && index < length) {
      if (!find(index)) {
        break;
      }
      if (sb == null) {
        sb = new StringBuilder();
      }
      if (start > index) {
        sb.append(input.subSequence(index, start));
      }
      sb.append(replacement);
      ++ count;
      if (end > start) {
        index = end;
      } else {
        // An empty match does not consume anything; copy one character and
        // step over it, otherwise the same match would be found forever.
        if (end < length) {
          sb.append(input.charAt(end));
        }
        index = end + 1;
      }
    }

    if (sb == null) {
      // Nothing was replaced (this includes empty input and limit <= 0).
      return input.toString();
    }
    if (index < length) {
      sb.append(input.subSequence(index, length));
    }
    return sb.toString();
  }

  /**
   * @return the offset of the first character of the most recent match
   */
  public int start() {
    return start;
  }

  /**
   * @return the offset just past the last character of the most recent match
   */
  public int end() {
    return end;
  }

  /**
   * @return the text of the most recent match
   */
  public String group() {
    return input.subSequence(start, end).toString();
  }

  /**
   * Returns the start offset of the given group of the most recent match.
   *
   * @param group the group index; only 0 (the whole match) is supported here
   * @return the start offset of the group
   * @throws UnsupportedOperationException if {@code group} is not 0
   */
  public int start(int group) {
    if (group == 0) {
      return start();
    }
    throw new UnsupportedOperationException();
  }

  /**
   * Returns the end offset of the given group of the most recent match.
   *
   * @param group the group index; only 0 (the whole match) is supported here
   * @return the end offset of the group
   * @throws UnsupportedOperationException if {@code group} is not 0
   */
  public int end(int group) {
    if (group == 0) {
      return end();
    }
    throw new UnsupportedOperationException();
  }

  /**
   * Returns the text of the given group of the most recent match.
   *
   * @param group the group index; only 0 (the whole match) is supported here
   * @return the text of the group
   * @throws UnsupportedOperationException if {@code group} is not 0
   */
  public String group(int group) {
    if (group == 0) {
      return group();
    }
    throw new UnsupportedOperationException();
  }

  /**
   * @return the number of capturing groups; always 0 in this base class
   */
  public int groupCount() {
    return 0;
  }
}

View File

@ -1,89 +0,0 @@
/* Copyright (c) 2008-2013, Avian Contributors
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
There is NO WARRANTY for this software. See license.txt for
details. */
package regex;
import java.util.ArrayList;
import java.util.List;
/**
 * A compiled regular expression.
 *
 * Instances are obtained via {@link #compile(String)}, which delegates to
 * {@code Compiler}. Concrete subclasses provide the actual matching by
 * implementing {@link #matcher(CharSequence)}.
 *
 * This is a work in progress.
 *
 * @author zsombor and others
 *
 */
public abstract class Pattern implements PikeVMOpcodes {

  // Flag bits; compile(String, int) currently rejects any non-zero flags.
  public static final int UNIX_LINES       = 1;
  public static final int CASE_INSENSITIVE = 2;
  public static final int COMMENTS         = 4;
  public static final int MULTILINE        = 8;
  public static final int LITERAL          = 16;
  public static final int DOTALL           = 32;
  public static final int UNICODE_CASE     = 64;
  public static final int CANON_EQ         = 128;

  private final int patternFlags;
  private final String pattern;

  /**
   * Records the source text and the flags of the regular expression.
   *
   * @param pattern the regular expression as passed to the compiler
   * @param flags the flag bits
   */
  protected Pattern(String pattern, int flags) {
    this.pattern = pattern;
    this.patternFlags = flags;
  }

  /**
   * Compiles the given regular expression without any flags.
   *
   * @param regex the regular expression
   * @return the compiled pattern
   */
  public static Pattern compile(String regex) {
    return compile(regex, 0);
  }

  /**
   * Compiles the given regular expression.
   *
   * @param regex the regular expression
   * @param flags the flag bits; only 0 is supported so far
   * @return the compiled pattern
   * @throws UnsupportedOperationException if {@code flags} is not 0
   */
  public static Pattern compile(String regex, int flags) {
    if (flags != 0) {
      throw new UnsupportedOperationException("TODO");
    }
    return new Compiler().compile(regex);
  }

  /**
   * @return the flag bits this pattern was constructed with
   */
  public int flags() {
    return patternFlags;
  }

  /**
   * Creates a matcher that applies this pattern to the given input.
   *
   * @param input the character sequence to match against
   * @return a new matcher
   */
  public abstract Matcher matcher(CharSequence input);

  /**
   * Compiles the given regular expression and tests whether it matches the
   * complete input.
   *
   * @param regex the regular expression
   * @param input the character sequence to match against
   * @return whether the complete input matched
   */
  public static boolean matches(String regex, CharSequence input) {
    return Pattern.compile(regex).matcher(input).matches();
  }

  /**
   * @return the regular expression this pattern was constructed with
   */
  public String pattern() {
    return pattern;
  }

  /**
   * Splits the input around matches of this pattern, discarding trailing
   * empty strings; equivalent to {@code split(input, 0)}.
   *
   * @param input the character sequence to split
   * @return the pieces between the matches
   */
  public String[] split(CharSequence input) {
    return split(input, 0);
  }

  /**
   * Splits the input around matches of this pattern.
   *
   * The semantics follow the documented contract of
   * {@code java.util.regex.Pattern.split(CharSequence, int)}:
   * <ul>
   * <li>a positive {@code limit} yields at most {@code limit} pieces, the
   * last one containing the unsplit remainder of the input,</li>
   * <li>a {@code limit} of zero splits as often as possible and discards
   * trailing empty strings,</li>
   * <li>a negative {@code limit} splits as often as possible and keeps
   * trailing empty strings.</li>
   * </ul>
   * If the pattern does not match at all, the result consists of the input
   * alone. An empty match at the very beginning of the input does not
   * produce a leading empty string.
   *
   * @param input the character sequence to split
   * @param limit the maximal number of pieces, or a non-positive value as
   *   described above
   * @return the pieces between the matches
   */
  public String[] split(CharSequence input, int limit) {
    final int length = input.length();
    // A positive limit caps the number of pieces, i.e. allows limit - 1
    // splits; the remainder is always appended as the final piece.
    final int maxSplits = limit > 0 ? limit - 1 : Integer.MAX_VALUE;

    Matcher matcher = matcher(input);
    List<String> result = new ArrayList<String>();
    int offset = 0;     // start of the piece currently being collected
    int searchFrom = 0; // where to look for the next delimiter
    while (result.size() < maxSplits && searchFrom <= length
        && matcher.find(searchFrom)) {
      int matchStart = matcher.start();
      int matchEnd = matcher.end();
      boolean emptyMatch = matchEnd == matchStart;
      // Step over empty matches, otherwise the same position would be found
      // over and over again.
      searchFrom = emptyMatch ? matchEnd + 1 : matchEnd;
      if (emptyMatch && matchStart == 0) {
        // no leading empty string for an empty match at the beginning
        continue;
      }
      result.add(input.subSequence(offset, matchStart).toString());
      offset = matchEnd;
    }

    if (result.size() == 0) {
      // no (usable) match at all
      return new String[] { input.toString() };
    }
    result.add(input.subSequence(offset, length).toString());

    if (limit == 0) {
      for (int last = result.size() - 1;
          last >= 0 && result.get(last).length() == 0; -- last) {
        result.remove(last);
      }
    }
    return result.toArray(new String[result.size()]);
  }
}