mirror of
https://github.com/corda/corda.git
synced 2025-01-04 04:04:27 +00:00
Replace java.util.regex.* with the new regular expression engine
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
parent
e96379ee19
commit
6626b477ad
@ -8,7 +8,7 @@
|
||||
There is NO WARRANTY for this software. See license.txt for
|
||||
details. */
|
||||
|
||||
package regex;
|
||||
package java.util.regex;
|
||||
|
||||
/**
|
||||
* A class to match classes of characters.
|
@ -8,7 +8,7 @@
|
||||
There is NO WARRANTY for this software. See license.txt for
|
||||
details. */
|
||||
|
||||
package regex;
|
||||
package java.util.regex;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Stack;
|
@ -15,27 +15,23 @@ package java.util.regex;
|
||||
*
|
||||
* @author zsombor and others
|
||||
*/
|
||||
public class Matcher {
|
||||
private final Pattern pattern;
|
||||
private CharSequence input;
|
||||
private int start;
|
||||
private int end;
|
||||
public abstract class Matcher {
|
||||
protected CharSequence input;
|
||||
protected int start;
|
||||
protected int end;
|
||||
|
||||
Matcher(Pattern pattern, CharSequence input) {
|
||||
this.pattern = pattern;
|
||||
this.input = input;
|
||||
public Matcher(CharSequence input) {
|
||||
reset(input);
|
||||
}
|
||||
|
||||
public boolean matches() {
|
||||
if (pattern.pattern().equals(input.toString())) {
|
||||
start = 0;
|
||||
end = input.length();
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
public abstract boolean matches();
|
||||
|
||||
public boolean find() {
|
||||
return find(end);
|
||||
}
|
||||
|
||||
public abstract boolean find(int start);
|
||||
|
||||
public Matcher reset() {
|
||||
return reset(input);
|
||||
}
|
||||
@ -47,10 +43,6 @@ public class Matcher {
|
||||
return this;
|
||||
}
|
||||
|
||||
public int start() {
|
||||
return start;
|
||||
}
|
||||
|
||||
public String replaceAll(String replacement) {
|
||||
return replace(replacement, Integer.MAX_VALUE);
|
||||
}
|
||||
@ -59,7 +51,7 @@ public class Matcher {
|
||||
return replace(replacement, 1);
|
||||
}
|
||||
|
||||
private String replace(String replacement, int limit) {
|
||||
protected String replace(String replacement, int limit) {
|
||||
reset();
|
||||
|
||||
StringBuilder sb = null;
|
||||
@ -88,23 +80,40 @@ public class Matcher {
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public int start() {
|
||||
return start;
|
||||
}
|
||||
|
||||
public int end() {
|
||||
return end;
|
||||
}
|
||||
|
||||
public boolean find() {
|
||||
return find(end);
|
||||
public String group() {
|
||||
return input.subSequence(start, end).toString();
|
||||
}
|
||||
|
||||
public boolean find(int start) {
|
||||
String p = pattern.pattern();
|
||||
int i = Pattern.indexOf(input, p, start);
|
||||
if (i >= 0) {
|
||||
this.start = i;
|
||||
this.end = i + p.length();
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
public int start(int group) {
|
||||
if (group == 0) {
|
||||
return start();
|
||||
}
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public int end(int group) {
|
||||
if (group == 0) {
|
||||
return end();
|
||||
}
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public String group(int group) {
|
||||
if (group == 0) {
|
||||
return group();
|
||||
}
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public int groupCount() {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
@ -10,9 +10,8 @@
|
||||
|
||||
package java.util.regex;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.LinkedList;
|
||||
|
||||
/**
|
||||
* This is a work in progress.
|
||||
@ -20,7 +19,7 @@ import java.util.LinkedList;
|
||||
* @author zsombor and others
|
||||
*
|
||||
*/
|
||||
public class Pattern {
|
||||
public abstract class Pattern implements PikeVMOpcodes {
|
||||
|
||||
public static final int UNIX_LINES = 1;
|
||||
public static final int CASE_INSENSITIVE = 2;
|
||||
@ -35,112 +34,26 @@ public class Pattern {
|
||||
private final String pattern;
|
||||
|
||||
protected Pattern(String pattern, int flags) {
|
||||
this.pattern = trivial(pattern);
|
||||
this.pattern = pattern;
|
||||
this.patternFlags = flags;
|
||||
}
|
||||
|
||||
private static String trivial(String pattern) {
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
for (int i = 0; i < pattern.length(); ++i) {
|
||||
char c = pattern.charAt(i);
|
||||
switch (c) {
|
||||
case '\\':
|
||||
if (++i == pattern.length()) {
|
||||
break;
|
||||
}
|
||||
c = pattern.charAt(i);
|
||||
if (c == '0') {
|
||||
int len = digits(pattern, ++i, 3, 8);
|
||||
if (len == 3 && pattern.charAt(i) > '3') {
|
||||
--len;
|
||||
}
|
||||
c = (char)Integer.parseInt(pattern.substring(i, i + len), 8);
|
||||
i += len - 1;
|
||||
} else if (c == 'x' || c == 'u') {
|
||||
int len = digits(pattern, ++i, 4, 16);
|
||||
c = (char)Integer.parseInt(pattern.substring(i, i + len), 16);
|
||||
i += len - 1;
|
||||
} else {
|
||||
c = unescape(pattern.charAt(i));
|
||||
}
|
||||
if (c != -1) {
|
||||
break;
|
||||
}
|
||||
// fallthru
|
||||
case '.':
|
||||
case '*':
|
||||
case '+':
|
||||
case '?':
|
||||
case '|':
|
||||
case '[':
|
||||
case ']':
|
||||
case '{':
|
||||
case '}':
|
||||
case '(':
|
||||
case ')':
|
||||
case '^':
|
||||
case '$':
|
||||
throw new UnsupportedOperationException
|
||||
("only trivial regular expressions are supported so far (" + pattern + ")");
|
||||
}
|
||||
buffer.append(c);
|
||||
}
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
private static int digits(String s, int offset, int maxLength, int base) {
|
||||
for (int i = 0; ; ++i) {
|
||||
if (i == maxLength || offset + i >= s.length()) {
|
||||
return i;
|
||||
}
|
||||
int value = s.charAt(offset + i) - '0';
|
||||
if (value < 0) {
|
||||
return i;
|
||||
}
|
||||
if (base > 10 && value >= 10) {
|
||||
value += 10 - (value >= 'a' - '0' ? 'a' - '0' : 'A' - '0');
|
||||
}
|
||||
if (value >= base) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static char unescape(char c) {
|
||||
switch (c) {
|
||||
case '\\':
|
||||
return c;
|
||||
case 'a':
|
||||
return 0x0007;
|
||||
case 'e':
|
||||
return 0x001B;
|
||||
case 'f':
|
||||
return 0x000C;
|
||||
case 'n':
|
||||
return 0x000A;
|
||||
case 'r':
|
||||
return 0x000D;
|
||||
case 't':
|
||||
return 0x0009;
|
||||
}
|
||||
return (char)-1;
|
||||
}
|
||||
|
||||
public static Pattern compile(String regex) {
|
||||
return new Pattern(regex, 0);
|
||||
return compile(regex, 0);
|
||||
}
|
||||
|
||||
public static Pattern compile(String regex, int flags) {
|
||||
return new Pattern(regex, flags);
|
||||
if (flags != 0) {
|
||||
throw new UnsupportedOperationException("TODO");
|
||||
}
|
||||
return new Compiler().compile(regex);
|
||||
}
|
||||
|
||||
public int flags() {
|
||||
return patternFlags;
|
||||
}
|
||||
|
||||
public Matcher matcher(CharSequence input) {
|
||||
return new Matcher(this, input);
|
||||
}
|
||||
public abstract Matcher matcher(CharSequence input);
|
||||
|
||||
public static boolean matches(String regex, CharSequence input) {
|
||||
return Pattern.compile(regex).matcher(input).matches();
|
||||
@ -155,79 +68,22 @@ public class Pattern {
|
||||
}
|
||||
|
||||
public String[] split(CharSequence input, int limit) {
|
||||
boolean strip;
|
||||
if (limit < 0) {
|
||||
strip = false;
|
||||
if (limit <= 0) {
|
||||
limit = Integer.MAX_VALUE;
|
||||
} else if (limit == 0) {
|
||||
strip = true;
|
||||
limit = Integer.MAX_VALUE;
|
||||
} else {
|
||||
strip = false;
|
||||
}
|
||||
|
||||
List<CharSequence> list = new LinkedList();
|
||||
int index = 0;
|
||||
int trailing = 0;
|
||||
int patternLength = pattern.length();
|
||||
while (index < input.length() && list.size() < limit - 1) {
|
||||
int i;
|
||||
if (patternLength == 0) {
|
||||
if (list.size() == 0) {
|
||||
i = 0;
|
||||
} else {
|
||||
i = index + 1;
|
||||
}
|
||||
} else {
|
||||
i = indexOf(input, pattern, index);
|
||||
}
|
||||
|
||||
if (i >= 0) {
|
||||
if (patternLength != 0 && i == index) {
|
||||
++ trailing;
|
||||
} else {
|
||||
trailing = 0;
|
||||
}
|
||||
|
||||
list.add(input.subSequence(index, i));
|
||||
index = i + patternLength;
|
||||
} else {
|
||||
Matcher matcher = matcher(input);
|
||||
List<String> result = new ArrayList<String>();
|
||||
int offset = 0;
|
||||
for (;;) {
|
||||
if (result.size() >= limit || !matcher.find()) {
|
||||
break;
|
||||
}
|
||||
result.add(input.subSequence(offset, matcher.start()).toString());
|
||||
offset = matcher.end();
|
||||
}
|
||||
|
||||
if (strip && index > 0 && index == input.length()) {
|
||||
++ trailing;
|
||||
} else {
|
||||
trailing = 0;
|
||||
if (offset == 0 || offset < input.length()) {
|
||||
result.add(input.subSequence(offset, input.length()).toString());
|
||||
}
|
||||
list.add(input.subSequence(index, input.length()));
|
||||
|
||||
String[] result = new String[list.size() - trailing];
|
||||
int i = 0;
|
||||
for (Iterator<CharSequence> it = list.iterator();
|
||||
it.hasNext() && i < result.length; ++ i)
|
||||
{
|
||||
result[i] = it.next().toString();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static int indexOf(CharSequence haystack, CharSequence needle, int start) {
|
||||
if (needle.length() == 0) return start;
|
||||
|
||||
for (int i = start; i < haystack.length() - needle.length() + 1; ++i) {
|
||||
int j = 0;
|
||||
for (; j < needle.length(); ++j) {
|
||||
if (haystack.charAt(i + j) != needle.charAt(j)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (j == needle.length()) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
return result.toArray(new String[result.size()]);
|
||||
}
|
||||
}
|
||||
|
@ -8,7 +8,7 @@
|
||||
There is NO WARRANTY for this software. See license.txt for
|
||||
details. */
|
||||
|
||||
package regex;
|
||||
package java.util.regex;
|
||||
|
||||
/**
|
||||
* A minimal implementation of a regular expression engine.
|
@ -8,7 +8,7 @@
|
||||
There is NO WARRANTY for this software. See license.txt for
|
||||
details. */
|
||||
|
||||
package regex;
|
||||
package java.util.regex;
|
||||
|
||||
/**
|
||||
* Opcodes for the Pike VM.
|
@ -8,7 +8,7 @@
|
||||
There is NO WARRANTY for this software. See license.txt for
|
||||
details. */
|
||||
|
||||
package regex;
|
||||
package java.util.regex;
|
||||
|
||||
/**
|
||||
* A minimal implementation of a regular expression matcher.
|
@ -8,7 +8,7 @@
|
||||
There is NO WARRANTY for this software. See license.txt for
|
||||
details. */
|
||||
|
||||
package regex;
|
||||
package java.util.regex;
|
||||
|
||||
/**
|
||||
* A minimal implementation of a regular expression engine.
|
@ -8,7 +8,7 @@
|
||||
There is NO WARRANTY for this software. See license.txt for
|
||||
details. */
|
||||
|
||||
package regex;
|
||||
package java.util.regex;
|
||||
|
||||
/**
|
||||
* This is a work in progress.
|
@ -8,7 +8,7 @@
|
||||
There is NO WARRANTY for this software. See license.txt for
|
||||
details. */
|
||||
|
||||
package regex;
|
||||
package java.util.regex;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
2
makefile
2
makefile
@ -1344,7 +1344,7 @@ vm-classes = \
|
||||
avian/*.class \
|
||||
avian/resource/*.class
|
||||
|
||||
test-support-sources = $(shell find $(test)/avian $(test)/regex -name '*.java')
|
||||
test-support-sources = $(shell find $(test)/avian/ -name '*.java')
|
||||
test-sources = $(wildcard $(test)/*.java)
|
||||
test-cpp-sources = $(wildcard $(test)/*.cpp)
|
||||
test-sources += $(test-support-sources)
|
||||
|
@ -1,5 +1,5 @@
|
||||
import regex.Matcher;
|
||||
import regex.Pattern;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class Regex {
|
||||
private static void expect(boolean v) {
|
||||
|
@ -1,119 +0,0 @@
|
||||
/* Copyright (c) 2008-2013, Avian Contributors
|
||||
|
||||
Permission to use, copy, modify, and/or distribute this software
|
||||
for any purpose with or without fee is hereby granted, provided
|
||||
that the above copyright notice and this permission notice appear
|
||||
in all copies.
|
||||
|
||||
There is NO WARRANTY for this software. See license.txt for
|
||||
details. */
|
||||
|
||||
package regex;
|
||||
|
||||
/**
|
||||
* This is a work in progress.
|
||||
*
|
||||
* @author zsombor and others
|
||||
*/
|
||||
public abstract class Matcher {
  /** The character sequence being searched; replaced by {@link #reset(CharSequence)}. */
  protected CharSequence input;

  /** Start offset of the current match as recorded by the subclass; 0 after a reset. */
  protected int start;

  /**
   * End offset (exclusive) of the current match as recorded by the subclass;
   * 0 after a reset. {@link #find()} resumes searching from here.
   */
  protected int end;

  /**
   * Creates a matcher over the given input.
   *
   * NOTE: this calls the overridable {@link #reset(CharSequence)}, so a
   * subclass overriding it must not rely on its own fields being initialized.
   *
   * @param input the character sequence to search
   */
  public Matcher(CharSequence input) {
    reset(input);
  }

  /**
   * Attempts to match the entire input against the pattern.
   *
   * @return true if the whole input matches
   */
  public abstract boolean matches();

  /**
   * Searches for the next match, starting at the end of the previous one
   * (or at offset 0 directly after a reset).
   *
   * @return true if a match was found
   */
  public boolean find() {
    return find(end);
  }

  /**
   * Searches for a match beginning at or after the given offset.
   * Implementations are expected to update {@link #start} and {@link #end}
   * on success. Since {@link #find()} passes the previous {@code end}, the
   * offset may be equal to the input length.
   *
   * @param start offset at which to begin searching
   * @return true if a match was found
   */
  public abstract boolean find(int start);

  /**
   * Resets this matcher, keeping the current input.
   *
   * @return this matcher
   */
  public Matcher reset() {
    return reset(input);
  }

  /**
   * Resets this matcher to search the given input from offset 0.
   *
   * @param input the new character sequence to search
   * @return this matcher
   */
  public Matcher reset(CharSequence input) {
    this.input = input;
    start = 0;
    end = 0;
    return this;
  }

  /**
   * Replaces every match in the input with the given text.
   *
   * @param replacement text inserted verbatim in place of each match
   * @return the input with all matches replaced
   */
  public String replaceAll(String replacement) {
    return replace(replacement, Integer.MAX_VALUE);
  }

  /**
   * Replaces the first match in the input with the given text.
   *
   * @param replacement text inserted verbatim in place of the match
   * @return the input with the first match replaced
   */
  public String replaceFirst(String replacement) {
    return replace(replacement, 1);
  }

  /**
   * Resets the matcher and replaces up to {@code limit} matches, scanning
   * from left to right. The replacement is appended as-is; group references
   * are not interpreted.
   *
   * If nothing matches (including empty input or a non-positive limit) the
   * input is returned unchanged. After a zero-length match the search
   * resumes one character further on, so such matches cannot stall the loop.
   *
   * @param replacement text inserted in place of each match
   * @param limit maximum number of matches to replace
   * @return the resulting string
   */
  protected String replace(String replacement, int limit) {
    reset();

    final int length = input.length();
    StringBuilder sb = null;
    int copied = 0; // input before this offset has already been emitted
    int from = 0;   // offset at which the next search starts
    int count = 0;
    while (count < limit && from <= length && find(from)) {
      if (sb == null) {
        sb = new StringBuilder();
      }
      if (start > copied) {
        sb.append(input.subSequence(copied, start));
      }
      sb.append(replacement);
      copied = end;
      // An empty match makes no progress by itself; step over one character
      // (it is emitted later, as part of the text preceding the next match).
      from = end > start ? end : end + 1;
      ++ count;
    }

    if (sb == null) {
      // No match at all: nothing was replaced.
      return input.toString();
    }
    if (copied < length) {
      sb.append(input.subSequence(copied, length));
    }
    return sb.toString();
  }

  /**
   * @return the start offset of the current match
   */
  public int start() {
    return start;
  }

  /**
   * @return the offset just past the end of the current match
   */
  public int end() {
    return end;
  }

  /**
   * @return the text of the current match
   */
  public String group() {
    return input.subSequence(start, end).toString();
  }

  /**
   * Returns the start offset of the given group. Only group 0 (the whole
   * match) is supported by this base class.
   *
   * @param group the group index
   * @return the start offset of the group
   * @throws UnsupportedOperationException if {@code group} is not 0
   */
  public int start(int group) {
    if (group == 0) {
      return start();
    }
    throw new UnsupportedOperationException();
  }

  /**
   * Returns the end offset of the given group. Only group 0 (the whole
   * match) is supported by this base class.
   *
   * @param group the group index
   * @return the offset just past the end of the group
   * @throws UnsupportedOperationException if {@code group} is not 0
   */
  public int end(int group) {
    if (group == 0) {
      return end();
    }
    throw new UnsupportedOperationException();
  }

  /**
   * Returns the text of the given group. Only group 0 (the whole match) is
   * supported by this base class.
   *
   * @param group the group index
   * @return the text matched by the group
   * @throws UnsupportedOperationException if {@code group} is not 0
   */
  public String group(int group) {
    if (group == 0) {
      return group();
    }
    throw new UnsupportedOperationException();
  }

  /**
   * @return the number of capturing groups; always 0 in this base class
   */
  public int groupCount() {
    return 0;
  }
}
|
@ -1,89 +0,0 @@
|
||||
/* Copyright (c) 2008-2013, Avian Contributors
|
||||
|
||||
Permission to use, copy, modify, and/or distribute this software
|
||||
for any purpose with or without fee is hereby granted, provided
|
||||
that the above copyright notice and this permission notice appear
|
||||
in all copies.
|
||||
|
||||
There is NO WARRANTY for this software. See license.txt for
|
||||
details. */
|
||||
|
||||
package regex;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* This is a work in progress.
|
||||
*
|
||||
* @author zsombor and others
|
||||
*
|
||||
*/
|
||||
public abstract class Pattern implements PikeVMOpcodes {
|
||||
|
||||
public static final int UNIX_LINES = 1;
|
||||
public static final int CASE_INSENSITIVE = 2;
|
||||
public static final int COMMENTS = 4;
|
||||
public static final int MULTILINE = 8;
|
||||
public static final int LITERAL = 16;
|
||||
public static final int DOTALL = 32;
|
||||
public static final int UNICODE_CASE = 64;
|
||||
public static final int CANON_EQ = 128;
|
||||
|
||||
private final int patternFlags;
|
||||
private final String pattern;
|
||||
|
||||
protected Pattern(String pattern, int flags) {
|
||||
this.pattern = pattern;
|
||||
this.patternFlags = flags;
|
||||
}
|
||||
|
||||
public static Pattern compile(String regex) {
|
||||
return compile(regex, 0);
|
||||
}
|
||||
|
||||
public static Pattern compile(String regex, int flags) {
|
||||
if (flags != 0) {
|
||||
throw new UnsupportedOperationException("TODO");
|
||||
}
|
||||
return new Compiler().compile(regex);
|
||||
}
|
||||
|
||||
public int flags() {
|
||||
return patternFlags;
|
||||
}
|
||||
|
||||
public abstract Matcher matcher(CharSequence input);
|
||||
|
||||
public static boolean matches(String regex, CharSequence input) {
|
||||
return Pattern.compile(regex).matcher(input).matches();
|
||||
}
|
||||
|
||||
public String pattern() {
|
||||
return pattern;
|
||||
}
|
||||
|
||||
public String[] split(CharSequence input) {
|
||||
return split(input, 0);
|
||||
}
|
||||
|
||||
public String[] split(CharSequence input, int limit) {
|
||||
if (limit <= 0) {
|
||||
limit = Integer.MAX_VALUE;
|
||||
}
|
||||
Matcher matcher = matcher(input);
|
||||
List<String> result = new ArrayList<String>();
|
||||
int offset = 0;
|
||||
for (;;) {
|
||||
if (result.size() >= limit || !matcher.find()) {
|
||||
break;
|
||||
}
|
||||
result.add(input.subSequence(offset, matcher.start()).toString());
|
||||
offset = matcher.end();
|
||||
}
|
||||
if (offset == 0 || offset < input.length()) {
|
||||
result.add(input.subSequence(offset, input.length()).toString());
|
||||
}
|
||||
return result.toArray(new String[result.size()]);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user