mirror of
https://github.com/corda/corda.git
synced 2025-01-24 05:18:24 +00:00
84829dc390
This makes both the Pattern and the Matcher class abstract so that more specialized patterns than the trivial patterns we support so far can be implemented as convenient subclasses of the respective abstract base classes. To ease development, we work on copies in test/regex/ in the 'regex' package. That way, it can be developed in Eclipse (because it does not interfere with Oracle JRE's java.util.regex.* classes). Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
200 lines
4.7 KiB
Java
200 lines
4.7 KiB
Java
/* Copyright (c) 2008-2013, Avian Contributors

   Permission to use, copy, modify, and/or distribute this software
   for any purpose with or without fee is hereby granted, provided
   that the above copyright notice and this permission notice appear
   in all copies.

   There is NO WARRANTY for this software.  See license.txt for
   details. */
|
|
|
|
package regex;
|
|
|
|
import java.util.Iterator;
|
|
import java.util.List;
|
|
import java.util.LinkedList;
|
|
|
|
/**
|
|
* This is a work in progress.
|
|
*
|
|
* @author zsombor and others
|
|
*
|
|
*/
|
|
public class TrivialPattern extends Pattern {
|
|
|
|
private final String trivialPattern;
|
|
|
|
TrivialPattern(String pattern, int flags) {
|
|
super(pattern, flags);
|
|
this.trivialPattern = trivial(pattern);
|
|
}
|
|
|
|
private static String trivial(String pattern) {
|
|
StringBuffer buffer = new StringBuffer();
|
|
for (int i = 0; i < pattern.length(); ++i) {
|
|
char c = pattern.charAt(i);
|
|
switch (c) {
|
|
case '\\':
|
|
if (++i == pattern.length()) {
|
|
break;
|
|
}
|
|
c = pattern.charAt(i);
|
|
if (c == '0') {
|
|
int len = digits(pattern, ++i, 3, 8);
|
|
if (len == 3 && pattern.charAt(i) > '3') {
|
|
--len;
|
|
}
|
|
c = (char)Integer.parseInt(pattern.substring(i, i + len), 8);
|
|
i += len - 1;
|
|
} else if (c == 'x' || c == 'u') {
|
|
int len = digits(pattern, ++i, 4, 16);
|
|
c = (char)Integer.parseInt(pattern.substring(i, i + len), 16);
|
|
i += len - 1;
|
|
} else {
|
|
c = unescape(pattern.charAt(i));
|
|
}
|
|
if (c != -1) {
|
|
break;
|
|
}
|
|
// fallthru
|
|
case '.':
|
|
case '*':
|
|
case '+':
|
|
case '?':
|
|
case '|':
|
|
case '[':
|
|
case ']':
|
|
case '{':
|
|
case '}':
|
|
case '(':
|
|
case ')':
|
|
case '^':
|
|
case '$':
|
|
throw new UnsupportedOperationException
|
|
("only trivial regular expressions are supported so far (" + pattern + ")");
|
|
}
|
|
buffer.append(c);
|
|
}
|
|
return buffer.toString();
|
|
}
|
|
|
|
private static int digits(String s, int offset, int maxLength, int base) {
|
|
for (int i = 0; ; ++i) {
|
|
if (i == maxLength || offset + i >= s.length()) {
|
|
return i;
|
|
}
|
|
int value = s.charAt(offset + i) - '0';
|
|
if (value < 0) {
|
|
return i;
|
|
}
|
|
if (base > 10 && value >= 10) {
|
|
value += 10 - (value >= 'a' - '0' ? 'a' - '0' : 'A' - '0');
|
|
}
|
|
if (value >= base) {
|
|
return i;
|
|
}
|
|
}
|
|
}
|
|
|
|
private static char unescape(char c) {
|
|
switch (c) {
|
|
case '\\':
|
|
return c;
|
|
case 'a':
|
|
return 0x0007;
|
|
case 'e':
|
|
return 0x001B;
|
|
case 'f':
|
|
return 0x000C;
|
|
case 'n':
|
|
return 0x000A;
|
|
case 'r':
|
|
return 0x000D;
|
|
case 't':
|
|
return 0x0009;
|
|
}
|
|
return (char)-1;
|
|
}
|
|
|
|
public Matcher matcher(CharSequence input) {
|
|
return new TrivialMatcher(trivialPattern, input);
|
|
}
|
|
|
|
public String[] split(CharSequence input, int limit) {
|
|
boolean strip;
|
|
if (limit < 0) {
|
|
strip = false;
|
|
limit = Integer.MAX_VALUE;
|
|
} else if (limit == 0) {
|
|
strip = true;
|
|
limit = Integer.MAX_VALUE;
|
|
} else {
|
|
strip = false;
|
|
}
|
|
|
|
List<CharSequence> list = new LinkedList<CharSequence>();
|
|
int index = 0;
|
|
int trailing = 0;
|
|
int patternLength = trivialPattern.length();
|
|
while (index < input.length() && list.size() < limit - 1) {
|
|
int i;
|
|
if (patternLength == 0) {
|
|
if (list.size() == 0) {
|
|
i = 0;
|
|
} else {
|
|
i = index + 1;
|
|
}
|
|
} else {
|
|
i = indexOf(input, trivialPattern, index);
|
|
}
|
|
|
|
if (i >= 0) {
|
|
if (patternLength != 0 && i == index) {
|
|
++ trailing;
|
|
} else {
|
|
trailing = 0;
|
|
}
|
|
|
|
list.add(input.subSequence(index, i));
|
|
index = i + patternLength;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (strip && index > 0 && index == input.length()) {
|
|
++ trailing;
|
|
} else {
|
|
trailing = 0;
|
|
}
|
|
list.add(input.subSequence(index, input.length()));
|
|
|
|
String[] result = new String[list.size() - trailing];
|
|
int i = 0;
|
|
for (Iterator<CharSequence> it = list.iterator();
|
|
it.hasNext() && i < result.length; ++ i)
|
|
{
|
|
result[i] = it.next().toString();
|
|
}
|
|
return result;
|
|
}
|
|
|
|
static int indexOf(CharSequence haystack, CharSequence needle, int start) {
|
|
if (needle.length() == 0) return start;
|
|
|
|
for (int i = start; i < haystack.length() - needle.length() + 1; ++i) {
|
|
int j = 0;
|
|
for (; j < needle.length(); ++j) {
|
|
if (haystack.charAt(i + j) != needle.charAt(j)) {
|
|
break;
|
|
}
|
|
}
|
|
if (j == needle.length()) {
|
|
return i;
|
|
}
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
}
|