Fix string tokenizer (#2538)
* Fix
* Parse quote and double quote at the same time. Add TokenizerTest.
* Simplify TokenizerTest
* Fix handling multiple space
* Fix handling empty part
* Supports escape sequences
* Remove an unnecessary lambda.
---------
Co-authored-by: Burning_TNT <pangyl08@163.com>
Co-authored-by: Glavo <zjx001202@gmail.com>
This commit is contained in:
@@ -19,11 +19,11 @@ package org.jackhuang.hmcl.util;
|
|||||||
|
|
||||||
import org.jackhuang.hmcl.util.platform.OperatingSystem;
|
import org.jackhuang.hmcl.util.platform.OperatingSystem;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.PrintWriter;
|
||||||
|
import java.io.StringWriter;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
|
||||||
* @author huangyuhui
|
* @author huangyuhui
|
||||||
*/
|
*/
|
||||||
public final class StringUtils {
|
public final class StringUtils {
|
||||||
@@ -208,39 +208,71 @@ public final class StringUtils {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static List<String> tokenize(String str) {
|
public static List<String> tokenize(String str) {
|
||||||
if (str == null)
|
if (isBlank(str)) {
|
||||||
return new ArrayList<>();
|
return new ArrayList<>();
|
||||||
else {
|
|
||||||
// Split the string with ' or " and space cleverly.
|
|
||||||
|
|
||||||
final char groupSplit;
|
|
||||||
if (OperatingSystem.CURRENT_OS == OperatingSystem.WINDOWS) {
|
|
||||||
groupSplit = '"';
|
|
||||||
} else {
|
} else {
|
||||||
groupSplit = '\'';
|
// Split the string with ' and space cleverly.
|
||||||
}
|
|
||||||
|
|
||||||
ArrayList<String> parts = new ArrayList<>();
|
ArrayList<String> parts = new ArrayList<>();
|
||||||
|
|
||||||
{
|
boolean hasValue = false;
|
||||||
boolean inside = false;
|
StringBuilder current = new StringBuilder(str.length());
|
||||||
StringBuilder current = new StringBuilder();
|
for (int i = 0; i < str.length(); ) {
|
||||||
|
|
||||||
for (int i = 0; i < str.length(); i++) {
|
|
||||||
char c = str.charAt(i);
|
char c = str.charAt(i);
|
||||||
if (c == groupSplit) {
|
if (c == '\'') {
|
||||||
inside = !inside;
|
hasValue = true;
|
||||||
} else if (!inside && c == ' ') {
|
int end = str.indexOf(c, i + 1);
|
||||||
parts.add(current.toString());
|
if (end < 0) {
|
||||||
current.setLength(0);
|
end = str.length();
|
||||||
|
}
|
||||||
|
current.append(str, i + 1, end);
|
||||||
|
i = end + 1;
|
||||||
|
|
||||||
|
} else if (c == '"') {
|
||||||
|
hasValue = true;
|
||||||
|
i++;
|
||||||
|
while (i < str.length()) {
|
||||||
|
c = str.charAt(i++);
|
||||||
|
if (c == '"') {
|
||||||
|
break;
|
||||||
|
} else if (c == '\\' && i < str.length()) {
|
||||||
|
c = str.charAt(i++);
|
||||||
|
switch (c) {
|
||||||
|
case 'n':
|
||||||
|
c = '\n';
|
||||||
|
break;
|
||||||
|
case 'r':
|
||||||
|
c = '\r';
|
||||||
|
break;
|
||||||
|
case 't':
|
||||||
|
c = '\t';
|
||||||
|
break;
|
||||||
|
case 'v':
|
||||||
|
c = '\u000b';
|
||||||
|
break;
|
||||||
|
case 'a':
|
||||||
|
c = '\u0007';
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
current.append(c);
|
||||||
} else {
|
} else {
|
||||||
current.append(c);
|
current.append(c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else if (c == ' ') {
|
||||||
if (current.length() != 0) {
|
if (hasValue) {
|
||||||
parts.add(current.toString());
|
parts.add(current.toString());
|
||||||
|
current.setLength(0);
|
||||||
|
hasValue = false;
|
||||||
}
|
}
|
||||||
|
i++;
|
||||||
|
} else {
|
||||||
|
hasValue = true;
|
||||||
|
current.append(c);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (hasValue) {
|
||||||
|
parts.add(current.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
return parts;
|
return parts;
|
||||||
@@ -249,17 +281,17 @@ public final class StringUtils {
|
|||||||
|
|
||||||
public static List<String> parseCommand(String command, Map<String, String> env) {
|
public static List<String> parseCommand(String command, Map<String, String> env) {
|
||||||
StringBuilder stringBuilder = new StringBuilder(command);
|
StringBuilder stringBuilder = new StringBuilder(command);
|
||||||
env.forEach((key, value) -> {
|
for (Map.Entry<String, String> entry : env.entrySet()) {
|
||||||
key = "$" + key;
|
String key = "$" + entry.getKey();
|
||||||
int i = 0;
|
int i = 0;
|
||||||
while (true) {
|
while (true) {
|
||||||
i = stringBuilder.indexOf(key, i);
|
i = stringBuilder.indexOf(key, i);
|
||||||
if (i == -1) {
|
if (i == -1) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
stringBuilder.replace(i, i + key.length(), value);
|
stringBuilder.replace(i, i + key.length(), entry.getValue());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
|
||||||
|
|
||||||
return tokenize(stringBuilder.toString());
|
return tokenize(stringBuilder.toString());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,40 @@
|
|||||||
|
package org.jackhuang.hmcl.util;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
public class TokenizerTest {
|
||||||
|
private void test(String source, String... expected) {
|
||||||
|
Assertions.assertEquals(Arrays.asList(expected), StringUtils.tokenize(source));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void textTokenizer() {
|
||||||
|
test(
|
||||||
|
"\"C:/Program Files/Bellsoft/JDK-11/bin.java.exe\" -version \"a.b.c\" something else",
|
||||||
|
"C:/Program Files/Bellsoft/JDK-11/bin.java.exe", "-version", "a.b.c", "something", "else"
|
||||||
|
);
|
||||||
|
test(
|
||||||
|
"\"Another\"Text something else",
|
||||||
|
"AnotherText", "something", "else"
|
||||||
|
);
|
||||||
|
test(
|
||||||
|
"Text without quote",
|
||||||
|
"Text", "without", "quote"
|
||||||
|
);
|
||||||
|
test(
|
||||||
|
"Text with multiple spaces",
|
||||||
|
"Text", "with", "multiple", "spaces"
|
||||||
|
);
|
||||||
|
test(
|
||||||
|
"Text with empty part ''",
|
||||||
|
"Text", "with", "empty", "part", ""
|
||||||
|
);
|
||||||
|
test(
|
||||||
|
"head\"abc\\n\\\\\\\"\"end",
|
||||||
|
"headabc\n\\\"end"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user