Fix string tokenizer (#2538)

* Fix

* Parse quote and double quote at the same time. Add TokenizerTest.

* Simplify TokenizerTest

* Fix handling of multiple spaces

* Fix handling of empty parts

* Support escape sequences

* Remove an unnecessary lambda.

---------

Co-authored-by: Burning_TNT <pangyl08@163.com>
Co-authored-by: Glavo <zjx001202@gmail.com>
This commit is contained in:
Burning_TNT
2023-12-31 22:45:26 +08:00
committed by GitHub
parent bdcbe6c948
commit 5defff2bb0
2 changed files with 103 additions and 31 deletions

View File

@@ -19,11 +19,11 @@ package org.jackhuang.hmcl.util;
import org.jackhuang.hmcl.util.platform.OperatingSystem;
import java.io.*;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.*;
/**
*
* @author huangyuhui
*/
public final class StringUtils {
@@ -208,39 +208,71 @@ public final class StringUtils {
}
public static List<String> tokenize(String str) {
if (str == null)
if (isBlank(str)) {
return new ArrayList<>();
else {
// Split the string with ' or " and space cleverly.
final char groupSplit;
if (OperatingSystem.CURRENT_OS == OperatingSystem.WINDOWS) {
groupSplit = '"';
} else {
groupSplit = '\'';
}
} else {
// Split the string with ' and space cleverly.
ArrayList<String> parts = new ArrayList<>();
{
boolean inside = false;
StringBuilder current = new StringBuilder();
boolean hasValue = false;
StringBuilder current = new StringBuilder(str.length());
for (int i = 0; i < str.length(); ) {
char c = str.charAt(i);
if (c == '\'') {
hasValue = true;
int end = str.indexOf(c, i + 1);
if (end < 0) {
end = str.length();
}
current.append(str, i + 1, end);
i = end + 1;
for (int i = 0; i < str.length(); i++) {
char c = str.charAt(i);
if (c == groupSplit) {
inside = !inside;
} else if (!inside && c == ' ') {
} else if (c == '"') {
hasValue = true;
i++;
while (i < str.length()) {
c = str.charAt(i++);
if (c == '"') {
break;
} else if (c == '\\' && i < str.length()) {
c = str.charAt(i++);
switch (c) {
case 'n':
c = '\n';
break;
case 'r':
c = '\r';
break;
case 't':
c = '\t';
break;
case 'v':
c = '\u000b';
break;
case 'a':
c = '\u0007';
break;
}
current.append(c);
} else {
current.append(c);
}
}
} else if (c == ' ') {
if (hasValue) {
parts.add(current.toString());
current.setLength(0);
} else {
current.append(c);
hasValue = false;
}
i++;
} else {
hasValue = true;
current.append(c);
i++;
}
if (current.length() != 0) {
parts.add(current.toString());
}
}
if (hasValue) {
parts.add(current.toString());
}
return parts;
@@ -249,17 +281,17 @@ public final class StringUtils {
public static List<String> parseCommand(String command, Map<String, String> env) {
StringBuilder stringBuilder = new StringBuilder(command);
env.forEach((key, value) -> {
key = "$" + key;
for (Map.Entry<String, String> entry : env.entrySet()) {
String key = "$" + entry.getKey();
int i = 0;
while (true) {
i = stringBuilder.indexOf(key, i);
if (i == -1) {
break;
}
stringBuilder.replace(i, i + key.length(), value);
stringBuilder.replace(i, i + key.length(), entry.getValue());
}
});
}
return tokenize(stringBuilder.toString());
}

View File

@@ -0,0 +1,40 @@
package org.jackhuang.hmcl.util;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import java.util.Arrays;
/**
 * Exercises {@link StringUtils#tokenize(String)} with a fixed table of
 * sample inputs and the token lists they are expected to produce.
 */
public class TokenizerTest {

    /**
     * Tokenizes {@code input} and asserts that the resulting list equals
     * {@code tokens}, element by element and in the same order.
     *
     * @param input  the raw string handed to the tokenizer
     * @param tokens the tokens the tokenizer is expected to return
     */
    private void assertTokens(String input, String... tokens) {
        final java.util.List<String> wanted = Arrays.asList(tokens);
        final java.util.List<String> actual = StringUtils.tokenize(input);
        Assertions.assertEquals(wanted, actual);
    }

    /**
     * Runs every row of the case table through the tokenizer. Rows are
     * checked in declaration order, so the first mismatch fails the test.
     */
    @Test
    public void textTokenizer() {
        // Row layout: { input, expected token 1, expected token 2, ... }
        final String[][] cases = {
            // A double-quoted segment containing spaces stays a single token.
            {
                "\"C:/Program Files/Bellsoft/JDK-11/bin.java.exe\" -version \"a.b.c\" something else",
                "C:/Program Files/Bellsoft/JDK-11/bin.java.exe", "-version", "a.b.c", "something", "else"
            },
            // A quoted segment immediately followed by text is joined into one token.
            {
                "\"Another\"Text something else",
                "AnotherText", "something", "else"
            },
            // Plain words without any quoting.
            {
                "Text without quote",
                "Text", "without", "quote"
            },
            {
                "Text with multiple spaces",
                "Text", "with", "multiple", "spaces"
            },
            // An empty pair of single quotes yields an empty token.
            {
                "Text with empty part ''",
                "Text", "with", "empty", "part", ""
            },
            // Escape sequences inside double quotes are decoded; text on both sides is glued on.
            {
                "head\"abc\\n\\\\\\\"\"end",
                "headabc\n\\\"end"
            }
        };

        for (String[] row : cases) {
            final String input = row[0];
            final String[] expectedTokens = Arrays.copyOfRange(row, 1, row.length);
            assertTokens(input, expectedTokens);
        }
    }
}