From 5defff2bb01c90ebe4b29141b789ab20e50d056a Mon Sep 17 00:00:00 2001
From: Burning_TNT <88144530+burningtnt@users.noreply.github.com>
Date: Sun, 31 Dec 2023 22:45:26 +0800
Subject: [PATCH] Fix string tokenizer (#2538)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Fix

* Parse quote and double quote at the same time. Add TokenizerTest.

* Simplify TokenizerTest

* Fix handling multiple space

* Fix handling empty part

* Supports escape sequences

* Remove an unnecessary lambda.

* Fix endless loop in parseCommand when a value contains its own key.

---------

Co-authored-by: Burning_TNT
Co-authored-by: Glavo
---
 .../org/jackhuang/hmcl/util/StringUtils.java  | 120 ++++++++++++++----
 .../jackhuang/hmcl/util/TokenizerTest.java    |  40 ++++++
 2 files changed, 130 insertions(+), 30 deletions(-)
 create mode 100644 HMCLCore/src/test/java/org/jackhuang/hmcl/util/TokenizerTest.java

diff --git a/HMCLCore/src/main/java/org/jackhuang/hmcl/util/StringUtils.java b/HMCLCore/src/main/java/org/jackhuang/hmcl/util/StringUtils.java
index b58d666d8..5a02cfbb6 100644
--- a/HMCLCore/src/main/java/org/jackhuang/hmcl/util/StringUtils.java
+++ b/HMCLCore/src/main/java/org/jackhuang/hmcl/util/StringUtils.java
@@ -19,11 +19,11 @@ package org.jackhuang.hmcl.util;
 
 import org.jackhuang.hmcl.util.platform.OperatingSystem;
 
-import java.io.*;
+import java.io.PrintWriter;
+import java.io.StringWriter;
 import java.util.*;
 
 /**
- *
  * @author huangyuhui
  */
 public final class StringUtils {
@@ -208,58 +208,118 @@ public final class StringUtils {
     }
 
+    /**
+     * Splits a command line into tokens separated by spaces.
+     * <p>
+     * Text in single quotes is taken literally. Text in double quotes may use
+     * backslash escapes: {@code \n}, {@code \r}, {@code \t}, {@code \v} and
+     * {@code \a} produce the matching control character, any other escaped
+     * character stands for itself. Adjacent quoted and unquoted pieces join
+     * into one token, an empty quoted string yields an empty token, and an
+     * unterminated quote runs to the end of the input.
+     *
+     * @param str the command line to split
+     * @return the tokens in order of appearance, or an empty list if {@code str} is blank
+     */
     public static List<String> tokenize(String str) {
-        if (str == null)
+        if (isBlank(str)) {
             return new ArrayList<>();
-        else {
-            // Split the string with ' or " and space cleverly.
-
-            final char groupSplit;
-            if (OperatingSystem.CURRENT_OS == OperatingSystem.WINDOWS) {
-                groupSplit = '"';
-            } else {
-                groupSplit = '\'';
-            }
-
+        } else {
+            // Split the string with ' and space cleverly.
             ArrayList<String> parts = new ArrayList<>();
 
-            {
-                boolean inside = false;
-                StringBuilder current = new StringBuilder();
+            boolean hasValue = false;
+            StringBuilder current = new StringBuilder(str.length());
+            for (int i = 0; i < str.length(); ) {
+                char c = str.charAt(i);
+                if (c == '\'') {
+                    hasValue = true;
+                    int end = str.indexOf(c, i + 1);
+                    if (end < 0) {
+                        end = str.length();
+                    }
+                    current.append(str, i + 1, end);
+                    i = end + 1;
 
-                for (int i = 0; i < str.length(); i++) {
-                    char c = str.charAt(i);
-                    if (c == groupSplit) {
-                        inside = !inside;
-                    } else if (!inside && c == ' ') {
+                } else if (c == '"') {
+                    hasValue = true;
+                    i++;
+                    while (i < str.length()) {
+                        c = str.charAt(i++);
+                        if (c == '"') {
+                            break;
+                        } else if (c == '\\' && i < str.length()) {
+                            c = str.charAt(i++);
+                            switch (c) {
+                                case 'n':
+                                    c = '\n';
+                                    break;
+                                case 'r':
+                                    c = '\r';
+                                    break;
+                                case 't':
+                                    c = '\t';
+                                    break;
+                                case 'v':
+                                    c = '\u000b';
+                                    break;
+                                case 'a':
+                                    c = '\u0007';
+                                    break;
+                            }
+                            current.append(c);
+                        } else {
+                            current.append(c);
+                        }
+                    }
+                } else if (c == ' ') {
+                    if (hasValue) {
                         parts.add(current.toString());
                         current.setLength(0);
-                    } else {
-                        current.append(c);
+                        hasValue = false;
                     }
+                    i++;
+                } else {
+                    hasValue = true;
+                    current.append(c);
+                    i++;
                 }
-
-                if (current.length() != 0) {
-                    parts.add(current.toString());
-                }
+            }
+            if (hasValue) {
+                parts.add(current.toString());
             }
 
             return parts;
         }
     }
 
+    /**
+     * Substitutes {@code $NAME} references in a command and tokenizes the result.
+     * <p>
+     * Each occurrence is replaced exactly once: the inserted value is not
+     * scanned again for the same name, so a value that contains its own
+     * reference (such as {@code PATH -> "$PATH:/opt"}) cannot cause an endless loop.
+     *
+     * @param command the command line template
+     * @param env     maps variable names (without the leading {@code $}) to their values
+     * @return the tokens of the substituted command, as produced by {@link #tokenize(String)}
+     */
     public static List<String> parseCommand(String command, Map<String, String> env) {
         StringBuilder stringBuilder = new StringBuilder(command);
-        env.forEach((key, value) -> {
-            key = "$" + key;
+        for (Map.Entry<String, String> entry : env.entrySet()) {
+            String key = "$" + entry.getKey();
+            String value = entry.getValue();
             int i = 0;
             while (true) {
                 i = stringBuilder.indexOf(key, i);
                 if (i == -1) {
                     break;
                 }
                 stringBuilder.replace(i, i + key.length(), value);
+                // Continue after the inserted text; searching from i again would
+                // re-match a value that itself contains the key and never finish.
+                i += value.length();
             }
-        });
+        }
 
         return tokenize(stringBuilder.toString());
     }
diff --git a/HMCLCore/src/test/java/org/jackhuang/hmcl/util/TokenizerTest.java b/HMCLCore/src/test/java/org/jackhuang/hmcl/util/TokenizerTest.java
new file mode 100644
index 000000000..8516a85b9
--- /dev/null
+++ b/HMCLCore/src/test/java/org/jackhuang/hmcl/util/TokenizerTest.java
@@ -0,0 +1,40 @@
+package org.jackhuang.hmcl.util;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.Arrays;
+
+public class TokenizerTest {
+    private void test(String source, String... expected) {
+        Assertions.assertEquals(Arrays.asList(expected), StringUtils.tokenize(source));
+    }
+
+    @Test
+    public void textTokenizer() {
+        test(
+                "\"C:/Program Files/Bellsoft/JDK-11/bin.java.exe\" -version \"a.b.c\" something else",
+                "C:/Program Files/Bellsoft/JDK-11/bin.java.exe", "-version", "a.b.c", "something", "else"
+        );
+        test(
+                "\"Another\"Text something else",
+                "AnotherText", "something", "else"
+        );
+        test(
+                "Text without quote",
+                "Text", "without", "quote"
+        );
+        test(
+                "Text with   multiple  spaces",
+                "Text", "with", "multiple", "spaces"
+        );
+        test(
+                "Text with empty part ''",
+                "Text", "with", "empty", "part", ""
+        );
+        test(
+                "head\"abc\\n\\\\\\\"\"end",
+                "headabc\n\\\"end"
+        );
+    }
+}