From 351a59149f170d5f9382e1757f29d161bdd9827d Mon Sep 17 00:00:00 2001 From: Glavo Date: Fri, 26 Jan 2024 04:37:54 +0800 Subject: [PATCH] =?UTF-8?q?=E5=BC=83=E7=94=A8=20charsetCandidates=20=20(#2?= =?UTF-8?q?683)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * update * create test --- .../hmcl/util/io/CompressingUtils.java | 54 +++++++++++------- .../hmcl/util/io/CompressingUtilsTest.java | 41 +++++++++++++ HMCLCore/src/test/resources/zip/gbk.zip | Bin 0 -> 242 bytes HMCLCore/src/test/resources/zip/utf-8.zip | Bin 0 -> 246 bytes 4 files changed, 74 insertions(+), 21 deletions(-) create mode 100644 HMCLCore/src/test/java/org/jackhuang/hmcl/util/io/CompressingUtilsTest.java create mode 100644 HMCLCore/src/test/resources/zip/gbk.zip create mode 100644 HMCLCore/src/test/resources/zip/utf-8.zip diff --git a/HMCLCore/src/main/java/org/jackhuang/hmcl/util/io/CompressingUtils.java b/HMCLCore/src/main/java/org/jackhuang/hmcl/util/io/CompressingUtils.java index b9de8c4c4..a104441d1 100644 --- a/HMCLCore/src/main/java/org/jackhuang/hmcl/util/io/CompressingUtils.java +++ b/HMCLCore/src/main/java/org/jackhuang/hmcl/util/io/CompressingUtils.java @@ -88,30 +88,48 @@ public final class CompressingUtils { } public static Charset findSuitableEncoding(Path zipFile) throws IOException { - return findSuitableEncoding(zipFile, null); - } - - public static Charset findSuitableEncoding(Path zipFile, Collection candidates) throws IOException { try (ZipFile zf = openZipFile(zipFile, StandardCharsets.UTF_8)) { - return findSuitableEncoding(zf, candidates); + return findSuitableEncoding(zf); } } public static Charset findSuitableEncoding(ZipFile zipFile) throws IOException { - return findSuitableEncoding(zipFile, null); - } - - public static Charset findSuitableEncoding(ZipFile zipFile, Collection candidates) throws IOException { if (testEncoding(zipFile, StandardCharsets.UTF_8)) return StandardCharsets.UTF_8; if (OperatingSystem.NATIVE_CHARSET != StandardCharsets.UTF_8 && testEncoding(zipFile, OperatingSystem.NATIVE_CHARSET)) return OperatingSystem.NATIVE_CHARSET; - if (candidates == null) - candidates = Charset.availableCharsets().values(); + String[] candidates = { + "GB18030", + "Big5", + "Shift_JIS", + "EUC-JP", + "ISO-2022-JP", + "EUC-KR", + "ISO-2022-KR", + "KOI8-R", + "windows-1251", + "x-MacCyrillic", + "IBM855", + "IBM866", + "windows-1252", + "ISO-8859-1", + "ISO-8859-5", + "ISO-8859-7", + "ISO-8859-8", + "UTF-16LE", "UTF-16BE", + "UTF-32LE", "UTF-32BE" + }; + + for (String candidate : candidates) { + try { + Charset charset = Charset.forName(candidate); + if (!charset.equals(OperatingSystem.NATIVE_CHARSET) && testEncoding(zipFile, charset)) { + return charset; + } + } catch (IllegalArgumentException ignored) { + } + } - for (Charset charset : candidates) - if (charset != null && testEncoding(zipFile, charset)) - return charset; throw new IOException("Cannot find suitable encoding for the zip."); } @@ -125,7 +143,6 @@ public final class CompressingUtils { public static final class Builder { private boolean autoDetectEncoding = false; - private Collection charsetCandidates; private Charset encoding = StandardCharsets.UTF_8; private boolean useTempFile = false; private final boolean create; @@ -141,11 +158,6 @@ public final class CompressingUtils { return this; } - public Builder setCharsetCandidates(Collection charsetCandidates) { - this.charsetCandidates = charsetCandidates; - return this; - } - public Builder setEncoding(Charset encoding) { this.encoding = encoding; return this; @@ -159,7 +171,7 @@ public final class CompressingUtils { public FileSystem build() throws IOException { if (autoDetectEncoding) { if (!testEncoding(zip, encoding)) { - encoding = findSuitableEncoding(zip, charsetCandidates); + encoding = findSuitableEncoding(zip); } } return createZipFileSystem(zip, create, useTempFile, encoding); diff --git a/HMCLCore/src/test/java/org/jackhuang/hmcl/util/io/CompressingUtilsTest.java b/HMCLCore/src/test/java/org/jackhuang/hmcl/util/io/CompressingUtilsTest.java new file mode 100644 index 000000000..f76f5b0a8 --- /dev/null +++ b/HMCLCore/src/test/java/org/jackhuang/hmcl/util/io/CompressingUtilsTest.java @@ -0,0 +1,41 @@ +package org.jackhuang.hmcl.util.io; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.stream.Stream; + +import static org.jackhuang.hmcl.util.Pair.pair; +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * @author Glavo + */ +public final class CompressingUtilsTest { + + public static Stream arguments() { + return Stream.of( + pair("utf-8.zip", StandardCharsets.UTF_8), + pair("gbk.zip", Charset.forName("GB18030")) + ).map(pair -> { + try { + return Arguments.of(Paths.get(CompressingUtilsTest.class.getResource("/zip/" + pair.getKey()).toURI()), pair.getValue()); + } catch (URISyntaxException e) { + throw new AssertionError(e); + } + }); + } + + @ParameterizedTest + @MethodSource("arguments") + public void testFindSuitableEncoding(Path path, Charset charset) throws IOException { + assertEquals(charset, CompressingUtils.findSuitableEncoding(path)); + } +} diff --git a/HMCLCore/src/test/resources/zip/gbk.zip b/HMCLCore/src/test/resources/zip/gbk.zip new file mode 100644 index 0000000000000000000000000000000000000000..cf94d7fcadf2c7ab47df2c2031e6fb3c1ca08dcd GIT binary patch literal 242 zcmWIWW@Zs#-~hq`Wvd7VB)|b=m!uY#=#^BIFf#;rvx5W}n4l^G@ansE;oK2i`WTr+ q7*K75nF+Q4YCeR5*&pCdtS$?nMwsmZ-mGjObC`fI2S^uyI1B*7jT