弃用 charsetCandidates (#2683)

* update

* create test
This commit is contained in:
Glavo
2024-01-26 04:37:54 +08:00
committed by GitHub
parent 7880c2f3d9
commit 351a59149f
4 changed files with 74 additions and 21 deletions

View File

@@ -88,30 +88,48 @@ public final class CompressingUtils {
}
/**
 * Finds a charset for the zip archive located at {@code zipFile}.
 * <p>
 * Delegates to the two-argument overload, passing {@code null} as the candidate collection.
 *
 * @param zipFile path of the zip archive to inspect
 * @return the charset reported by the delegate
 * @throws IOException propagated from the delegate
 */
public static Charset findSuitableEncoding(Path zipFile) throws IOException {
return findSuitableEncoding(zipFile, null);
}
/**
 * Opens the archive at {@code zipFile} through {@code openZipFile} using UTF-8 and runs
 * encoding detection on the opened {@code ZipFile}. The {@code ZipFile} is managed by
 * try-with-resources, so it is closed when this method exits.
 *
 * @param zipFile    path of the zip archive to inspect
 * @param candidates charsets forwarded to the {@code ZipFile} overload
 * @return the charset reported by the {@code ZipFile} overload
 * @throws IOException if opening the archive or the detection itself fails
 */
public static Charset findSuitableEncoding(Path zipFile, Collection<Charset> candidates) throws IOException {
try (ZipFile zf = openZipFile(zipFile, StandardCharsets.UTF_8)) {
// NOTE(review): two consecutive returns follow. This text looks like a rendered diff hunk,
// so they are presumably the before/after forms of a single statement -- confirm against
// the real file before relying on either one.
return findSuitableEncoding(zf, candidates);
return findSuitableEncoding(zf);
}
}
/**
 * Finds a charset for an already opened zip archive.
 * <p>
 * Delegates to the two-argument overload, passing {@code null} as the candidate collection.
 *
 * @param zipFile the opened zip archive to inspect
 * @return the charset reported by the delegate
 * @throws IOException propagated from the delegate
 */
public static Charset findSuitableEncoding(ZipFile zipFile) throws IOException {
return findSuitableEncoding(zipFile, null);
}
/**
 * Tries charsets against {@code zipFile} with {@code testEncoding} and returns the first one
 * that is accepted. UTF-8 is tried first, then {@code OperatingSystem.NATIVE_CHARSET}, then
 * the remaining candidates in order.
 * <p>
 * NOTE(review): this span looks like a rendered diff hunk in which removed and added lines are
 * interleaved. {@code candidates} is both a {@code Collection<Charset>} parameter and a local
 * {@code String[]}, and there are two candidate loops. Presumably only one variant exists in
 * the real file -- confirm before editing.
 *
 * @param zipFile    the opened zip archive to inspect
 * @param candidates charsets to try after UTF-8 and the native charset; when {@code null},
 *                   all of {@code Charset.availableCharsets()} are used
 * @return the first charset for which {@code testEncoding} returns {@code true}
 * @throws IOException if no tried charset is accepted
 */
public static Charset findSuitableEncoding(ZipFile zipFile, Collection<Charset> candidates) throws IOException {
// UTF-8 is always the first choice.
if (testEncoding(zipFile, StandardCharsets.UTF_8)) return StandardCharsets.UTF_8;
// The native charset is second, skipped when it is the very same object as UTF-8.
// NOTE(review): this is a reference comparison (!=), whereas the loop below uses equals().
if (OperatingSystem.NATIVE_CHARSET != StandardCharsets.UTF_8 && testEncoding(zipFile, OperatingSystem.NATIVE_CHARSET))
return OperatingSystem.NATIVE_CHARSET;
// Collection-based variant: fall back to every charset the JVM reports as available.
if (candidates == null)
candidates = Charset.availableCharsets().values();
// Name-based variant: a fixed list of charset names, tried in this order.
String[] candidates = {
"GB18030",
"Big5",
"Shift_JIS",
"EUC-JP",
"ISO-2022-JP",
"EUC-KR",
"ISO-2022-KR",
"KOI8-R",
"windows-1251",
"x-MacCyrillic",
"IBM855",
"IBM866",
"windows-1252",
"ISO-8859-1",
"ISO-8859-5",
"ISO-8859-7",
"ISO-8859-8",
"UTF-16LE", "UTF-16BE",
"UTF-32LE", "UTF-32BE"
};
for (String candidate : candidates) {
try {
Charset charset = Charset.forName(candidate);
// The native charset was already tried above, so it is not tested a second time.
if (!charset.equals(OperatingSystem.NATIVE_CHARSET) && testEncoding(zipFile, charset)) {
return charset;
}
} catch (IllegalArgumentException ignored) {
// Charset.forName rejects illegal or unsupported names with subclasses of
// IllegalArgumentException; such a candidate is simply skipped.
}
}
// Collection-based variant of the candidate loop; null elements are skipped.
for (Charset charset : candidates)
if (charset != null && testEncoding(zipFile, charset))
return charset;
// Nothing that was tried passed testEncoding.
throw new IOException("Cannot find suitable encoding for the zip.");
}
@@ -125,7 +143,6 @@ public final class CompressingUtils {
public static final class Builder {
private boolean autoDetectEncoding = false;
private Collection<Charset> charsetCandidates;
private Charset encoding = StandardCharsets.UTF_8;
private boolean useTempFile = false;
private final boolean create;
@@ -141,11 +158,6 @@ public final class CompressingUtils {
return this;
}
/**
 * Stores the charsets to be used as candidates by this builder.
 *
 * @param charsetCandidates the candidate charsets; stored as given, without copying
 * @return this builder, to allow call chaining
 */
public Builder setCharsetCandidates(Collection<Charset> charsetCandidates) {
this.charsetCandidates = charsetCandidates;
return this;
}
public Builder setEncoding(Charset encoding) {
this.encoding = encoding;
return this;
@@ -159,7 +171,7 @@ public final class CompressingUtils {
public FileSystem build() throws IOException {
if (autoDetectEncoding) {
if (!testEncoding(zip, encoding)) {
encoding = findSuitableEncoding(zip, charsetCandidates);
encoding = findSuitableEncoding(zip);
}
}
return createZipFileSystem(zip, create, useTempFile, encoding);

View File

@@ -0,0 +1,41 @@
package org.jackhuang.hmcl.util.io;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.stream.Stream;
import static org.jackhuang.hmcl.util.Pair.pair;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Parameterized tests for {@link CompressingUtils#findSuitableEncoding(Path)}.
 *
 * @author Glavo
 */
public final class CompressingUtilsTest {

    /**
     * Supplies the test cases: each one is a zip fixture found under {@code /zip/} on the
     * classpath, paired with the charset the detection is expected to report for it.
     *
     * @return a stream of {@code (Path, Charset)} argument pairs
     */
    public static Stream<Arguments> arguments() {
        return Stream.of(
                pair("utf-8.zip", StandardCharsets.UTF_8),
                pair("gbk.zip", Charset.forName("GB18030"))
        ).map(entry -> Arguments.of(resolveZip(entry.getKey()), entry.getValue()));
    }

    /**
     * Resolves a fixture under {@code /zip/} on the classpath to a file system path.
     *
     * @param fileName file name of the fixture, relative to {@code /zip/}
     * @return the path of the fixture
     * @throws AssertionError if the fixture is missing or its URL cannot be converted to a URI
     */
    private static Path resolveZip(String fileName) {
        String resourceName = "/zip/" + fileName;
        java.net.URL resource = CompressingUtilsTest.class.getResource(resourceName);
        // getResource returns null for a missing resource; fail with a message naming the
        // fixture instead of hitting a bare NullPointerException on toURI().
        if (resource == null) {
            throw new AssertionError("Missing test resource: " + resourceName);
        }
        try {
            return Paths.get(resource.toURI());
        } catch (URISyntaxException e) {
            throw new AssertionError(e);
        }
    }

    /**
     * Checks that the detected charset of the fixture equals the expected one.
     *
     * @param path    path of the zip fixture
     * @param charset charset the detection is expected to return
     * @throws IOException if the detection fails
     */
    @ParameterizedTest
    @MethodSource("arguments")
    public void testFindSuitableEncoding(Path path, Charset charset) throws IOException {
        assertEquals(charset, CompressingUtils.findSuitableEncoding(path));
    }
}

Binary file not shown.

Binary file not shown.