弃用 charsetCandidates (#2683)

* update

* create test
This commit is contained in:
Glavo
2024-01-26 04:37:54 +08:00
committed by GitHub
parent 7880c2f3d9
commit 351a59149f
4 changed files with 74 additions and 21 deletions

View File

@@ -88,30 +88,48 @@ public final class CompressingUtils {
}
/**
 * Convenience overload that forwards to
 * {@code findSuitableEncoding(zipFile, null)}, i.e. without a caller-supplied
 * candidate collection.
 *
 * @param zipFile path handed unchanged to the two-argument overload
 * @return whatever the two-argument overload returns
 * @throws IOException propagated from the two-argument overload
 */
public static Charset findSuitableEncoding(Path zipFile) throws IOException {
return findSuitableEncoding(zipFile, null);
}
/**
 * Opens the archive at {@code zipFile} through {@code openZipFile} using UTF-8
 * and runs the {@code ZipFile}-based detection on it. The archive is closed by
 * the try-with-resources statement before this method returns.
 *
 * @param zipFile    path of the archive to open
 * @param candidates charsets forwarded to the {@code ZipFile} overload by the
 *                   first {@code return} below
 * @return the charset chosen by the {@code ZipFile} overload
 * @throws IOException if opening the archive or the detection fails
 */
public static Charset findSuitableEncoding(Path zipFile, Collection<Charset> candidates) throws IOException {
try (ZipFile zf = openZipFile(zipFile, StandardCharsets.UTF_8)) {
// NOTE(review): two consecutive return statements follow. This looks like the
// old and new side of a diff rendered without +/- markers; as written, the
// second return is unreachable. Confirm which line is current.
return findSuitableEncoding(zf, candidates);
return findSuitableEncoding(zf);
}
}
/**
 * Convenience overload that forwards to
 * {@code findSuitableEncoding(zipFile, null)}, i.e. without a caller-supplied
 * candidate collection.
 *
 * @param zipFile already opened archive handed unchanged to the two-argument overload
 * @return whatever the two-argument overload returns
 * @throws IOException propagated from the two-argument overload
 */
public static Charset findSuitableEncoding(ZipFile zipFile) throws IOException {
return findSuitableEncoding(zipFile, null);
}
/**
 * Tries charsets in a fixed order and returns the first one for which
 * {@code testEncoding(zipFile, charset)} is true:
 * <ol>
 *   <li>UTF-8;</li>
 *   <li>{@code OperatingSystem.NATIVE_CHARSET}, unless it is the UTF-8 constant;</li>
 *   <li>the candidate charsets.</li>
 * </ol>
 *
 * <p>NOTE(review): this body appears to interleave the removed and added lines
 * of a diff whose +/- markers were stripped. {@code candidates} is both a
 * parameter and a local {@code String[]}, and there are two candidate loops.
 * Confirm which lines belong to the current version before relying on it.
 *
 * @param zipFile    archive passed to {@code testEncoding} for every probe
 * @param candidates charsets to probe; when {@code null}, replaced by every
 *                   charset reported by {@code Charset.availableCharsets()}
 * @return the first charset accepted by {@code testEncoding}
 * @throws IOException if no probed charset is accepted
 */
public static Charset findSuitableEncoding(ZipFile zipFile, Collection<Charset> candidates) throws IOException {
// UTF-8 is probed first.
if (testEncoding(zipFile, StandardCharsets.UTF_8)) return StandardCharsets.UTF_8;
// The native charset is probed only when it is not the UTF_8 constant itself
// (reference comparison), so UTF-8 is not tested twice.
if (OperatingSystem.NATIVE_CHARSET != StandardCharsets.UTF_8 && testEncoding(zipFile, OperatingSystem.NATIVE_CHARSET))
return OperatingSystem.NATIVE_CHARSET;
// No caller-supplied candidates: fall back to every charset the JVM offers.
if (candidates == null)
candidates = Charset.availableCharsets().values();
// Fixed list of charset names, probed in this order.
// NOTE(review): this local reuses the parameter name `candidates`.
String[] candidates = {
"GB18030",
"Big5",
"Shift_JIS",
"EUC-JP",
"ISO-2022-JP",
"EUC-KR",
"ISO-2022-KR",
"KOI8-R",
"windows-1251",
"x-MacCyrillic",
"IBM855",
"IBM866",
"windows-1252",
"ISO-8859-1",
"ISO-8859-5",
"ISO-8859-7",
"ISO-8859-8",
"UTF-16LE", "UTF-16BE",
"UTF-32LE", "UTF-32BE"
};
for (String candidate : candidates) {
try {
Charset charset = Charset.forName(candidate);
// The native charset was already probed above, so it is skipped here.
if (!charset.equals(OperatingSystem.NATIVE_CHARSET) && testEncoding(zipFile, charset)) {
return charset;
}
} catch (IllegalArgumentException ignored) {
// Charset.forName rejects illegal or unsupported names with subclasses of
// IllegalArgumentException; such names are skipped.
}
}
// Probe each non-null Charset candidate in iteration order.
for (Charset charset : candidates)
if (charset != null && testEncoding(zipFile, charset))
return charset;
// Nothing matched.
throw new IOException("Cannot find suitable encoding for the zip.");
}
@@ -125,7 +143,6 @@ public final class CompressingUtils {
public static final class Builder {
private boolean autoDetectEncoding = false;
private Collection<Charset> charsetCandidates;
private Charset encoding = StandardCharsets.UTF_8;
private boolean useTempFile = false;
private final boolean create;
@@ -141,11 +158,6 @@ public final class CompressingUtils {
return this;
}
/**
 * Stores the given collection in this builder's {@code charsetCandidates}
 * field and returns the builder so calls can be chained.
 *
 * <p>NOTE(review): the commit title indicates {@code charsetCandidates} is
 * being retired; confirm whether this setter is on the removed side of the diff.
 *
 * @param charsetCandidates collection to store; kept by reference, not copied
 * @return this builder
 */
public Builder setCharsetCandidates(Collection<Charset> charsetCandidates) {
this.charsetCandidates = charsetCandidates;
return this;
}
public Builder setEncoding(Charset encoding) {
this.encoding = encoding;
return this;
@@ -159,7 +171,7 @@ public final class CompressingUtils {
public FileSystem build() throws IOException {
if (autoDetectEncoding) {
if (!testEncoding(zip, encoding)) {
encoding = findSuitableEncoding(zip, charsetCandidates);
encoding = findSuitableEncoding(zip);
}
}
return createZipFileSystem(zip, create, useTempFile, encoding);