fix #1368: Guess Zip file encoding using Commons Compress
This commit is contained in:
@@ -17,22 +17,18 @@
|
|||||||
*/
|
*/
|
||||||
package org.jackhuang.hmcl.util.io;
|
package org.jackhuang.hmcl.util.io;
|
||||||
|
|
||||||
|
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
|
||||||
import org.apache.commons.compress.archivers.zip.ZipFile;
|
import org.apache.commons.compress.archivers.zip.ZipFile;
|
||||||
import org.jackhuang.hmcl.util.platform.OperatingSystem;
|
import org.jackhuang.hmcl.util.platform.OperatingSystem;
|
||||||
import org.jetbrains.annotations.NotNull;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.charset.Charset;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.CharBuffer;
|
||||||
|
import java.nio.charset.*;
|
||||||
import java.nio.file.*;
|
import java.nio.file.*;
|
||||||
import java.nio.file.attribute.BasicFileAttributes;
|
|
||||||
import java.nio.file.spi.FileSystemProvider;
|
import java.nio.file.spi.FileSystemProvider;
|
||||||
import java.util.Collection;
|
import java.util.*;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Optional;
|
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
|
||||||
import java.util.zip.ZipError;
|
import java.util.zip.ZipError;
|
||||||
import java.util.zip.ZipException;
|
import java.util.zip.ZipException;
|
||||||
|
|
||||||
@@ -51,38 +47,42 @@ public final class CompressingUtils {
|
|||||||
/**
 * Not instantiable: the only constructor is private and does nothing.
 */
private CompressingUtils() {
    // intentionally empty
}
|
||||||
|
|
||||||
@NotNull
|
private static CharsetDecoder newCharsetDecoder(Charset charset) {
|
||||||
private static FileVisitResult testZipPath(Path file, Path root, AtomicBoolean result) {
|
return charset.newDecoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT);
|
||||||
try {
|
|
||||||
root.relativize(file).toString(); // throw IllegalArgumentException for wrong encoding.
|
|
||||||
return FileVisitResult.CONTINUE;
|
|
||||||
} catch (Exception e) {
|
|
||||||
result.set(false);
|
|
||||||
return FileVisitResult.TERMINATE;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean testEncoding(Path zipFile, Charset encoding) throws IOException {
|
public static boolean testEncoding(Path zipFile, Charset encoding) throws IOException {
|
||||||
AtomicBoolean result = new AtomicBoolean(true);
|
try (ZipFile zf = openZipFile(zipFile, encoding)) {
|
||||||
try (FileSystem fs = CompressingUtils.createReadOnlyZipFileSystem(zipFile, encoding)) {
|
return testEncoding(zf, encoding);
|
||||||
Path root = fs.getPath("/");
|
|
||||||
Files.walkFileTree(root, new SimpleFileVisitor<Path>() {
|
|
||||||
@Override
|
|
||||||
public FileVisitResult visitFile(Path file,
|
|
||||||
BasicFileAttributes attrs) {
|
|
||||||
return testZipPath(file, root, result);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public FileVisitResult preVisitDirectory(Path dir,
|
|
||||||
BasicFileAttributes attrs) {
|
|
||||||
return testZipPath(dir, root, result);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
} catch (IllegalArgumentException e) {
|
|
||||||
throw new IOException(e);
|
|
||||||
}
|
}
|
||||||
return result.get();
|
}
|
||||||
|
|
||||||
|
public static boolean testEncoding(ZipFile zipFile, Charset encoding) throws IOException {
|
||||||
|
Enumeration<ZipArchiveEntry> entries = zipFile.getEntries();
|
||||||
|
CharsetDecoder cd = newCharsetDecoder(encoding);
|
||||||
|
CharBuffer cb = CharBuffer.allocate(32);
|
||||||
|
|
||||||
|
while (entries.hasMoreElements()) {
|
||||||
|
ZipArchiveEntry entry = entries.nextElement();
|
||||||
|
|
||||||
|
if (entry.getGeneralPurposeBit().usesUTF8ForNames()) continue;
|
||||||
|
|
||||||
|
cd.reset();
|
||||||
|
byte[] ba = entry.getRawName();
|
||||||
|
int clen = (int)(ba.length * cd.maxCharsPerByte());
|
||||||
|
if (clen == 0) continue;
|
||||||
|
if (clen <= cb.capacity())
|
||||||
|
cb.clear();
|
||||||
|
else
|
||||||
|
cb = CharBuffer.allocate(clen);
|
||||||
|
|
||||||
|
ByteBuffer bb = ByteBuffer.wrap(ba, 0, ba.length);
|
||||||
|
CoderResult cr = cd.decode(bb, cb, true);
|
||||||
|
if (!cr.isUnderflow()) return false;
|
||||||
|
cr = cd.flush(cb);
|
||||||
|
if (!cr.isUnderflow()) return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Charset findSuitableEncoding(Path zipFile) throws IOException {
|
public static Charset findSuitableEncoding(Path zipFile) throws IOException {
|
||||||
@@ -90,6 +90,16 @@ public final class CompressingUtils {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static Charset findSuitableEncoding(Path zipFile, Collection<Charset> candidates) throws IOException {
|
public static Charset findSuitableEncoding(Path zipFile, Collection<Charset> candidates) throws IOException {
|
||||||
|
try (ZipFile zf = openZipFile(zipFile, StandardCharsets.UTF_8)) {
|
||||||
|
return findSuitableEncoding(zf, candidates);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Charset findSuitableEncoding(ZipFile zipFile) throws IOException {
|
||||||
|
return findSuitableEncoding(zipFile, Charset.availableCharsets().values());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Charset findSuitableEncoding(ZipFile zipFile, Collection<Charset> candidates) throws IOException {
|
||||||
if (testEncoding(zipFile, StandardCharsets.UTF_8)) return StandardCharsets.UTF_8;
|
if (testEncoding(zipFile, StandardCharsets.UTF_8)) return StandardCharsets.UTF_8;
|
||||||
if (OperatingSystem.NATIVE_CHARSET != StandardCharsets.UTF_8 && testEncoding(zipFile, OperatingSystem.NATIVE_CHARSET))
|
if (OperatingSystem.NATIVE_CHARSET != StandardCharsets.UTF_8 && testEncoding(zipFile, OperatingSystem.NATIVE_CHARSET))
|
||||||
return OperatingSystem.NATIVE_CHARSET;
|
return OperatingSystem.NATIVE_CHARSET;
|
||||||
@@ -100,6 +110,14 @@ public final class CompressingUtils {
|
|||||||
throw new IOException("Cannot find suitable encoding for the zip.");
|
throw new IOException("Cannot find suitable encoding for the zip.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static ZipFile openZipFile(Path zipFile) throws IOException {
|
||||||
|
return new ZipFile(Files.newByteChannel(zipFile));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static ZipFile openZipFile(Path zipFile, Charset charset) throws IOException {
|
||||||
|
return new ZipFile(Files.newByteChannel(zipFile), charset.name());
|
||||||
|
}
|
||||||
|
|
||||||
public static final class Builder {
|
public static final class Builder {
|
||||||
private boolean autoDetectEncoding = false;
|
private boolean autoDetectEncoding = false;
|
||||||
private Collection<Charset> charsetCandidates;
|
private Collection<Charset> charsetCandidates;
|
||||||
@@ -212,7 +230,7 @@ public final class CompressingUtils {
|
|||||||
* @return the plain text content of given file.
|
* @return the plain text content of given file.
|
||||||
*/
|
*/
|
||||||
public static String readTextZipEntry(Path zipFile, String name, Charset encoding) throws IOException {
|
public static String readTextZipEntry(Path zipFile, String name, Charset encoding) throws IOException {
|
||||||
try (ZipFile s = new ZipFile(zipFile.toFile(), encoding.name())) {
|
try (ZipFile s = openZipFile(zipFile, encoding)) {
|
||||||
return IOUtils.readFullyAsString(s.getInputStream(s.getEntry(name)), StandardCharsets.UTF_8);
|
return IOUtils.readFullyAsString(s.getInputStream(s.getEntry(name)), StandardCharsets.UTF_8);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user