自动探测日志文件编码 (#3863)

This commit is contained in:
Glavo
2025-04-27 20:39:49 +08:00
committed by GitHub
parent e98c7dfef1
commit 05c5b53c52
3 changed files with 32 additions and 1 deletions

View File

@@ -16,6 +16,7 @@ dependencies {
api("com.github.steveice10:opennbt:1.5")
api("org.nanohttpd:nanohttpd:2.3.1")
api("org.jsoup:jsoup:1.19.1")
api("org.glavo:chardet:2.5.0")
compileOnlyApi("org.jetbrains:annotations:26.0.1")
if (JavaVersion.current().isJava8) {

View File

@@ -17,10 +17,20 @@
*/
package org.jackhuang.hmcl.util.io;
import org.glavo.chardet.DetectedCharset;
import org.glavo.chardet.UniversalDetector;
import java.io.*;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.zip.GZIPInputStream;
import static java.nio.charset.StandardCharsets.*;
import static org.jackhuang.hmcl.util.platform.OperatingSystem.NATIVE_CHARSET;
/**
* This utility class consists of some util methods operating on InputStream/OutputStream.
*
@@ -33,6 +43,25 @@ public final class IOUtils {
/** Default buffer size: 8 * 1024 = 8192. NOTE(review): presumably a byte count for stream buffers — confirm against usages. */
public static final int DEFAULT_BUFFER_SIZE = 8 * 1024;
/**
 * Opens a buffered reader on {@code file}, decoding it as either UTF-8 or the native charset.
 * <p>
 * When {@code NATIVE_CHARSET} is UTF-8 the file is opened as UTF-8 without any probing. Otherwise the
 * file content is probed with {@link UniversalDetector}: if the detected charset is supported and is
 * UTF-8 or US-ASCII, the file is decoded as UTF-8; in every other case (nothing detected, unsupported
 * charset, or a different charset) it is decoded with {@code NATIVE_CHARSET}.
 * <p>
 * The returned reader wraps the stream obtained from the opened channel, so closing the reader also
 * closes the channel.
 *
 * @param file the file to read
 * @return a buffered reader positioned at the start of the file
 * @throws IOException if the file cannot be opened or read
 */
public static BufferedReader newBufferedReaderMaybeNativeEncoding(Path file) throws IOException {
    // Charset.equals compares canonical names, so the check does not depend on instance identity.
    if (UTF_8.equals(NATIVE_CHARSET))
        return Files.newBufferedReader(file);

    FileChannel channel = FileChannel.open(file);
    try {
        long oldPosition = channel.position();
        DetectedCharset detectedCharset = UniversalDetector.detectCharset(channel);

        // Fall back to the native charset unless the content is positively identified as UTF-8/ASCII.
        Charset charset = NATIVE_CHARSET;
        if (detectedCharset != null && detectedCharset.isSupported()) {
            Charset detected = detectedCharset.getCharset();
            if (UTF_8.equals(detected) || US_ASCII.equals(detected))
                charset = UTF_8;
        }

        // Detection read from the channel; rewind so the reader starts where the channel started.
        channel.position(oldPosition);
        return new BufferedReader(new InputStreamReader(Channels.newInputStream(channel), charset));
    } catch (Throwable e) {
        // No reader was handed to the caller, so the channel must be released here before rethrowing.
        closeQuietly(channel, e);
        throw e;
    }
}
/**
* Read all bytes to a buffer from given input stream. The stream will not be closed.
*