Forge libraries can now be downloaded as .pack.xz archives.

This commit is contained in:
huanghongxun
2015-07-28 20:36:41 +08:00
parent bc8cc93bd9
commit e31b5e3a28
103 changed files with 13147 additions and 460 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -47,7 +47,8 @@ public class DefaultGameLauncher extends GameLauncher {
final TaskWindow dw = TaskWindow.getInstance(); final TaskWindow dw = TaskWindow.getInstance();
ParallelTask parallelTask = new ParallelTask(); ParallelTask parallelTask = new ParallelTask();
for (DownloadLibraryJob s : t) { for (DownloadLibraryJob s : t) {
parallelTask.addDependsTask(new FileDownloadTask(s.url, s.path).setTag(s.name)); //parallelTask.addDependsTask(new FileDownloadTask(s.url, s.path).setTag(s.name));
parallelTask.addDependsTask(new LibraryDownloadTask(s));
} }
dw.addTask(parallelTask); dw.addTask(parallelTask);
boolean flag = true; boolean flag = true;

View File

@@ -0,0 +1,234 @@
/*
* Copyright 2013 huangyuhui <huanghongxun2008@126.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program.
*/
package org.jackhuang.hellominecraft.launcher.launch;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Arrays;
import java.util.jar.JarEntry;
import java.util.jar.JarOutputStream;
import java.util.jar.Pack200;
import org.jackhuang.hellominecraft.C;
import org.jackhuang.hellominecraft.HMCLog;
import org.jackhuang.hellominecraft.tasks.Task;
import org.jackhuang.hellominecraft.tasks.download.NetException;
import org.jackhuang.hellominecraft.utils.system.FileUtils;
import org.jackhuang.hellominecraft.utils.system.IOUtils;
import org.tukaani.xz.XZInputStream;
/**
*
* @author huangyuhui
*/
public class LibraryDownloadTask extends Task {

    private static final int MAX_BUFFER_SIZE = 2048;

    /** Describes the library to fetch: remote url, local target path and display name. */
    GameLauncher.DownloadLibraryJob job;

    public LibraryDownloadTask(GameLauncher.DownloadLibraryJob job) {
        this.job = job;
    }

    /**
     * Downloads the library. For Forge-hosted libraries a pack200+XZ
     * compressed variant (".pack.xz") is tried first and unpacked locally;
     * if that fails (or the library is not Forge-hosted) the plain jar is
     * downloaded instead.
     *
     * @return true on success, false on failure (the fail reason is recorded)
     */
    @Override
    public boolean executeTask() {
        try {
            File packFile = new File(job.path.getParentFile(), job.path.getName() + ".pack.xz");
            if (job.url.contains("forge") && download(new URL(job.url + ".pack.xz"), packFile)) {
                unpackLibrary(job.path, FileUtils.toByteArray(packFile));
                packFile.delete();
                return true;
            } else
                return download(new URL(job.url), job.path);
        } catch (Exception ex) {
            setFailReason(ex);
            return false;
        }
    }

    InputStream stream;
    RandomAccessFile file;
    boolean shouldContinue = true, aborted = false;
    int size = -1;

    /**
     * Downloads {@code url} to {@code filePath}, retrying up to 6 times.
     * Data is first written to a ".hmd" temp file that is renamed to the
     * final path only after the whole body has been received.
     *
     * @return true if the file was downloaded completely, false otherwise
     */
    boolean download(URL url, File filePath) {
        size = -1;
        for (int repeat = 0; repeat < 6; repeat++) {
            // BUGFIX: the progress counter must restart on every attempt.
            // Each retry re-reads the response body from byte 0 (no Range
            // header is sent), so keeping the old offset would seek past the
            // already-written data and produce a corrupted file.
            int downloaded = 0;
            if (repeat > 0)
                HMCLog.warn("Failed to download, repeat: " + repeat);
            try {
                // Open connection to URL.
                HttpURLConnection connection
                        = (HttpURLConnection) url.openConnection();
                connection.setConnectTimeout(5000);
                connection.setRequestProperty("User-Agent", "Hello Minecraft! Launcher");

                // Connect to server.
                connection.connect();

                // Make sure response code is in the 200 range.
                if (connection.getResponseCode() / 100 != 2) {
                    setFailReason(new NetException(C.i18n("download.not_200") + " " + connection.getResponseCode()));
                    return false;
                }

                // Check for valid content length.
                int contentLength = connection.getContentLength();
                if (contentLength < 1) {
                    setFailReason(new NetException("The content length is invalid."));
                    return false;
                }

                // Set the size for this download if it hasn't been already set.
                if (size == -1)
                    size = contentLength;

                filePath.getParentFile().mkdirs();
                File tempFile = new File(filePath.getAbsolutePath() + ".hmd");
                if (!tempFile.exists())
                    tempFile.createNewFile();

                // Open the temp file and discard any stale content from a
                // previous failed attempt; "rw" mode does not truncate, so a
                // longer leftover file would keep its tail bytes after rename.
                file = new RandomAccessFile(tempFile, "rw");
                file.setLength(0);
                file.seek(downloaded);

                stream = connection.getInputStream();
                while (true) {
                    if (!shouldContinue) {
                        // BUGFIX: on abort, remove the partial temp file and
                        // report failure instead of renaming the incomplete
                        // download onto the final library path.
                        closeFiles();
                        tempFile.delete();
                        return false;
                    }

                    // Size buffer according to how much of the file is left to download.
                    byte buffer[] = new byte[MAX_BUFFER_SIZE];

                    // Read from server into buffer.
                    int read = stream.read(buffer);
                    if (read == -1)
                        break;

                    // Write buffer to file.
                    file.write(buffer, 0, read);
                    downloaded += read;
                    if (ppl != null)
                        ppl.setProgress(this, downloaded, size);
                }
                closeFiles();
                tempFile.renameTo(filePath);
                if (ppl != null)
                    ppl.onProgressProviderDone(this);
                return true;
            } catch (Exception e) {
                setFailReason(new NetException(C.i18n("download.failed") + " " + url, e));
            } finally {
                closeFiles();
            }
        }
        return false;
    }

    /**
     * Unpacks a Forge ".pack.xz" library: XZ-decompresses {@code data},
     * verifies the trailing "SIGN" marker, extracts the checksum block and
     * runs Pack200 to rebuild the jar, appending a "checksums.sha1" entry.
     *
     * @param output destination jar file (deleted first if it already exists)
     * @param data   raw bytes of the downloaded .pack.xz file
     * @throws IOException if decompression or unpacking fails
     */
    public static void unpackLibrary(File output, byte[] data)
            throws IOException {
        HMCLog.log("Unpacking " + output);
        if (output.exists())
            output.delete();
        byte[] decompressed = IOUtils.readFully(new XZInputStream(new ByteArrayInputStream(data)));

        // The decompressed stream must end with the 4-byte "SIGN" marker.
        // NOTE(review): decoded with the platform default charset; the marker
        // is plain ASCII so this should be safe on all platforms.
        String end = new String(decompressed, decompressed.length - 4, 4);
        if (!end.equals("SIGN")) {
            HMCLog.log("Unpacking failed, signature missing " + end);
            return;
        }

        // Little-endian length of the checksum block, stored just before the marker.
        int x = decompressed.length;
        int len = decompressed[(x - 8)] & 0xFF | (decompressed[(x - 7)] & 0xFF) << 8 | (decompressed[(x - 6)] & 0xFF) << 16 | (decompressed[(x - 5)] & 0xFF) << 24;

        File temp = File.createTempFile("art", ".pack");
        HMCLog.log("  Signed");
        HMCLog.log("  Checksum Length: " + len);
        HMCLog.log("  Total Length:    " + (decompressed.length - len - 8));
        HMCLog.log("  Temp File:       " + temp.getAbsolutePath());
        byte[] checksums = Arrays.copyOfRange(decompressed, decompressed.length - len - 8, decompressed.length - 8);

        // Write the raw pack200 payload (everything before the checksum block).
        try (OutputStream out = new FileOutputStream(temp)) {
            out.write(decompressed, 0, decompressed.length - len - 8);
        }

        // Release the large buffers before the memory-hungry Pack200 step.
        decompressed = null;
        data = null;
        System.gc();

        // Rebuild the jar and append the checksum file expected by Forge.
        try (FileOutputStream jarBytes = new FileOutputStream(output); JarOutputStream jos = new JarOutputStream(jarBytes)) {
            Pack200.newUnpacker().unpack(temp, jos);
            JarEntry checksumsFile = new JarEntry("checksums.sha1");
            checksumsFile.setTime(0L);
            jos.putNextEntry(checksumsFile);
            jos.write(checksums);
            jos.closeEntry();
        }
        temp.delete();
    }

    /** Closes the download stream and the temp file, logging (not throwing) on failure. */
    private void closeFiles() {
        // Close file.
        if (file != null)
            try {
                file.close();
                file = null;
            } catch (IOException e) {
                HMCLog.warn("Failed to close file", e);
            }

        // Close connection to server.
        if (stream != null)
            try {
                stream.close();
                stream = null;
            } catch (IOException e) {
                HMCLog.warn("Failed to close stream", e);
            }
    }

    /** Requests the running download loop to stop at the next buffer boundary. */
    @Override
    public boolean abort() {
        shouldContinue = false;
        aborted = true;
        return true;
    }

    @Override
    public String getInfo() {
        return C.i18n("download") + ": " + job.name;
    }
}

View File

@@ -950,7 +950,7 @@ btnRefreshLiteLoader.addActionListener(new java.awt.event.ActionListener() {
MessageBox.Show(C.i18n("install.not_refreshed")); MessageBox.Show(C.i18n("install.not_refreshed"));
return; return;
} }
InstallerVersion v = forge.getVersion(idx);//forgeVersions.get(idx); InstallerVersion v = forge.getVersion(idx);
String url; String url;
File filepath = IOUtils.tryGetCanonicalFile(IOUtils.currentDirWithSeparator() + "forge-installer.jar"); File filepath = IOUtils.tryGetCanonicalFile(IOUtils.currentDirWithSeparator() + "forge-installer.jar");
if (v.installer != null) { if (v.installer != null) {

View File

@@ -16,6 +16,7 @@
*/ */
package org.jackhuang.hellominecraft.tasks; package org.jackhuang.hellominecraft.tasks;
import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
/** /**
@@ -65,6 +66,17 @@ public abstract class Task {
public void setParallelExecuting(boolean parallelExecuting) { public void setParallelExecuting(boolean parallelExecuting) {
this.parallelExecuting = parallelExecuting; this.parallelExecuting = parallelExecuting;
} }
ArrayList<DoingDoneListener<Task>> taskListener = new ArrayList();
public Task addTaskListener(DoingDoneListener<Task> l) {
taskListener.add(l);
return this;
}
public ArrayList<DoingDoneListener<Task>> getTaskListeners() {
return taskListener;
}
public abstract String getInfo(); public abstract String getInfo();

View File

@@ -116,16 +116,22 @@ public class TaskList extends Thread {
HMCLog.log("Executing task: " + t.getInfo()); HMCLog.log("Executing task: " + t.getInfo());
for (DoingDoneListener<Task> d : taskListener) for (DoingDoneListener<Task> d : taskListener)
d.onDoing(t); d.onDoing(t);
for (DoingDoneListener<Task> d : t.getTaskListeners())
d.onDoing(t);
if (t.executeTask()) { if (t.executeTask()) {
HMCLog.log("Task finished: " + t.getInfo()); HMCLog.log("Task finished: " + t.getInfo());
for (DoingDoneListener<Task> d : taskListener) for (DoingDoneListener<Task> d : taskListener)
d.onDone(t); d.onDone(t);
for (DoingDoneListener<Task> d : t.getTaskListeners())
d.onDone(t);
processTasks(t.getAfterTasks()); processTasks(t.getAfterTasks());
} else { } else {
HMCLog.err("Task failed: " + t.getInfo(), t.getFailReason()); HMCLog.err("Task failed: " + t.getInfo(), t.getFailReason());
for (DoingDoneListener<Task> d : taskListener) for (DoingDoneListener<Task> d : taskListener)
d.onFailed(t); d.onFailed(t);
for (DoingDoneListener<Task> d : t.getTaskListeners())
d.onFailed(t);
} }
} }

View File

@@ -68,9 +68,8 @@ public class FileDownloadTask extends Task implements PreviousResult<File>, Prev
sslContext.init(null, xtmArray, new java.security.SecureRandom()); sslContext.init(null, xtmArray, new java.security.SecureRandom());
} catch (GeneralSecurityException gse) { } catch (GeneralSecurityException gse) {
} }
if (sslContext != null) { if (sslContext != null)
HttpsURLConnection.setDefaultSSLSocketFactory(sslContext.getSocketFactory()); HttpsURLConnection.setDefaultSSLSocketFactory(sslContext.getSocketFactory());
}
HttpsURLConnection.setDefaultHostnameVerifier(hnv); HttpsURLConnection.setDefaultHostnameVerifier(hnv);
} }
@@ -110,37 +109,33 @@ public class FileDownloadTask extends Task implements PreviousResult<File>, Prev
private void closeFiles() { private void closeFiles() {
// Close file. // Close file.
if (file != null) { if (file != null)
try { try {
file.close(); file.close();
file = null; file = null;
} catch (IOException e) { } catch (IOException e) {
HMCLog.warn("Failed to close file", e); HMCLog.warn("Failed to close file", e);
} }
}
// Close connection to server. // Close connection to server.
if (stream != null) { if (stream != null)
try { try {
stream.close(); stream.close();
stream = null; stream = null;
} catch (IOException e) { } catch (IOException e) {
HMCLog.warn("Failed to close stream", e); HMCLog.warn("Failed to close stream", e);
} }
}
} }
// Download file. // Download file.
@Override @Override
public boolean executeTask() { public boolean executeTask() {
for (PreviousResult<String> p : al) { for (PreviousResult<String> p : al)
this.url = IOUtils.parseURL(p.getResult()); this.url = IOUtils.parseURL(p.getResult());
}
for (int repeat = 0; repeat < 6; repeat++) { for (int repeat = 0; repeat < 6; repeat++) {
if (repeat > 0) { if (repeat > 0)
HMCLog.warn("Failed to download, repeat: " + repeat); HMCLog.warn("Failed to download, repeat: " + repeat);
}
try { try {
// Open connection to URL. // Open connection to URL.
@@ -167,16 +162,14 @@ public class FileDownloadTask extends Task implements PreviousResult<File>, Prev
} }
// Set the size for this download if it hasn't been already set. // Set the size for this download if it hasn't been already set.
if (size == -1) { if (size == -1)
size = contentLength; size = contentLength;
}
filePath.getParentFile().mkdirs(); filePath.getParentFile().mkdirs();
File tempFile = new File(filePath.getAbsolutePath() + ".hmd"); File tempFile = new File(filePath.getAbsolutePath() + ".hmd");
if (!tempFile.exists()) { if (!tempFile.exists())
tempFile.createNewFile(); tempFile.createNewFile();
}
// Open file and seek to the end of it. // Open file and seek to the end of it.
file = new RandomAccessFile(tempFile, "rw"); file = new RandomAccessFile(tempFile, "rw");
@@ -195,23 +188,20 @@ public class FileDownloadTask extends Task implements PreviousResult<File>, Prev
// Read from server into buffer. // Read from server into buffer.
int read = stream.read(buffer); int read = stream.read(buffer);
if (read == -1) { if (read == -1)
break; break;
}
// Write buffer to file. // Write buffer to file.
file.write(buffer, 0, read); file.write(buffer, 0, read);
downloaded += read; downloaded += read;
if (ppl != null) { if (ppl != null)
ppl.setProgress(this, downloaded, size); ppl.setProgress(this, downloaded, size);
}
} }
closeFiles(); closeFiles();
tempFile.renameTo(filePath); tempFile.renameTo(filePath);
if (ppl != null) { if (ppl != null)
ppl.onProgressProviderDone(this); ppl.onProgressProviderDone(this);
}
return true; return true;
} catch (Exception e) { } catch (Exception e) {
setFailReason(new NetException(C.i18n("download.failed") + " " + url, e)); setFailReason(new NetException(C.i18n("download.failed") + " " + url, e));

View File

@@ -16,6 +16,7 @@
*/ */
package org.jackhuang.hellominecraft.utils.system; package org.jackhuang.hellominecraft.utils.system;
import java.io.ByteArrayOutputStream;
import java.io.File; import java.io.File;
import java.io.FileFilter; import java.io.FileFilter;
import java.io.FileInputStream; import java.io.FileInputStream;
@@ -25,6 +26,7 @@ import java.io.IOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.nio.channels.FileChannel; import java.nio.channels.FileChannel;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.List; import java.util.List;
import org.jackhuang.hellominecraft.HMCLog; import org.jackhuang.hellominecraft.HMCLog;
import org.jackhuang.hellominecraft.utils.NetUtils; import org.jackhuang.hellominecraft.utils.NetUtils;
@@ -73,7 +75,7 @@ public class FileUtils {
public static void cleanDirectory(File directory) public static void cleanDirectory(File directory)
throws IOException { throws IOException {
if (!directory.exists()) { if (!directory.exists()) {
//String message = directory + " does not exist"; //String message = directory + " does not exist";
//throw new IllegalArgumentException(message); //throw new IllegalArgumentException(message);
directory.mkdirs(); directory.mkdirs();
return; return;
@@ -429,4 +431,15 @@ public class FileUtils {
if (f.getName().endsWith(suffix)) al.add(f); if (f.getName().endsWith(suffix)) al.add(f);
return al.toArray(new File[0]); return al.toArray(new File[0]);
} }
public static byte[] toByteArray(File file) throws IOException {
try (FileInputStream is = new FileInputStream(file)) {
ByteArrayOutputStream os = new ByteArrayOutputStream();
int n;
byte[] b = new byte[1024];
while ((n = is.read(b)) != -1) os.write(b, 0, n);
os.close();
return os.toByteArray();
}
}
} }

View File

@@ -0,0 +1,36 @@
/*
* ARMOptions
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
import org.tukaani.xz.simple.ARM;
/**
* BCJ filter for little endian ARM instructions.
*/
public class ARMOptions extends BCJOptions {
    /** ARM instructions are 4 bytes wide, so offsets must be 4-aligned. */
    private static final int ALIGNMENT = 4;

    public ARMOptions() {
        super(ALIGNMENT);
    }

    FilterEncoder getFilterEncoder() {
        return new BCJEncoder(this, BCJCoder.ARM_FILTER_ID);
    }

    /** Wraps {@code in} with a decoding ARM BCJ filter. */
    public InputStream getInputStream(InputStream in) {
        ARM decoder = new ARM(false, startOffset);
        return new SimpleInputStream(in, decoder);
    }

    /** Wraps {@code out} with an encoding ARM BCJ filter. */
    public FinishableOutputStream getOutputStream(FinishableOutputStream out) {
        ARM encoder = new ARM(true, startOffset);
        return new SimpleOutputStream(out, encoder);
    }
}

View File

@@ -0,0 +1,36 @@
/*
* ARMThumbOptions
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
import org.tukaani.xz.simple.ARMThumb;
/**
* BCJ filter for little endian ARM-Thumb instructions.
*/
public class ARMThumbOptions extends BCJOptions {
    /** ARM-Thumb instructions are 2 bytes wide, so offsets must be 2-aligned. */
    private static final int ALIGNMENT = 2;

    public ARMThumbOptions() {
        super(ALIGNMENT);
    }

    FilterEncoder getFilterEncoder() {
        return new BCJEncoder(this, BCJCoder.ARMTHUMB_FILTER_ID);
    }

    /** Wraps {@code in} with a decoding ARM-Thumb BCJ filter. */
    public InputStream getInputStream(InputStream in) {
        ARMThumb decoder = new ARMThumb(false, startOffset);
        return new SimpleInputStream(in, decoder);
    }

    /** Wraps {@code out} with an encoding ARM-Thumb BCJ filter. */
    public FinishableOutputStream getOutputStream(FinishableOutputStream out) {
        ARMThumb encoder = new ARMThumb(true, startOffset);
        return new SimpleOutputStream(out, encoder);
    }
}

View File

@@ -0,0 +1,35 @@
/*
* BCJCoder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
abstract class BCJCoder implements FilterCoder {
    // XZ filter IDs of the supported branch/call/jump (BCJ) filters.
    public static final long X86_FILTER_ID = 0x04;
    public static final long POWERPC_FILTER_ID = 0x05;
    public static final long IA64_FILTER_ID = 0x06;
    public static final long ARM_FILTER_ID = 0x07;
    public static final long ARMTHUMB_FILTER_ID = 0x08;
    public static final long SPARC_FILTER_ID = 0x09;

    /** Returns true if {@code filterID} identifies one of the BCJ filters. */
    public static boolean isBCJFilterID(long filterID) {
        // The BCJ filter IDs form the contiguous range [X86, SPARC].
        return filterID >= X86_FILTER_ID && filterID <= SPARC_FILTER_ID;
    }

    /** BCJ filters transform data one-to-one; the size never changes. */
    public boolean changesSize() {
        return false;
    }

    /** A BCJ filter may appear anywhere in a chain except in the last position. */
    public boolean nonLastOK() {
        return true;
    }

    public boolean lastOK() {
        return false;
    }
}

View File

@@ -0,0 +1,62 @@
/*
* BCJDecoder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
import org.tukaani.xz.simple.*;
class BCJDecoder extends BCJCoder implements FilterDecoder {
    // Which BCJ filter (X86, ARM, ...) this decoder instance handles.
    private final long filterID;
    // Start offset decoded from the filter properties; 0 when none were given.
    private final int startOffset;

    /**
     * Creates a decoder for the given BCJ filter ID from its raw
     * filter-properties bytes: either empty (default offset 0) or a
     * 4-byte little-endian start offset.
     */
    BCJDecoder(long filterID, byte[] props)
            throws UnsupportedOptionsException {
        assert isBCJFilterID(filterID);
        this.filterID = filterID;
        if (props.length == 0) {
            // No properties stored: use the default start offset.
            startOffset = 0;
        } else if (props.length == 4) {
            // Four property bytes: little-endian start offset.
            int n = 0;
            for (int i = 0; i < 4; ++i)
                n |= (props[i] & 0xFF) << (i * 8);
            startOffset = n;
        } else {
            throw new UnsupportedOptionsException(
                    "Unsupported BCJ filter properties");
        }
    }

    public int getMemoryUsage() {
        return SimpleInputStream.getMemoryUsage();
    }

    /**
     * Wraps {@code in} with the simple filter matching this decoder's
     * filter ID. The constructor asserts the ID is a BCJ ID, so the
     * final else branch is unreachable.
     */
    public InputStream getInputStream(InputStream in) {
        SimpleFilter simpleFilter = null;
        if (filterID == X86_FILTER_ID)
            simpleFilter = new X86(false, startOffset);
        else if (filterID == POWERPC_FILTER_ID)
            simpleFilter = new PowerPC(false, startOffset);
        else if (filterID == IA64_FILTER_ID)
            simpleFilter = new IA64(false, startOffset);
        else if (filterID == ARM_FILTER_ID)
            simpleFilter = new ARM(false, startOffset);
        else if (filterID == ARMTHUMB_FILTER_ID)
            simpleFilter = new ARMThumb(false, startOffset);
        else if (filterID == SPARC_FILTER_ID)
            simpleFilter = new SPARC(false, startOffset);
        else
            assert false;
        return new SimpleInputStream(in, simpleFilter);
    }
}

View File

@@ -0,0 +1,48 @@
/*
* BCJEncoder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
class BCJEncoder extends BCJCoder implements FilterEncoder {
    private final BCJOptions options;
    private final long filterID;
    private final byte[] props;

    /**
     * Creates an encoder for the given BCJ filter. A non-zero start
     * offset is serialized as four little-endian property bytes; the
     * default offset of zero is serialized as an empty properties field.
     */
    BCJEncoder(BCJOptions options, long filterID) {
        assert isBCJFilterID(filterID);
        int offset = options.getStartOffset();
        if (offset == 0) {
            props = new byte[0];
        } else {
            byte[] encoded = new byte[4];
            for (int shift = 0; shift < 4; ++shift)
                encoded[shift] = (byte)(offset >>> (shift * 8));
            props = encoded;
        }
        this.filterID = filterID;
        // Snapshot the options so later mutation by the caller has no effect.
        this.options = (BCJOptions)options.clone();
    }

    public long getFilterID() {
        return filterID;
    }

    public byte[] getFilterProps() {
        return props;
    }

    /** BCJ filters cannot flush partial output. */
    public boolean supportsFlushing() {
        return false;
    }

    public FinishableOutputStream getOutputStream(FinishableOutputStream out) {
        return options.getOutputStream(out);
    }
}

View File

@@ -0,0 +1,57 @@
/*
* BCJOptions
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
abstract class BCJOptions extends FilterOptions {
    // Required alignment of the start offset (instruction width of the target arch).
    private final int alignment;
    // Start offset for the address conversions; 0 is the normal value.
    int startOffset = 0;

    BCJOptions(int alignment) {
        this.alignment = alignment;
    }

    /**
     * Sets the start offset for the address conversions.
     * Normally this is useless so you shouldn't use this function.
     * The default value is <code>0</code>.
     */
    public void setStartOffset(int startOffset)
            throws UnsupportedOptionsException {
        if ((startOffset & (alignment - 1)) != 0)
            throw new UnsupportedOptionsException(
                    "Start offset must be a multiple of " + alignment);

        this.startOffset = startOffset;
    }

    /**
     * Gets the start offset.
     */
    public int getStartOffset() {
        return startOffset;
    }

    public int getEncoderMemoryUsage() {
        return SimpleOutputStream.getMemoryUsage();
    }

    public int getDecoderMemoryUsage() {
        return SimpleInputStream.getMemoryUsage();
    }

    public Object clone() {
        try {
            return super.clone();
        } catch (CloneNotSupportedException e) {
            // Cannot happen: FilterOptions is cloneable by design.
            assert false;
            throw new RuntimeException();
        }
    }
}

View File

@@ -0,0 +1,278 @@
/*
* BlockInputStream
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
import java.io.DataInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Arrays;
import org.tukaani.xz.common.DecoderUtil;
import org.tukaani.xz.check.Check;
class BlockInputStream extends InputStream {
    // inData reads header/padding/check fields directly from the source;
    // inCounted counts the bytes of the Compressed Data field.
    private final DataInputStream inData;
    private final CountingInputStream inCounted;
    private InputStream filterChain;
    private final Check check;

    // Sizes parsed from the Block Header; -1 means "field not present".
    private long uncompressedSizeInHeader = -1;
    private long compressedSizeInHeader = -1;
    private long compressedSizeLimit;
    private final int headerSize;
    private long uncompressedSize = 0;
    private boolean endReached = false;

    // Scratch buffer for the single-byte read() variant.
    private final byte[] tempBuf = new byte[1];

    /**
     * Parses one XZ Block Header from {@code in} and builds the filter
     * chain for reading its Compressed Data. When doing random access
     * reading, {@code unpaddedSizeInIndex}/{@code uncompressedSizeInIndex}
     * carry the sizes from the Index for cross-validation (-1 otherwise).
     *
     * @throws IndexIndicatorException if the first byte begins the Index
     *         field instead of a Block Header
     */
    public BlockInputStream(InputStream in, Check check, int memoryLimit,
                            long unpaddedSizeInIndex,
                            long uncompressedSizeInIndex)
            throws IOException, IndexIndicatorException {
        this.check = check;
        inData = new DataInputStream(in);

        byte[] buf = new byte[DecoderUtil.BLOCK_HEADER_SIZE_MAX];

        // Block Header Size or Index Indicator
        inData.readFully(buf, 0, 1);

        // See if this begins the Index field.
        if (buf[0] == 0x00)
            throw new IndexIndicatorException();

        // Read the rest of the Block Header.
        headerSize = 4 * ((buf[0] & 0xFF) + 1);
        inData.readFully(buf, 1, headerSize - 1);

        // Validate the CRC32.
        if (!DecoderUtil.isCRC32Valid(buf, 0, headerSize - 4, headerSize - 4))
            throw new CorruptedInputException("XZ Block Header is corrupt");

        // Check for reserved bits in Block Flags.
        if ((buf[1] & 0x3C) != 0)
            throw new UnsupportedOptionsException(
                    "Unsupported options in XZ Block Header");

        // Memory for the Filter Flags field
        int filterCount = (buf[1] & 0x03) + 1;
        long[] filterIDs = new long[filterCount];
        byte[][] filterProps = new byte[filterCount][];

        // Use a stream to parse the fields after the Block Flags field.
        // Exclude the CRC32 field at the end.
        ByteArrayInputStream bufStream = new ByteArrayInputStream(
                buf, 2, headerSize - 6);

        try {
            // Set the maximum valid compressed size. This is overridden
            // by the value from the Compressed Size field if it is present.
            compressedSizeLimit = (DecoderUtil.VLI_MAX & ~3)
                    - headerSize - check.getSize();

            // Decode and validate Compressed Size if the relevant flag
            // is set in Block Flags.
            if ((buf[1] & 0x40) != 0x00) {
                compressedSizeInHeader = DecoderUtil.decodeVLI(bufStream);

                if (compressedSizeInHeader == 0
                        || compressedSizeInHeader > compressedSizeLimit)
                    throw new CorruptedInputException();

                compressedSizeLimit = compressedSizeInHeader;
            }

            // Decode Uncompressed Size if the relevant flag is set
            // in Block Flags.
            if ((buf[1] & 0x80) != 0x00)
                uncompressedSizeInHeader = DecoderUtil.decodeVLI(bufStream);

            // Decode Filter Flags.
            for (int i = 0; i < filterCount; ++i) {
                filterIDs[i] = DecoderUtil.decodeVLI(bufStream);

                long filterPropsSize = DecoderUtil.decodeVLI(bufStream);
                if (filterPropsSize > bufStream.available())
                    throw new CorruptedInputException();

                filterProps[i] = new byte[(int)filterPropsSize];
                // Reading from an in-memory buffer; the available() check
                // above guarantees the full property array is filled.
                bufStream.read(filterProps[i]);
            }

        } catch (IOException e) {
            throw new CorruptedInputException("XZ Block Header is corrupt");
        }

        // Check that the remaining bytes are zero.
        for (int i = bufStream.available(); i > 0; --i)
            if (bufStream.read() != 0x00)
                throw new UnsupportedOptionsException(
                        "Unsupported options in XZ Block Header");

        // Validate the Block Header against the Index when doing
        // random access reading.
        if (unpaddedSizeInIndex != -1) {
            // Compressed Data must be at least one byte, so if Block Header
            // and Check alone take as much or more space than the size
            // stored in the Index, the file is corrupt.
            int headerAndCheckSize = headerSize + check.getSize();
            if (headerAndCheckSize >= unpaddedSizeInIndex)
                throw new CorruptedInputException(
                        "XZ Index does not match a Block Header");

            // The compressed size calculated from Unpadded Size must
            // match the value stored in the Compressed Size field in
            // the Block Header.
            long compressedSizeFromIndex
                    = unpaddedSizeInIndex - headerAndCheckSize;
            if (compressedSizeFromIndex > compressedSizeLimit
                    || (compressedSizeInHeader != -1
                        && compressedSizeInHeader != compressedSizeFromIndex))
                throw new CorruptedInputException(
                        "XZ Index does not match a Block Header");

            // The uncompressed size stored in the Index must match
            // the value stored in the Uncompressed Size field in
            // the Block Header.
            if (uncompressedSizeInHeader != -1
                    && uncompressedSizeInHeader != uncompressedSizeInIndex)
                throw new CorruptedInputException(
                        "XZ Index does not match a Block Header");

            // For further validation, pretend that the values from the Index
            // were stored in the Block Header.
            compressedSizeLimit = compressedSizeFromIndex;
            compressedSizeInHeader = compressedSizeFromIndex;
            uncompressedSizeInHeader = uncompressedSizeInIndex;
        }

        // Check if the Filter IDs are supported, decode
        // the Filter Properties, and check that they are
        // supported by this decoder implementation.
        FilterDecoder[] filters = new FilterDecoder[filterIDs.length];

        for (int i = 0; i < filters.length; ++i) {
            if (filterIDs[i] == LZMA2Coder.FILTER_ID)
                filters[i] = new LZMA2Decoder(filterProps[i]);
            else if (filterIDs[i] == DeltaCoder.FILTER_ID)
                filters[i] = new DeltaDecoder(filterProps[i]);
            else if (BCJDecoder.isBCJFilterID(filterIDs[i]))
                filters[i] = new BCJDecoder(filterIDs[i], filterProps[i]);
            else
                throw new UnsupportedOptionsException(
                        "Unknown Filter ID " + filterIDs[i]);
        }

        RawCoder.validate(filters);

        // Check the memory usage limit.
        if (memoryLimit >= 0) {
            int memoryNeeded = 0;
            for (int i = 0; i < filters.length; ++i)
                memoryNeeded += filters[i].getMemoryUsage();

            if (memoryNeeded > memoryLimit)
                throw new MemoryLimitException(memoryNeeded, memoryLimit);
        }

        // Use an input size counter to calculate
        // the size of the Compressed Data field.
        inCounted = new CountingInputStream(in);

        // Initialize the filter chain.
        filterChain = inCounted;
        for (int i = filters.length - 1; i >= 0; --i)
            filterChain = filters[i].getInputStream(filterChain);
    }

    public int read() throws IOException {
        return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
    }

    /**
     * Reads uncompressed data, updating the integrity check and verifying
     * the Block's size fields and check as soon as the end is reached.
     */
    public int read(byte[] buf, int off, int len) throws IOException {
        if (endReached)
            return -1;

        int ret = filterChain.read(buf, off, len);

        if (ret > 0) {
            check.update(buf, off, ret);
            uncompressedSize += ret;

            // Catch invalid values.
            long compressedSize = inCounted.getSize();
            if (compressedSize < 0
                    || compressedSize > compressedSizeLimit
                    || uncompressedSize < 0
                    || (uncompressedSizeInHeader != -1
                        && uncompressedSize > uncompressedSizeInHeader))
                throw new CorruptedInputException();

            // Check the Block integrity as soon as possible:
            //   - The filter chain shouldn't return less than requested
            //     unless it hit the end of the input.
            //   - If the uncompressed size is known, we know when there
            //     shouldn't be more data coming. We still need to read
            //     one byte to let the filter chain catch errors and to
            //     let it read end of payload marker(s).
            if (ret < len || uncompressedSize == uncompressedSizeInHeader) {
                if (filterChain.read() != -1)
                    throw new CorruptedInputException();

                validate();
                endReached = true;
            }
        } else if (ret == -1) {
            validate();
            endReached = true;
        }

        return ret;
    }

    /**
     * Validates the finished Block: header size fields, zero-valued
     * Block Padding, and the stored integrity check.
     */
    private void validate() throws IOException {
        long compressedSize = inCounted.getSize();

        // Validate Compressed Size and Uncompressed Size if they were
        // present in Block Header.
        if ((compressedSizeInHeader != -1
                    && compressedSizeInHeader != compressedSize)
                || (uncompressedSizeInHeader != -1
                    && uncompressedSizeInHeader != uncompressedSize))
            throw new CorruptedInputException();

        // Block Padding bytes must be zeros.
        while ((compressedSize++ & 3) != 0)
            if (inData.readUnsignedByte() != 0x00)
                throw new CorruptedInputException();

        // Validate the integrity check.
        byte[] storedCheck = new byte[check.getSize()];
        inData.readFully(storedCheck);
        if (!Arrays.equals(check.finish(), storedCheck))
            throw new CorruptedInputException("Integrity check ("
                    + check.getName() + ") does not match");
    }

    public int available() throws IOException {
        return filterChain.available();
    }

    /** Unpadded Size as defined by the XZ Index: header + compressed data + check. */
    public long getUnpaddedSize() {
        return headerSize + inCounted.getSize() + check.getSize();
    }

    public long getUncompressedSize() {
        return uncompressedSize;
    }
}

View File

@@ -0,0 +1,134 @@
/*
* BlockOutputStream
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.OutputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.tukaani.xz.common.EncoderUtil;
import org.tukaani.xz.check.Check;
class BlockOutputStream extends FinishableOutputStream {
    // out receives header/padding/check bytes directly; outCounted counts
    // the bytes of the Compressed Data field produced by the filter chain.
    private final OutputStream out;
    private final CountingOutputStream outCounted;
    private FinishableOutputStream filterChain;
    private final Check check;

    private final int headerSize;
    private final long compressedSizeLimit;
    private long uncompressedSize = 0;

    // Scratch buffer for the single-byte write(int) variant.
    private final byte[] tempBuf = new byte[1];

    /**
     * Writes the XZ Block Header for the given filter chain to {@code out}
     * and prepares the chain for writing the Compressed Data field.
     */
    public BlockOutputStream(OutputStream out, FilterEncoder[] filters,
                             Check check) throws IOException {
        this.out = out;
        this.check = check;

        // Initialize the filter chain.
        outCounted = new CountingOutputStream(out);
        filterChain = outCounted;
        for (int i = filters.length - 1; i >= 0; --i)
            filterChain = filters[i].getOutputStream(filterChain);

        // Prepare to encode the Block Header field.
        ByteArrayOutputStream bufStream = new ByteArrayOutputStream();

        // Write a dummy Block Header Size field. The real value is written
        // once everything else except CRC32 has been written.
        bufStream.write(0x00);

        // Write Block Flags. Storing Compressed Size or Uncompressed Size
        // isn't supported for now.
        bufStream.write(filters.length - 1);

        // List of Filter Flags
        for (int i = 0; i < filters.length; ++i) {
            EncoderUtil.encodeVLI(bufStream, filters[i].getFilterID());
            byte[] filterProps = filters[i].getFilterProps();
            EncoderUtil.encodeVLI(bufStream, filterProps.length);
            bufStream.write(filterProps);
        }

        // Header Padding
        while ((bufStream.size() & 3) != 0)
            bufStream.write(0x00);

        byte[] buf = bufStream.toByteArray();

        // Total size of the Block Header: Take the size of the CRC32 field
        // into account.
        headerSize = buf.length + 4;

        // This is just a sanity check.
        if (headerSize > EncoderUtil.BLOCK_HEADER_SIZE_MAX)
            throw new UnsupportedOptionsException();

        // Block Header Size
        buf[0] = (byte)(buf.length / 4);

        // Write the Block Header field to the output stream.
        out.write(buf);
        EncoderUtil.writeCRC32(out, buf);

        // Calculate the maximum allowed size of the Compressed Data field.
        // It is hard to exceed it so this is mostly to be pedantic.
        compressedSizeLimit = (EncoderUtil.VLI_MAX & ~3)
                - headerSize - check.getSize();
    }

    public void write(int b) throws IOException {
        tempBuf[0] = (byte)b;
        write(tempBuf, 0, 1);
    }

    /** Writes uncompressed data into the filter chain, updating the check. */
    public void write(byte[] buf, int off, int len) throws IOException {
        filterChain.write(buf, off, len);
        check.update(buf, off, len);
        uncompressedSize += len;
        validate();
    }

    public void flush() throws IOException {
        filterChain.flush();
        validate();
    }

    /** Finishes the Block: flushes the chain, writes Block Padding and the check. */
    public void finish() throws IOException {
        // Finish the Compressed Data field.
        filterChain.finish();
        validate();

        // Block Padding
        for (long i = outCounted.getSize(); (i & 3) != 0; ++i)
            out.write(0x00);

        // Check
        out.write(check.finish());
    }

    private void validate() throws IOException {
        long compressedSize = outCounted.getSize();

        // It is very hard to trigger this exception.
        // This is just to be pedantic.
        if (compressedSize < 0 || compressedSize > compressedSizeLimit
                || uncompressedSize < 0)
            throw new XZIOException("XZ Stream has grown too big");
    }

    /** Unpadded Size as defined by the XZ Index: header + compressed data + check. */
    public long getUnpaddedSize() {
        return headerSize + outCounted.getSize() + check.getSize();
    }

    public long getUncompressedSize() {
        return uncompressedSize;
    }
}

View File

@@ -0,0 +1,37 @@
/*
* CorruptedInputException
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
/**
* Thrown when the compressed input data is corrupt.
* However, it is possible that some or all of the data
* already read from the input stream was corrupt too.
*/
/**
 * Thrown when the compressed input data is corrupt.
 * Note that some or all of the data already returned from the
 * input stream may have been corrupt as well.
 */
public class CorruptedInputException extends XZIOException {
    private static final long serialVersionUID = 3L;

    /**
     * Creates a new CorruptedInputException with
     * the specified error detail message.
     *
     * @param s error detail message
     */
    public CorruptedInputException(String s) {
        super(s);
    }

    /**
     * Creates a new CorruptedInputException with
     * the default error detail message.
     */
    public CorruptedInputException() {
        super("Compressed data is corrupt");
    }
}

View File

@@ -0,0 +1,45 @@
/*
* CountingInputStream
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.FilterInputStream;
import java.io.InputStream;
import java.io.IOException;
/**
* Counts the number of bytes read from an input stream.
*/
/**
 * Transparent wrapper that tracks how many bytes have been
 * read from the underlying input stream.
 */
class CountingInputStream extends FilterInputStream {
    // Total bytes read so far; updates stop if it ever goes negative
    // (i.e. after overflowing long, which is practically impossible).
    private long size = 0;

    public CountingInputStream(InputStream in) {
        super(in);
    }

    public int read() throws IOException {
        int b = in.read();
        if (size >= 0 && b != -1)
            ++size;
        return b;
    }

    public int read(byte[] b, int off, int len) throws IOException {
        int n = in.read(b, off, len);
        if (size >= 0 && n > 0)
            size += n;
        return n;
    }

    /** Returns the number of bytes read so far. */
    public long getSize() {
        return size;
    }
}

View File

@@ -0,0 +1,54 @@
/*
* CountingOutputStream
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.OutputStream;
import java.io.IOException;
/**
* Counts the number of bytes written to an output stream.
* <p>
* The <code>finish</code> method does nothing.
* This is <code>FinishableOutputStream</code> instead
* of <code>OutputStream</code> solely because it allows
* using this as the output stream for a chain of raw filters.
*/
/**
 * Wraps an output stream and records the number of bytes written to it.
 * <p>
 * The inherited <code>finish</code> is a no-op; this class extends
 * <code>FinishableOutputStream</code> only so it can terminate a chain
 * of raw filters.
 */
class CountingOutputStream extends FinishableOutputStream {
    private final OutputStream out;
    // Total bytes written; counting stops if it ever turns negative.
    private long size = 0;

    public CountingOutputStream(OutputStream out) {
        this.out = out;
    }

    /** Returns the number of bytes written so far. */
    public long getSize() {
        return size;
    }

    public void write(int b) throws IOException {
        out.write(b);
        if (size >= 0)
            size += 1;
    }

    public void write(byte[] b, int off, int len) throws IOException {
        out.write(b, off, len);
        if (size >= 0)
            size += len;
    }

    public void flush() throws IOException {
        out.flush();
    }

    public void close() throws IOException {
        out.close();
    }
}

View File

@@ -0,0 +1,26 @@
/*
* DeltaCoder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
// Shared base for the Delta filter's encoder and decoder sides:
// holds the filter ID and the chain-position constraints.
abstract class DeltaCoder implements FilterCoder {
    public static final long FILTER_ID = 0x03;
    // Delta maps bytes one-to-one, so output size equals input size.
    public boolean changesSize() {
        return false;
    }
    // Delta may appear as a non-last filter (e.g. Delta + LZMA2) ...
    public boolean nonLastOK() {
        return true;
    }
    // ... but not as the last filter in the chain.
    public boolean lastOK() {
        return false;
    }
}

View File

@@ -0,0 +1,32 @@
/*
* DeltaDecoder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
/**
 * Decoder side of the Delta filter: validates the one-byte filter
 * properties and builds the matching DeltaInputStream.
 */
class DeltaDecoder extends DeltaCoder implements FilterDecoder {
    private final int distance;

    DeltaDecoder(byte[] props) throws UnsupportedOptionsException {
        // The Delta filter stores exactly one property byte:
        // the distance minus one.
        if (props.length != 1)
            throw new UnsupportedOptionsException(
                    "Unsupported Delta filter properties");
        distance = 1 + (props[0] & 0xFF);
    }

    public int getMemoryUsage() {
        // Delta decoding needs only a tiny fixed-size history buffer.
        return 1;
    }

    public InputStream getInputStream(InputStream in) {
        return new DeltaInputStream(in, distance);
    }
}

View File

@@ -0,0 +1,36 @@
/*
* DeltaEncoder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
/**
 * Encoder side of the Delta filter: exposes the encoded filter
 * properties and constructs the raw output stream.
 */
class DeltaEncoder extends DeltaCoder implements FilterEncoder {
    private final DeltaOptions options;
    private final byte[] props = new byte[1];

    DeltaEncoder(DeltaOptions options) {
        // Keep a private copy so later changes made by the caller
        // to its options object don't affect this encoder.
        this.options = (DeltaOptions)options.clone();
        // The single property byte holds the distance minus one.
        props[0] = (byte)(options.getDistance() - 1);
    }

    public long getFilterID() {
        return FILTER_ID;
    }

    public byte[] getFilterProps() {
        return props;
    }

    public boolean supportsFlushing() {
        return true;
    }

    public FinishableOutputStream getOutputStream(FinishableOutputStream out) {
        return options.getOutputStream(out);
    }
}

View File

@@ -0,0 +1,146 @@
/*
* DeltaInputStream
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
import java.io.IOException;
import org.tukaani.xz.delta.DeltaDecoder;
/**
* Decodes raw Delta-filtered data (no XZ headers).
* <p>
* The delta filter doesn't change the size of the data and thus it
* cannot have an end-of-payload marker. It will simply decode until
* its input stream indicates end of input.
*/
/**
 * Decodes raw Delta-filtered data (no XZ headers).
 * <p>
 * Since the Delta filter never changes the length of the data there is
 * no end-of-payload marker; decoding simply continues until the wrapped
 * stream reports end of input.
 */
public class DeltaInputStream extends InputStream {
    /**
     * Smallest supported delta calculation distance.
     */
    public static final int DISTANCE_MIN = 1;

    /**
     * Largest supported delta calculation distance.
     */
    public static final int DISTANCE_MAX = 256;

    private InputStream in;
    private final DeltaDecoder delta;

    // First I/O failure seen; replayed on every later call.
    private IOException exception = null;

    // One-byte scratch buffer backing read().
    private final byte[] tempBuf = new byte[1];

    /**
     * Creates a new Delta decoder with the given delta calculation distance.
     *
     * @param in input stream from which Delta filtered data
     * is read
     *
     * @param distance delta calculation distance, must be in the
     * range [<code>DISTANCE_MIN</code>,
     * <code>DISTANCE_MAX</code>]
     */
    public DeltaInputStream(InputStream in, int distance) {
        // Explicit null check so the problem is reported from this
        // constructor instead of surfacing later.
        if (in == null)
            throw new NullPointerException();

        this.delta = new DeltaDecoder(distance);
        this.in = in;
    }

    /**
     * Decode the next byte from this input stream.
     *
     * @return the next decoded byte, or <code>-1</code> to indicate
     * the end of input on the input stream <code>in</code>
     *
     * @throws IOException may be thrown by <code>in</code>
     */
    public int read() throws IOException {
        int got = read(tempBuf, 0, 1);
        return got == -1 ? -1 : (tempBuf[0] & 0xFF);
    }

    /**
     * Decode into an array of bytes.
     * <p>
     * This calls <code>in.read(buf, off, len)</code> and defilters the
     * returned data.
     *
     * @param buf target buffer for decoded data
     * @param off start offset in <code>buf</code>
     * @param len maximum number of bytes to read
     *
     * @return number of bytes read, or <code>-1</code> to indicate
     * the end of the input stream <code>in</code>
     *
     * @throws XZIOException if the stream has been closed
     *
     * @throws IOException may be thrown by underlaying input
     * stream <code>in</code>
     */
    public int read(byte[] buf, int off, int len) throws IOException {
        if (len == 0)
            return 0;

        if (in == null)
            throw new XZIOException("Stream closed");

        if (exception != null)
            throw exception;

        int n;
        try {
            n = in.read(buf, off, len);
        } catch (IOException e) {
            exception = e;
            throw e;
        }

        if (n == -1)
            return -1;

        // Defilter the freshly read bytes in place.
        delta.decode(buf, off, n);
        return n;
    }

    /**
     * Calls <code>in.available()</code>.
     *
     * @return the value returned by <code>in.available()</code>
     */
    public int available() throws IOException {
        if (in == null)
            throw new XZIOException("Stream closed");

        if (exception != null)
            throw exception;

        return in.available();
    }

    /**
     * Closes the stream and calls <code>in.close()</code>.
     * If the stream was already closed, this does nothing.
     *
     * @throws IOException if thrown by <code>in.close()</code>
     */
    public void close() throws IOException {
        if (in == null)
            return;

        try {
            in.close();
        } finally {
            in = null;
        }
    }
}

View File

@@ -0,0 +1,102 @@
/*
* DeltaOptions
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
/**
* Delta filter options. The Delta filter can be used only as a non-last
* filter in the chain, for example Delta + LZMA2.
* <p>
* Currently only simple byte-wise delta is supported. The only option
* is the delta distance, which you should set to match your data.
* It's not possible to provide a generic default value for it.
* <p>
* For example, with distance = 2 and eight-byte input
* A1 B1 A2 B3 A3 B5 A4 B7, the output will be A1 B1 01 02 01 02 01 02.
* <p>
* The Delta filter can be good with uncompressed bitmap images. It can
* also help with PCM audio, although special-purpose compressors like
* FLAC will give much smaller result at much better compression speed.
*/
/**
 * Delta filter options. The Delta filter can be used only as a non-last
 * filter in the chain, for example Delta + LZMA2.
 * <p>
 * Only simple byte-wise delta is currently supported. The sole option
 * is the delta distance, which should be chosen to match the data;
 * no generic default can be suggested.
 * <p>
 * For example, with distance = 2 and eight-byte input
 * A1 B1 A2 B3 A3 B5 A4 B7, the output will be A1 B1 01 02 01 02 01 02.
 * <p>
 * The Delta filter can be good with uncompressed bitmap images. It can
 * also help with PCM audio, although special-purpose compressors like
 * FLAC will give much smaller result at much better compression speed.
 */
public class DeltaOptions extends FilterOptions {
    /**
     * Smallest supported delta calculation distance.
     */
    public static final int DISTANCE_MIN = 1;

    /**
     * Largest supported delta calculation distance.
     */
    public static final int DISTANCE_MAX = 256;

    private int distance = DISTANCE_MIN;

    /**
     * Creates new Delta options and sets the delta distance to 1 byte.
     */
    public DeltaOptions() {}

    /**
     * Creates new Delta options and sets the distance to the given value.
     */
    public DeltaOptions(int distance) throws UnsupportedOptionsException {
        setDistance(distance);
    }

    /**
     * Sets the delta distance in bytes. The new distance must be in
     * the range [DISTANCE_MIN, DISTANCE_MAX].
     */
    public void setDistance(int distance) throws UnsupportedOptionsException {
        if (distance >= DISTANCE_MIN && distance <= DISTANCE_MAX)
            this.distance = distance;
        else
            throw new UnsupportedOptionsException(
                    "Delta distance must be in the range [" + DISTANCE_MIN
                    + ", " + DISTANCE_MAX + "]: " + distance);
    }

    /**
     * Gets the delta distance.
     */
    public int getDistance() {
        return distance;
    }

    public int getEncoderMemoryUsage() {
        return DeltaOutputStream.getMemoryUsage();
    }

    public FinishableOutputStream getOutputStream(FinishableOutputStream out) {
        return new DeltaOutputStream(out, this);
    }

    public int getDecoderMemoryUsage() {
        return 1;
    }

    public InputStream getInputStream(InputStream in) {
        return new DeltaInputStream(in, distance);
    }

    FilterEncoder getFilterEncoder() {
        return new DeltaEncoder(this);
    }

    public Object clone() {
        try {
            return super.clone();
        } catch (CloneNotSupportedException e) {
            // Cannot happen: this class is Cloneable via FilterOptions.
            assert false;
            throw new RuntimeException();
        }
    }
}

View File

@@ -0,0 +1,113 @@
/*
* DeltaOutputStream
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.IOException;
import org.tukaani.xz.delta.DeltaEncoder;
/**
 * Raw Delta filter encoder stream: defilters data through a fixed-size
 * staging buffer and forwards it to the next stream in the chain.
 */
class DeltaOutputStream extends FinishableOutputStream {
    // Size of the staging buffer used to hold encoded chunks.
    private static final int FILTER_BUF_SIZE = 4096;

    private FinishableOutputStream out;
    private final DeltaEncoder delta;
    private final byte[] filterBuf = new byte[FILTER_BUF_SIZE];

    private boolean finished = false;

    // First I/O failure seen; replayed on every later call.
    private IOException exception = null;

    // One-byte scratch buffer backing write(int).
    private final byte[] tempBuf = new byte[1];

    static int getMemoryUsage() {
        return 1 + FILTER_BUF_SIZE / 1024;
    }

    DeltaOutputStream(FinishableOutputStream out, DeltaOptions options) {
        this.out = out;
        delta = new DeltaEncoder(options.getDistance());
    }

    public void write(int b) throws IOException {
        tempBuf[0] = (byte)b;
        write(tempBuf, 0, 1);
    }

    public void write(byte[] buf, int off, int len) throws IOException {
        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
            throw new IndexOutOfBoundsException();

        if (exception != null)
            throw exception;

        if (finished)
            throw new XZIOException("Stream finished");

        try {
            // Encode full staging-buffer-sized chunks first, then the tail.
            int pos = off;
            int remaining = len;
            while (remaining > FILTER_BUF_SIZE) {
                delta.encode(buf, pos, FILTER_BUF_SIZE, filterBuf);
                out.write(filterBuf);
                pos += FILTER_BUF_SIZE;
                remaining -= FILTER_BUF_SIZE;
            }

            delta.encode(buf, pos, remaining, filterBuf);
            out.write(filterBuf, 0, remaining);
        } catch (IOException e) {
            exception = e;
            throw e;
        }
    }

    public void flush() throws IOException {
        if (exception != null)
            throw exception;

        if (finished)
            throw new XZIOException("Stream finished or closed");

        try {
            out.flush();
        } catch (IOException e) {
            exception = e;
            throw e;
        }
    }

    public void finish() throws IOException {
        if (finished)
            return;

        if (exception != null)
            throw exception;

        try {
            out.finish();
        } catch (IOException e) {
            exception = e;
            throw e;
        }

        finished = true;
    }

    public void close() throws IOException {
        if (out != null) {
            try {
                out.close();
            } catch (IOException e) {
                // Keep the first failure; don't overwrite it.
                if (exception == null)
                    exception = e;
            }

            out = null;
        }

        if (exception != null)
            throw exception;
    }
}

View File

@@ -0,0 +1,16 @@
/*
* FilterCoder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
// Common interface for filter encoders and decoders: describes where in
// a filter chain the filter may appear and whether it changes data size.
interface FilterCoder {
    boolean changesSize();
    boolean nonLastOK();
    boolean lastOK();
}

View File

@@ -0,0 +1,17 @@
/*
* FilterDecoder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
// Decoder side of a filter: reports its memory need (in KiB) and wraps
// an input stream with the defiltering stream.
interface FilterDecoder extends FilterCoder {
    int getMemoryUsage();
    InputStream getInputStream(InputStream in);
}

View File

@@ -0,0 +1,17 @@
/*
* FilterEncoder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
// Encoder side of a filter: exposes the filter ID and encoded properties
// for the Block Header, and wraps an output stream with the filter.
interface FilterEncoder extends FilterCoder {
    long getFilterID();
    byte[] getFilterProps();
    boolean supportsFlushing();
    FinishableOutputStream getOutputStream(FinishableOutputStream out);
}

View File

@@ -0,0 +1,80 @@
/*
* FilterOptions
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
import java.io.IOException;
/**
* Base class for filter-specific options classes.
*/
/**
 * Base class for filter-specific options classes.
 */
public abstract class FilterOptions implements Cloneable {
    /**
     * Gets how much memory the encoder will need with
     * the given filter chain. This function simply calls
     * <code>getEncoderMemoryUsage()</code> for every filter
     * in the array and returns the sum of the returned values.
     */
    public static int getEncoderMemoryUsage(FilterOptions[] options) {
        int sum = 0;
        for (FilterOptions opt : options)
            sum += opt.getEncoderMemoryUsage();
        return sum;
    }

    /**
     * Gets how much memory the decoder will need with
     * the given filter chain. This function simply calls
     * <code>getDecoderMemoryUsage()</code> for every filter
     * in the array and returns the sum of the returned values.
     */
    public static int getDecoderMemoryUsage(FilterOptions[] options) {
        int sum = 0;
        for (FilterOptions opt : options)
            sum += opt.getDecoderMemoryUsage();
        return sum;
    }

    /**
     * Gets how much memory the encoder will need with these options.
     */
    public abstract int getEncoderMemoryUsage();

    /**
     * Gets a raw (no XZ headers) encoder output stream using these options.
     * Raw streams are an advanced feature. In most cases you want to store
     * the compressed data in the .xz container format instead of using
     * a raw stream. To use this filter in a .xz file, pass this object
     * to XZOutputStream.
     */
    public abstract FinishableOutputStream getOutputStream(
            FinishableOutputStream out);

    /**
     * Gets how much memory the decoder will need to decompress the data
     * that was encoded with these options.
     */
    public abstract int getDecoderMemoryUsage();

    /**
     * Gets a raw (no XZ headers) decoder input stream using these options.
     */
    public abstract InputStream getInputStream(InputStream in)
            throws IOException;

    abstract FilterEncoder getFilterEncoder();

    // Package-private constructor: only classes in this package
    // may define filter options.
    FilterOptions() {}
}

View File

@@ -0,0 +1,31 @@
/*
* FinishableOutputStream
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.OutputStream;
import java.io.IOException;
/**
* Output stream that supports finishing without closing
* the underlying stream.
*/
/**
 * Output stream that supports finishing without closing
 * the underlying stream.
 */
public abstract class FinishableOutputStream extends OutputStream {
    /**
     * Finish the stream without closing the underlying stream.
     * No more data may be written to the stream after finishing.
     * <p>
     * The <code>finish</code> method of <code>FinishableOutputStream</code>
     * does nothing. Subclasses should override it if they need finishing
     * support, which is the case, for example, with compressors.
     *
     * @throws IOException if finishing fails in a subclass
     */
    public void finish() throws IOException {}
}

View File

@@ -0,0 +1,70 @@
/*
* FinishableWrapperOutputStream
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.OutputStream;
import java.io.IOException;
/**
* Wraps an output stream to a finishable output stream for use with
* raw encoders. This is not needed for XZ compression and thus most
* people will never need this.
*/
/**
 * Adapts a plain output stream into a finishable one for use with raw
 * encoders. This is not needed for XZ compression, so most applications
 * will never need this class.
 */
public class FinishableWrapperOutputStream extends FinishableOutputStream {
    /**
     * The {@link java.io.OutputStream OutputStream} that has been
     * wrapped into a FinishableWrapperOutputStream.
     */
    protected OutputStream out;

    /**
     * Creates a new output stream which supports finishing.
     * The <code>finish()</code> method will do nothing.
     */
    public FinishableWrapperOutputStream(OutputStream out) {
        this.out = out;
    }

    /**
     * Forwards to {@link java.io.OutputStream#write(int) out.write(b)}.
     */
    public void write(int b) throws IOException {
        out.write(b);
    }

    /**
     * Forwards to {@link java.io.OutputStream#write(byte[]) out.write(buf)}.
     */
    public void write(byte[] buf) throws IOException {
        out.write(buf);
    }

    /**
     * Forwards to {@link java.io.OutputStream#write(byte[],int,int)
     * out.write(buf, off, len)}.
     */
    public void write(byte[] buf, int off, int len) throws IOException {
        out.write(buf, off, len);
    }

    /**
     * Forwards to {@link java.io.OutputStream#flush() out.flush()}.
     */
    public void flush() throws IOException {
        out.flush();
    }

    /**
     * Forwards to {@link java.io.OutputStream#close() out.close()}.
     */
    public void close() throws IOException {
        out.close();
    }
}

View File

@@ -0,0 +1,36 @@
/*
* IA64Options
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
import org.tukaani.xz.simple.IA64;
/**
* BCJ filter for Itanium (IA-64) instructions.
*/
/**
 * BCJ filter for Itanium (IA-64) instructions.
 */
public class IA64Options extends BCJOptions {
    // IA-64 instruction bundles are 16 bytes, so offsets must be
    // 16-byte aligned.
    private static final int ALIGNMENT = 16;

    public IA64Options() {
        super(ALIGNMENT);
    }

    public FinishableOutputStream getOutputStream(FinishableOutputStream out) {
        IA64 filter = new IA64(true, startOffset);
        return new SimpleOutputStream(out, filter);
    }

    public InputStream getInputStream(InputStream in) {
        IA64 filter = new IA64(false, startOffset);
        return new SimpleInputStream(in, filter);
    }

    FilterEncoder getFilterEncoder() {
        return new BCJEncoder(this, BCJCoder.IA64_FILTER_ID);
    }
}

View File

@@ -0,0 +1,14 @@
/*
* IndexIndicatorException
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
// Internal control-flow signal — presumably thrown when an Index Indicator
// byte is found where a Block Header was expected; verify at call sites,
// which are outside this file.
class IndexIndicatorException extends Exception {
    private static final long serialVersionUID = 1L;
}

View File

@@ -0,0 +1,26 @@
/*
* LZMA2Coder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
// Shared base for the LZMA2 encoder and decoder sides: holds the filter
// ID and the chain-position constraints.
abstract class LZMA2Coder implements FilterCoder {
    public static final long FILTER_ID = 0x21;
    // LZMA2 compresses, so output size differs from input size.
    public boolean changesSize() {
        return true;
    }
    // LZMA2 must be the last filter in the chain ...
    public boolean nonLastOK() {
        return false;
    }
    // ... and only the last.
    public boolean lastOK() {
        return true;
    }
}

View File

@@ -0,0 +1,35 @@
/*
* LZMA2Decoder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
/**
 * Decoder side of the LZMA2 filter: decodes the one-byte dictionary-size
 * property and builds the matching LZMA2InputStream.
 */
class LZMA2Decoder extends LZMA2Coder implements FilterDecoder {
    // Decoded dictionary size in bytes; immutable after construction.
    private final int dictSize;

    LZMA2Decoder(byte[] props) throws UnsupportedOptionsException {
        // Up to 1.5 GiB dictionary is supported. The bigger ones
        // are too big for int.
        if (props.length != 1 || (props[0] & 0xFF) > 37)
            throw new UnsupportedOptionsException(
                    "Unsupported LZMA2 properties");

        // The property byte encodes a 2- or 3-bit mantissa and an exponent:
        // size = (2 | lowBit) << (propertyValue / 2 + 11).
        dictSize = (2 | (props[0] & 1)) << ((props[0] >>> 1) + 11);
    }

    public int getMemoryUsage() {
        return LZMA2InputStream.getMemoryUsage(dictSize);
    }

    public InputStream getInputStream(InputStream in) {
        return new LZMA2InputStream(in, dictSize);
    }
}

View File

@@ -0,0 +1,50 @@
/*
* LZMA2Encoder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import org.tukaani.xz.lzma.LZMAEncoder;
/**
 * Encoder side of the LZMA2 filter: derives the one-byte dictionary-size
 * property from the options and constructs the raw output stream.
 */
class LZMA2Encoder extends LZMA2Coder implements FilterEncoder {
    private final LZMA2Options options;
    private final byte[] props = new byte[1];

    LZMA2Encoder(LZMA2Options options) {
        if (options.getPresetDict() != null)
            throw new IllegalArgumentException(
                    "XZ doesn't support a preset dictionary for now");

        if (options.getMode() == LZMA2Options.MODE_UNCOMPRESSED) {
            props[0] = 0;
        } else {
            int dictSize = Math.max(options.getDictSize(),
                                    LZMA2Options.DICT_SIZE_MIN);
            props[0] = (byte)(LZMAEncoder.getDistSlot(dictSize - 1) - 23);
        }

        // Make a private copy so that the caller is free to change its copy.
        this.options = (LZMA2Options)options.clone();
    }

    public long getFilterID() {
        return FILTER_ID;
    }

    public byte[] getFilterProps() {
        return props;
    }

    public boolean supportsFlushing() {
        return true;
    }

    public FinishableOutputStream getOutputStream(FinishableOutputStream out) {
        return options.getOutputStream(out);
    }
}

View File

@@ -0,0 +1,358 @@
/*
* LZMA2InputStream
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
import java.io.DataInputStream;
import java.io.IOException;
import org.tukaani.xz.lz.LZDecoder;
import org.tukaani.xz.rangecoder.RangeDecoderFromBuffer;
import org.tukaani.xz.lzma.LZMADecoder;
/**
* Decompresses a raw LZMA2 stream (no XZ headers).
*/
public class LZMA2InputStream extends InputStream {
/**
* Smallest valid LZMA2 dictionary size.
* <p>
* Very tiny dictionaries would be a performance problem, so
* the minimum is 4 KiB.
*/
public static final int DICT_SIZE_MIN = 4096;
/**
* Largest dictionary size supported by this implementation.
* <p>
* The LZMA2 algorithm allows dictionaries up to one byte less than 4 GiB.
* This implementation supports only 16 bytes less than 2 GiB for raw
* LZMA2 streams, and for .xz files the maximum is 1.5 GiB. This
* limitation is due to Java using signed 32-bit integers for array
* indexing. The limitation shouldn't matter much in practice since so
* huge dictionaries are not normally used.
*/
public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15;
private static final int COMPRESSED_SIZE_MAX = 1 << 16;
private DataInputStream in;
private final LZDecoder lz;
private final RangeDecoderFromBuffer rc
= new RangeDecoderFromBuffer(COMPRESSED_SIZE_MAX);
private LZMADecoder lzma;
private int uncompressedSize = 0;
private boolean isLZMAChunk;
private boolean needDictReset = true;
private boolean needProps = true;
private boolean endReached = false;
private IOException exception = null;
private final byte[] tempBuf = new byte[1];
/**
* Gets approximate decompressor memory requirements as kibibytes for
* the given dictionary size.
*
* @param dictSize LZMA2 dictionary size as bytes, must be
* in the range [<code>DICT_SIZE_MIN</code>,
* <code>DICT_SIZE_MAX</code>]
*
* @return approximate memory requirements as kibibytes (KiB)
*/
public static int getMemoryUsage(int dictSize) {
// The base state is around 30-40 KiB (probabilities etc.),
// range decoder needs COMPRESSED_SIZE_MAX bytes for buffering,
// and LZ decoder needs a dictionary buffer.
return 40 + COMPRESSED_SIZE_MAX / 1024 + getDictSize(dictSize) / 1024;
}
private static int getDictSize(int dictSize) {
if (dictSize < DICT_SIZE_MIN || dictSize > DICT_SIZE_MAX)
throw new IllegalArgumentException(
"Unsupported dictionary size " + dictSize);
// Round dictionary size upward to a multiple of 16. This way LZMA
// can use LZDecoder.getPos() for calculating LZMA's posMask.
// Note that this check is needed only for raw LZMA2 streams; it is
// redundant with .xz.
return (dictSize + 15) & ~15;
}
/**
* Creates a new input stream that decompresses raw LZMA2 data
* from <code>in</code>.
* <p>
* The caller needs to know the dictionary size used when compressing;
* the dictionary size isn't stored as part of a raw LZMA2 stream.
* <p>
* Specifying a too small dictionary size will prevent decompressing
* the stream. Specifying a too big dictionary is waste of memory but
* decompression will work.
* <p>
* There is no need to specify a dictionary bigger than
* the uncompressed size of the data even if a bigger dictionary
* was used when compressing. If you know the uncompressed size
* of the data, this might allow saving some memory.
*
* @param in input stream from which LZMA2-compressed
* data is read
*
* @param dictSize LZMA2 dictionary size as bytes, must be
* in the range [<code>DICT_SIZE_MIN</code>,
* <code>DICT_SIZE_MAX</code>]
*/
public LZMA2InputStream(InputStream in, int dictSize) {
this(in, dictSize, null);
}
/**
* Creates a new LZMA2 decompressor using a preset dictionary.
* <p>
* This is like <code>LZMA2InputStream(InputStream, int)</code> except
* that the dictionary may be initialized using a preset dictionary.
* If a preset dictionary was used when compressing the data, the
* same preset dictionary must be provided when decompressing.
*
* @param in input stream from which LZMA2-compressed
* data is read
*
* @param dictSize LZMA2 dictionary size as bytes, must be
* in the range [<code>DICT_SIZE_MIN</code>,
* <code>DICT_SIZE_MAX</code>]
*
* @param presetDict preset dictionary or <code>null</code>
* to use no preset dictionary
*/
public LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict) {
// Check for null because otherwise null isn't detect
// in this constructor.
if (in == null)
throw new NullPointerException();
this.in = new DataInputStream(in);
this.lz = new LZDecoder(getDictSize(dictSize), presetDict);
if (presetDict != null && presetDict.length > 0)
needDictReset = false;
}
/**
* Decompresses the next byte from this input stream.
* <p>
* Reading lots of data with <code>read()</code> from this input stream
* may be inefficient. Wrap it in <code>java.io.BufferedInputStream</code>
* if you need to read lots of data one byte at a time.
*
* @return the next decompressed byte, or <code>-1</code>
* to indicate the end of the compressed stream
*
* @throws CorruptedInputException
*
* @throws XZIOException if the stream has been closed
*
* @throws EOFException
* compressed input is truncated or corrupt
*
* @throws IOException may be thrown by <code>in</code>
*/
public int read() throws IOException {
return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
}
/**
* Decompresses into an array of bytes.
* <p>
* If <code>len</code> is zero, no bytes are read and <code>0</code>
* is returned. Otherwise this will block until <code>len</code>
* bytes have been decompressed, the end of the LZMA2 stream is reached,
* or an exception is thrown.
*
* @param buf target buffer for uncompressed data
* @param off start offset in <code>buf</code>
* @param len maximum number of uncompressed bytes to read
*
* @return number of bytes read, or <code>-1</code> to indicate
* the end of the compressed stream
*
* @throws CorruptedInputException
*
* @throws XZIOException if the stream has been closed
*
* @throws EOFException
* compressed input is truncated or corrupt
*
* @throws IOException may be thrown by <code>in</code>
*/
public int read(byte[] buf, int off, int len) throws IOException {
if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
throw new IndexOutOfBoundsException();
if (len == 0)
return 0;
if (in == null)
throw new XZIOException("Stream closed");
if (exception != null)
throw exception;
if (endReached)
return -1;
try {
int size = 0;
while (len > 0) {
if (uncompressedSize == 0) {
decodeChunkHeader();
if (endReached)
return size == 0 ? -1 : size;
}
int copySizeMax = Math.min(uncompressedSize, len);
if (!isLZMAChunk) {
lz.copyUncompressed(in, copySizeMax);
} else {
lz.setLimit(copySizeMax);
lzma.decode();
if (!rc.isInBufferOK())
throw new CorruptedInputException();
}
int copiedSize = lz.flush(buf, off);
off += copiedSize;
len -= copiedSize;
size += copiedSize;
uncompressedSize -= copiedSize;
if (uncompressedSize == 0)
if (!rc.isFinished() || lz.hasPending())
throw new CorruptedInputException();
}
return size;
} catch (IOException e) {
exception = e;
throw e;
}
}
private void decodeChunkHeader() throws IOException {
int control = in.readUnsignedByte();
if (control == 0x00) {
endReached = true;
return;
}
if (control >= 0xE0 || control == 0x01) {
needProps = true;
needDictReset = false;
lz.reset();
} else if (needDictReset) {
throw new CorruptedInputException();
}
if (control >= 0x80) {
isLZMAChunk = true;
uncompressedSize = (control & 0x1F) << 16;
uncompressedSize += in.readUnsignedShort() + 1;
int compressedSize = in.readUnsignedShort() + 1;
if (control >= 0xC0) {
needProps = false;
decodeProps();
} else if (needProps) {
throw new CorruptedInputException();
} else if (control >= 0xA0) {
lzma.reset();
}
rc.prepareInputBuffer(in, compressedSize);
} else if (control > 0x02) {
throw new CorruptedInputException();
} else {
isLZMAChunk = false;
uncompressedSize = in.readUnsignedShort() + 1;
}
}
private void decodeProps() throws IOException {
int props = in.readUnsignedByte();
if (props > (4 * 5 + 4) * 9 + 8)
throw new CorruptedInputException();
int pb = props / (9 * 5);
props -= pb * 9 * 5;
int lp = props / 9;
int lc = props - lp * 9;
if (lc + lp > 4)
throw new CorruptedInputException();
lzma = new LZMADecoder(lz, rc, lc, lp, pb);
}
/**
 * Returns the number of uncompressed bytes that can be read without
 * blocking, assuming the remaining compressed input is valid. If the
 * input turns out to be corrupt, <code>CorruptedInputException</code>
 * may be thrown before that many bytes have actually been read.
 * <p>
 * The value is non-zero while the decompressor is in the middle of an
 * LZMA2 chunk; it is then the number of uncompressed bytes remaining
 * from that chunk.
 *
 * @return the number of uncompressed bytes that can be read
 *         without blocking
 */
public int available() throws IOException {
    if (in == null) {
        throw new XZIOException("Stream closed");
    }

    if (exception != null) {
        throw exception;
    }

    return uncompressedSize;
}
/**
 * Closes the stream and calls <code>in.close()</code>.
 * Closing an already-closed stream is a no-op.
 *
 * @throws IOException if thrown by <code>in.close()</code>
 */
public void close() throws IOException {
    if (in == null)
        return;

    // Mark the stream closed even if in.close() throws.
    try {
        in.close();
    } finally {
        in = null;
    }
}
}

View File

@@ -0,0 +1,581 @@
/*
* LZMA2Options
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
import java.io.IOException;
import org.tukaani.xz.lz.LZEncoder;
import org.tukaani.xz.lzma.LZMAEncoder;
/**
* LZMA2 compression options.
* <p>
* While this allows setting the LZMA2 compression options in detail,
* often you only need <code>LZMA2Options()</code> or
* <code>LZMA2Options(int)</code>.
*/
public class LZMA2Options extends FilterOptions {
    /**
     * Minimum valid compression preset level is 0.
     */
    public static final int PRESET_MIN = 0;

    /**
     * Maximum valid compression preset level is 9.
     */
    public static final int PRESET_MAX = 9;

    /**
     * Default compression preset level is 6.
     */
    public static final int PRESET_DEFAULT = 6;

    /**
     * Minimum dictionary size is 4 KiB.
     */
    public static final int DICT_SIZE_MIN = 4096;

    /**
     * Maximum dictionary size for compression is 768 MiB.
     * <p>
     * The decompressor supports bigger dictionaries, up to almost 2 GiB.
     * With HC4 the encoder would support dictionaries bigger than 768 MiB.
     * The 768 MiB limit comes from the current implementation of BT4 where
     * we would otherwise hit the limits of signed ints in array indexing.
     * <p>
     * If you really need bigger dictionary for decompression,
     * use {@link LZMA2InputStream} directly.
     */
    public static final int DICT_SIZE_MAX = 768 << 20;

    /**
     * The default dictionary size is 8 MiB.
     */
    public static final int DICT_SIZE_DEFAULT = 8 << 20;

    /**
     * Maximum value for lc + lp is 4.
     */
    public static final int LC_LP_MAX = 4;

    /**
     * The default number of literal context bits is 3.
     */
    public static final int LC_DEFAULT = 3;

    /**
     * The default number of literal position bits is 0.
     */
    public static final int LP_DEFAULT = 0;

    /**
     * Maximum value for pb is 4.
     */
    public static final int PB_MAX = 4;

    /**
     * The default number of position bits is 2.
     */
    public static final int PB_DEFAULT = 2;

    /**
     * Compression mode: uncompressed.
     * The data is wrapped into a LZMA2 stream without compression.
     */
    public static final int MODE_UNCOMPRESSED = 0;

    /**
     * Compression mode: fast.
     * This is usually combined with a hash chain match finder.
     */
    public static final int MODE_FAST = LZMAEncoder.MODE_FAST;

    /**
     * Compression mode: normal.
     * This is usually combined with a binary tree match finder.
     */
    public static final int MODE_NORMAL = LZMAEncoder.MODE_NORMAL;

    /**
     * Minimum value for <code>niceLen</code> is 8.
     */
    public static final int NICE_LEN_MIN = 8;

    /**
     * Maximum value for <code>niceLen</code> is 273.
     */
    public static final int NICE_LEN_MAX = 273;

    /**
     * Match finder: Hash Chain 2-3-4
     */
    public static final int MF_HC4 = LZEncoder.MF_HC4;

    /**
     * Match finder: Binary tree 2-3-4
     */
    public static final int MF_BT4 = LZEncoder.MF_BT4;

    // Dictionary size used by each preset level 0-9.
    private static final int[] presetToDictSize = {
            1 << 18, 1 << 20, 1 << 21, 1 << 22, 1 << 22,
            1 << 23, 1 << 23, 1 << 24, 1 << 25, 1 << 26 };

    // Match finder depth limit used by the fast presets 0-3.
    private static final int[] presetToDepthLimit = { 4, 8, 24, 48 };

    private int dictSize;
    private byte[] presetDict = null;
    private int lc;
    private int lp;
    private int pb;
    private int mode;
    private int niceLen;
    private int mf;
    private int depthLimit;

    /**
     * Creates new LZMA2 options and sets them to the default values.
     * This is equivalent to <code>LZMA2Options(PRESET_DEFAULT)</code>.
     */
    public LZMA2Options() {
        try {
            setPreset(PRESET_DEFAULT);
        } catch (UnsupportedOptionsException e) {
            // PRESET_DEFAULT is always a valid preset, so this is unreachable.
            // Preserve the cause anyway in case this assumption ever breaks.
            assert false;
            throw new RuntimeException(e);
        }
    }

    /**
     * Creates new LZMA2 options and sets them to the given preset.
     *
     * @throws UnsupportedOptionsException
     *                        <code>preset</code> is not supported
     */
    public LZMA2Options(int preset) throws UnsupportedOptionsException {
        setPreset(preset);
    }

    /**
     * Creates new LZMA2 options and sets them to the given custom values.
     *
     * @throws UnsupportedOptionsException
     *                        unsupported options were specified
     */
    public LZMA2Options(int dictSize, int lc, int lp, int pb, int mode,
                        int niceLen, int mf, int depthLimit)
            throws UnsupportedOptionsException {
        setDictSize(dictSize);
        setLcLp(lc, lp);
        setPb(pb);
        setMode(mode);
        setNiceLen(niceLen);
        setMatchFinder(mf);
        setDepthLimit(depthLimit);
    }

    /**
     * Sets the compression options to the given preset.
     * <p>
     * The presets 0-3 are fast presets with medium compression.
     * The presets 4-6 are fairly slow presets with high compression.
     * The default preset (<code>PRESET_DEFAULT</code>) is 6.
     * <p>
     * The presets 7-9 are like the preset 6 but use bigger dictionaries
     * and have higher compressor and decompressor memory requirements.
     * Unless the uncompressed size of the file exceeds 8&nbsp;MiB,
     * 16&nbsp;MiB, or 32&nbsp;MiB, it is waste of memory to use the
     * presets 7, 8, or 9, respectively.
     *
     * @throws UnsupportedOptionsException
     *                        <code>preset</code> is not supported
     */
    public void setPreset(int preset) throws UnsupportedOptionsException {
        if (preset < PRESET_MIN || preset > PRESET_MAX)
            throw new UnsupportedOptionsException(
                    "Unsupported preset: " + preset);

        lc = LC_DEFAULT;
        lp = LP_DEFAULT;
        pb = PB_DEFAULT;
        dictSize = presetToDictSize[preset];

        if (preset <= 3) {
            mode = MODE_FAST;
            mf = MF_HC4;
            niceLen = preset <= 1 ? 128 : NICE_LEN_MAX;
            depthLimit = presetToDepthLimit[preset];
        } else {
            mode = MODE_NORMAL;
            mf = MF_BT4;
            niceLen = (preset == 4) ? 16 : (preset == 5) ? 32 : 64;
            depthLimit = 0;
        }
    }

    /**
     * Sets the dictionary size in bytes.
     * <p>
     * The dictionary (or history buffer) holds the most recently seen
     * uncompressed data. Bigger dictionary usually means better compression.
     * However, using a dictionary bigger than the size of the uncompressed
     * data is waste of memory.
     * <p>
     * Any value in the range [DICT_SIZE_MIN, DICT_SIZE_MAX] is valid,
     * but sizes of 2^n and 2^n&nbsp;+&nbsp;2^(n-1) bytes are somewhat
     * recommended.
     *
     * @throws UnsupportedOptionsException
     *                        <code>dictSize</code> is not supported
     */
    public void setDictSize(int dictSize) throws UnsupportedOptionsException {
        if (dictSize < DICT_SIZE_MIN)
            throw new UnsupportedOptionsException(
                    "LZMA2 dictionary size must be at least 4 KiB: "
                    + dictSize + " B");

        if (dictSize > DICT_SIZE_MAX)
            throw new UnsupportedOptionsException(
                    "LZMA2 dictionary size must not exceed "
                    + (DICT_SIZE_MAX >> 20) + " MiB: " + dictSize + " B");

        this.dictSize = dictSize;
    }

    /**
     * Gets the dictionary size in bytes.
     */
    public int getDictSize() {
        return dictSize;
    }

    /**
     * Sets a preset dictionary. Use null to disable the use of
     * a preset dictionary. By default there is no preset dictionary.
     * <p>
     * <b>The .xz format doesn't support a preset dictionary for now.
     * Do not set a preset dictionary unless you use raw LZMA2.</b>
     * <p>
     * Preset dictionary can be useful when compressing many similar,
     * relatively small chunks of data independently from each other.
     * A preset dictionary should contain typical strings that occur in
     * the files being compressed. The most probable strings should be
     * near the end of the preset dictionary. The preset dictionary used
     * for compression is also needed for decompression.
     */
    public void setPresetDict(byte[] presetDict) {
        this.presetDict = presetDict;
    }

    /**
     * Gets the preset dictionary.
     */
    public byte[] getPresetDict() {
        return presetDict;
    }

    /**
     * Sets the number of literal context bits and literal position bits.
     * <p>
     * The sum of <code>lc</code> and <code>lp</code> is limited to 4.
     * Trying to exceed it will throw an exception. This function lets
     * you change both at the same time.
     *
     * @throws UnsupportedOptionsException
     *                        <code>lc</code> and <code>lp</code>
     *                        are invalid
     */
    public void setLcLp(int lc, int lp) throws UnsupportedOptionsException {
        if (lc < 0 || lp < 0 || lc > LC_LP_MAX || lp > LC_LP_MAX
                || lc + lp > LC_LP_MAX)
            throw new UnsupportedOptionsException(
                    "lc + lp must not exceed " + LC_LP_MAX + ": "
                    + lc + " + " + lp);

        this.lc = lc;
        this.lp = lp;
    }

    /**
     * Sets the number of literal context bits.
     * <p>
     * All bytes that cannot be encoded as matches are encoded as literals.
     * That is, literals are simply 8-bit bytes that are encoded one at
     * a time.
     * <p>
     * The literal coding makes an assumption that the highest <code>lc</code>
     * bits of the previous uncompressed byte correlate with the next byte.
     * For example, in typical English text, an upper-case letter is often
     * followed by a lower-case letter, and a lower-case letter is usually
     * followed by another lower-case letter. In the US-ASCII character set,
     * the highest three bits are 010 for upper-case letters and 011 for
     * lower-case letters. When <code>lc</code> is at least 3, the literal
     * coding can take advantage of this property in the uncompressed data.
     * <p>
     * The default value (3) is usually good. If you want maximum compression,
     * try <code>setLc(4)</code>. Sometimes it helps a little, and sometimes it
     * makes compression worse. If it makes it worse, test for example
     * <code>setLc(2)</code> too.
     *
     * @throws UnsupportedOptionsException
     *                        <code>lc</code> is invalid, or the sum
     *                        of <code>lc</code> and <code>lp</code>
     *                        exceed LC_LP_MAX
     */
    public void setLc(int lc) throws UnsupportedOptionsException {
        setLcLp(lc, lp);
    }

    /**
     * Sets the number of literal position bits.
     * <p>
     * This affects what kind of alignment in the uncompressed data is
     * assumed when encoding literals. See {@link #setPb(int) setPb} for
     * more information about alignment.
     *
     * @throws UnsupportedOptionsException
     *                        <code>lp</code> is invalid, or the sum
     *                        of <code>lc</code> and <code>lp</code>
     *                        exceed LC_LP_MAX
     */
    public void setLp(int lp) throws UnsupportedOptionsException {
        setLcLp(lc, lp);
    }

    /**
     * Gets the number of literal context bits.
     */
    public int getLc() {
        return lc;
    }

    /**
     * Gets the number of literal position bits.
     */
    public int getLp() {
        return lp;
    }

    /**
     * Sets the number of position bits.
     * <p>
     * This affects what kind of alignment in the uncompressed data is
     * assumed in general. The default (2) means four-byte alignment
     * (2^<code>pb</code> = 2^2 = 4), which is often a good choice when
     * there's no better guess.
     * <p>
     * When the alignment is known, setting the number of position bits
     * accordingly may reduce the file size a little. For example with text
     * files having one-byte alignment (US-ASCII, ISO-8859-*, UTF-8), using
     * <code>setPb(0)</code> can improve compression slightly. For UTF-16
     * text, <code>setPb(1)</code> is a good choice. If the alignment is
     * an odd number like 3 bytes, <code>setPb(0)</code> might be the best
     * choice.
     * <p>
     * Even though the assumed alignment can be adjusted with
     * <code>setPb</code> and <code>setLp</code>, LZMA2 still slightly favors
     * 16-byte alignment. It might be worth taking into account when designing
     * file formats that are likely to be often compressed with LZMA2.
     *
     * @throws UnsupportedOptionsException
     *                        <code>pb</code> is invalid
     */
    public void setPb(int pb) throws UnsupportedOptionsException {
        if (pb < 0 || pb > PB_MAX)
            throw new UnsupportedOptionsException(
                    "pb must not exceed " + PB_MAX + ": " + pb);

        this.pb = pb;
    }

    /**
     * Gets the number of position bits.
     */
    public int getPb() {
        return pb;
    }

    /**
     * Sets the compression mode.
     * <p>
     * This specifies the method to analyze the data produced by
     * a match finder. The default is <code>MODE_FAST</code> for presets
     * 0-3 and <code>MODE_NORMAL</code> for presets 4-9.
     * <p>
     * Usually <code>MODE_FAST</code> is used with Hash Chain match finders
     * and <code>MODE_NORMAL</code> with Binary Tree match finders. This is
     * also what the presets do.
     * <p>
     * The special mode <code>MODE_UNCOMPRESSED</code> doesn't try to
     * compress the data at all (and doesn't use a match finder) and will
     * simply wrap it in uncompressed LZMA2 chunks.
     *
     * @throws UnsupportedOptionsException
     *                        <code>mode</code> is not supported
     */
    public void setMode(int mode) throws UnsupportedOptionsException {
        if (mode < MODE_UNCOMPRESSED || mode > MODE_NORMAL)
            throw new UnsupportedOptionsException(
                    "Unsupported compression mode: " + mode);

        this.mode = mode;
    }

    /**
     * Gets the compression mode.
     */
    public int getMode() {
        return mode;
    }

    /**
     * Sets the nice length of matches.
     * Once a match of at least <code>niceLen</code> bytes is found,
     * the algorithm stops looking for better matches. Higher values tend
     * to give better compression at the expense of speed. The default
     * depends on the preset.
     *
     * @throws UnsupportedOptionsException
     *                        <code>niceLen</code> is invalid
     */
    public void setNiceLen(int niceLen) throws UnsupportedOptionsException {
        if (niceLen < NICE_LEN_MIN)
            throw new UnsupportedOptionsException(
                    "Minimum nice length of matches is "
                    + NICE_LEN_MIN + " bytes: " + niceLen);

        if (niceLen > NICE_LEN_MAX)
            throw new UnsupportedOptionsException(
                    "Maximum nice length of matches is " + NICE_LEN_MAX
                    + ": " + niceLen);

        this.niceLen = niceLen;
    }

    /**
     * Gets the nice length of matches.
     */
    public int getNiceLen() {
        return niceLen;
    }

    /**
     * Sets the match finder type.
     * <p>
     * Match finder has a major effect on compression speed, memory usage,
     * and compression ratio. Usually Hash Chain match finders are faster
     * than Binary Tree match finders. The default depends on the preset:
     * 0-3 use <code>MF_HC4</code> and 4-9 use <code>MF_BT4</code>.
     *
     * @throws UnsupportedOptionsException
     *                        <code>mf</code> is not supported
     */
    public void setMatchFinder(int mf) throws UnsupportedOptionsException {
        if (mf != MF_HC4 && mf != MF_BT4)
            throw new UnsupportedOptionsException(
                    "Unsupported match finder: " + mf);

        this.mf = mf;
    }

    /**
     * Gets the match finder type.
     */
    public int getMatchFinder() {
        return mf;
    }

    /**
     * Sets the match finder search depth limit.
     * <p>
     * The default is a special value of <code>0</code> which indicates that
     * the depth limit should be automatically calculated by the selected
     * match finder from the nice length of matches.
     * <p>
     * Reasonable depth limit for Hash Chain match finders is 4-100 and
     * 16-1000 for Binary Tree match finders. Using very high values can
     * make the compressor extremely slow with some files. Avoid settings
     * higher than 1000 unless you are prepared to interrupt the compression
     * in case it is taking far too long.
     *
     * @throws UnsupportedOptionsException
     *                        <code>depthLimit</code> is invalid
     */
    public void setDepthLimit(int depthLimit)
            throws UnsupportedOptionsException {
        if (depthLimit < 0)
            throw new UnsupportedOptionsException(
                    "Depth limit cannot be negative: " + depthLimit);

        this.depthLimit = depthLimit;
    }

    /**
     * Gets the match finder search depth limit.
     */
    public int getDepthLimit() {
        return depthLimit;
    }

    /**
     * Gets how much memory the encoder will need with these options.
     *
     * @return approximate memory requirements as kibibytes (KiB)
     */
    public int getEncoderMemoryUsage() {
        return (mode == MODE_UNCOMPRESSED)
               ? UncompressedLZMA2OutputStream.getMemoryUsage()
               : LZMA2OutputStream.getMemoryUsage(this);
    }

    /**
     * Gets a <code>FinishableOutputStream</code> that compresses the data
     * written to it into a raw LZMA2 stream using these options.
     */
    public FinishableOutputStream getOutputStream(FinishableOutputStream out) {
        if (mode == MODE_UNCOMPRESSED)
            return new UncompressedLZMA2OutputStream(out);

        return new LZMA2OutputStream(out, this);
    }

    /**
     * Gets how much memory the LZMA2 decoder will need to decompress the data
     * that was encoded with these options and stored in a .xz file.
     * <p>
     * The returned value may bigger than the value returned by a direct call
     * to {@link LZMA2InputStream#getMemoryUsage(int)} if the dictionary size
     * is not 2^n or 2^n&nbsp;+&nbsp;2^(n-1) bytes. This is because the .xz
     * headers store the dictionary size in such a format and other values
     * are rounded up to the next such value. Such rounding is harmless except
     * it might waste some memory if an unusual dictionary size is used.
     * <p>
     * If you use raw LZMA2 streams and unusual dictionary size, call
     * {@link LZMA2InputStream#getMemoryUsage} directly to get raw decoder
     * memory requirements.
     */
    public int getDecoderMemoryUsage() {
        // Round the dictionary size up to the next 2^n or 2^n + 2^(n-1).
        int d = dictSize - 1;
        d |= d >>> 2;
        d |= d >>> 3;
        d |= d >>> 4;
        d |= d >>> 8;
        d |= d >>> 16;
        return LZMA2InputStream.getMemoryUsage(d + 1);
    }

    /**
     * Gets an <code>InputStream</code> that decompresses a raw LZMA2 stream
     * from <code>in</code> using the dictionary size from these options.
     * <p>
     * NOTE(review): the preset dictionary (if set) is not passed to the
     * decoder here — verify that this is intentional for raw streams.
     */
    public InputStream getInputStream(InputStream in) throws IOException {
        return new LZMA2InputStream(in, dictSize);
    }

    // Returns the .xz filter encoder corresponding to these options.
    FilterEncoder getFilterEncoder() {
        return new LZMA2Encoder(this);
    }

    public Object clone() {
        try {
            return super.clone();
        } catch (CloneNotSupportedException e) {
            // Should never happen: cloning is expected to be supported.
            // Preserve the cause instead of discarding it.
            assert false;
            throw new RuntimeException(e);
        }
    }
}

View File

@@ -0,0 +1,261 @@
/*
* LZMA2OutputStream
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.DataOutputStream;
import java.io.IOException;
import org.tukaani.xz.lz.LZEncoder;
import org.tukaani.xz.rangecoder.RangeEncoder;
import org.tukaani.xz.lzma.LZMAEncoder;
// Compresses data into a raw LZMA2 stream: a sequence of chunks, each
// either LZMA-compressed or stored uncompressed, terminated by a single
// 0x00 control byte.
class LZMA2OutputStream extends FinishableOutputStream {
    // Maximum size of the range-encoded output of one chunk (64 KiB).
    static final int COMPRESSED_SIZE_MAX = 64 << 10;
    private FinishableOutputStream out; // downstream sink; null after close()
    private final DataOutputStream outData; // wraps 'out' for writeByte/writeShort
    private final LZEncoder lz; // match-finder window shared with 'lzma'
    private final RangeEncoder rc; // buffers one chunk of range-coded output
    private final LZMAEncoder lzma;
    private final int props; // Cannot change props on the fly for now.
    // Chunk-header state: the first compressed chunk must include a
    // dictionary reset and the properties byte; later chunks clear these.
    private boolean dictResetNeeded = true;
    private boolean stateResetNeeded = true;
    private boolean propsNeeded = true;
    // Bytes accepted by write() but not yet emitted as chunks.
    private int pendingSize = 0;
    private boolean finished = false;
    // First I/O error seen; rethrown by later operations.
    private IOException exception = null;
    // Scratch buffer for the single-byte write(int) path.
    private final byte[] tempBuf = new byte[1];
    // Extra space needed before the dictionary so that a full chunk's worth
    // of data can be buffered even when the dictionary is smaller than that.
    private static int getExtraSizeBefore(int dictSize) {
        return COMPRESSED_SIZE_MAX > dictSize
               ? COMPRESSED_SIZE_MAX - dictSize : 0;
    }
    static int getMemoryUsage(LZMA2Options options) {
        // 64 KiB buffer for the range encoder + a little extra + LZMAEncoder
        int dictSize = options.getDictSize();
        int extraSizeBefore = getExtraSizeBefore(dictSize);
        return 70 + LZMAEncoder.getMemoryUsage(options.getMode(),
                                               dictSize, extraSizeBefore,
                                               options.getMatchFinder());
    }
    LZMA2OutputStream(FinishableOutputStream out, LZMA2Options options) {
        if (out == null)
            throw new NullPointerException();
        this.out = out;
        outData = new DataOutputStream(out);
        rc = new RangeEncoder(COMPRESSED_SIZE_MAX);
        int dictSize = options.getDictSize();
        int extraSizeBefore = getExtraSizeBefore(dictSize);
        lzma = LZMAEncoder.getInstance(rc,
                options.getLc(), options.getLp(), options.getPb(),
                options.getMode(),
                dictSize, extraSizeBefore, options.getNiceLen(),
                options.getMatchFinder(), options.getDepthLimit());
        lz = lzma.getLZEncoder();
        byte[] presetDict = options.getPresetDict();
        if (presetDict != null && presetDict.length > 0) {
            // A preset dictionary takes the place of the initial
            // dictionary reset.
            lz.setPresetDict(dictSize, presetDict);
            dictResetNeeded = false;
        }
        // Pack lc/lp/pb into the single LZMA properties byte:
        // props = (pb * 5 + lp) * 9 + lc.
        props = (options.getPb() * 5 + options.getLp()) * 9 + options.getLc();
    }
    public void write(int b) throws IOException {
        tempBuf[0] = (byte)b;
        write(tempBuf, 0, 1);
    }
    public void write(byte[] buf, int off, int len) throws IOException {
        // off + len < 0 catches integer overflow of off + len.
        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
            throw new IndexOutOfBoundsException();
        if (exception != null)
            throw exception;
        if (finished)
            throw new XZIOException("Stream finished or closed");
        try {
            while (len > 0) {
                // Feed as much as fits into the LZ window; emit a chunk
                // whenever the encoder reports one is ready.
                int used = lz.fillWindow(buf, off, len);
                off += used;
                len -= used;
                pendingSize += used;
                if (lzma.encodeForLZMA2())
                    writeChunk();
            }
        } catch (IOException e) {
            exception = e;
            throw e;
        }
    }
    // Emits one LZMA2 chunk: compressed if that is actually smaller than
    // storing the data, otherwise uncompressed.
    private void writeChunk() throws IOException {
        int compressedSize = rc.finish();
        int uncompressedSize = lzma.getUncompressedSize();
        assert compressedSize > 0 : compressedSize;
        assert uncompressedSize > 0 : uncompressedSize;
        // +2 because the header of a compressed chunk is 2 bytes
        // bigger than the header of an uncompressed chunk.
        if (compressedSize + 2 < uncompressedSize) {
            writeLZMA(uncompressedSize, compressedSize);
        } else {
            // Compression didn't help: reset the LZMA state and store
            // the chunk's data uncompressed instead.
            lzma.reset();
            uncompressedSize = lzma.getUncompressedSize();
            assert uncompressedSize > 0 : uncompressedSize;
            writeUncompressed(uncompressedSize);
        }
        pendingSize -= uncompressedSize;
        lzma.resetUncompressedSize();
        rc.reset();
    }
    // Writes the header and range-coded payload of a compressed chunk.
    // Control byte: 0x80 | (reset level << 5) | bits 16-20 of
    // (uncompressedSize - 1); reset level 3 = props + dict reset,
    // 2 = props + state reset, 1 = state reset, 0 = no reset.
    private void writeLZMA(int uncompressedSize, int compressedSize)
            throws IOException {
        int control;
        if (propsNeeded) {
            if (dictResetNeeded)
                control = 0x80 + (3 << 5);
            else
                control = 0x80 + (2 << 5);
        } else {
            if (stateResetNeeded)
                control = 0x80 + (1 << 5);
            else
                control = 0x80;
        }
        control |= (uncompressedSize - 1) >>> 16;
        outData.writeByte(control);
        // Both sizes are stored minus one.
        outData.writeShort(uncompressedSize - 1);
        outData.writeShort(compressedSize - 1);
        if (propsNeeded)
            outData.writeByte(props);
        rc.write(out);
        propsNeeded = false;
        stateResetNeeded = false;
        dictResetNeeded = false;
    }
    // Writes the data as one or more uncompressed chunks. Control byte
    // 0x01 = uncompressed with dictionary reset, 0x02 = without.
    private void writeUncompressed(int uncompressedSize) throws IOException {
        while (uncompressedSize > 0) {
            int chunkSize = Math.min(uncompressedSize, COMPRESSED_SIZE_MAX);
            outData.writeByte(dictResetNeeded ? 0x01 : 0x02);
            outData.writeShort(chunkSize - 1);
            lz.copyUncompressed(out, uncompressedSize, chunkSize);
            uncompressedSize -= chunkSize;
            dictResetNeeded = false;
        }
        // After an uncompressed chunk the next compressed chunk must
        // include a state reset.
        stateResetNeeded = true;
    }
    private void writeEndMarker() throws IOException {
        assert !finished;
        if (exception != null)
            throw exception;
        lz.setFinishing();
        try {
            // Drain everything still buffered, then write the 0x00
            // end-of-stream control byte.
            while (pendingSize > 0) {
                lzma.encodeForLZMA2();
                writeChunk();
            }
            out.write(0x00);
        } catch (IOException e) {
            exception = e;
            throw e;
        }
        finished = true;
    }
    public void flush() throws IOException {
        if (exception != null)
            throw exception;
        if (finished)
            throw new XZIOException("Stream finished or closed");
        try {
            // Force all buffered data out as chunks so the downstream
            // stream sees everything written so far.
            lz.setFlushing();
            while (pendingSize > 0) {
                lzma.encodeForLZMA2();
                writeChunk();
            }
            out.flush();
        } catch (IOException e) {
            exception = e;
            throw e;
        }
    }
    public void finish() throws IOException {
        if (!finished) {
            writeEndMarker();
            try {
                out.finish();
            } catch (IOException e) {
                exception = e;
                throw e;
            }
            finished = true;
        }
    }
    public void close() throws IOException {
        if (out != null) {
            if (!finished) {
                try {
                    writeEndMarker();
                } catch (IOException e) {} // best effort; close() must proceed
            }
            try {
                out.close();
            } catch (IOException e) {
                // Keep the first recorded error; it is rethrown below.
                if (exception == null)
                    exception = e;
            }
            out = null;
        }
        if (exception != null)
            throw exception;
    }
}

View File

@@ -0,0 +1,569 @@
/*
* LZMAInputStream
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
import java.io.DataInputStream;
import java.io.IOException;
import org.tukaani.xz.lz.LZDecoder;
import org.tukaani.xz.rangecoder.RangeDecoderFromStream;
import org.tukaani.xz.lzma.LZMADecoder;
/**
* Decompresses legacy .lzma files and raw LZMA streams (no .lzma header).
* <p>
* <b>IMPORTANT:</b> In contrast to other classes in this package, this class
* reads data from its input stream one byte at a time. If the input stream
* is for example {@link java.io.FileInputStream}, wrapping it into
* {@link java.io.BufferedInputStream} tends to improve performance a lot.
* This is not automatically done by this class because there may be use
* cases where it is desired that this class won't read any bytes past
* the end of the LZMA stream.
* <p>
* Even when using <code>BufferedInputStream</code>, the performance tends
* to be worse (maybe 10-20&nbsp;% slower) than with {@link LZMA2InputStream}
* or {@link XZInputStream} (when the .xz file contains LZMA2-compressed data).
*
* @since 1.4
*/
public class LZMAInputStream extends InputStream {
/**
* Largest dictionary size supported by this implementation.
* <p>
* LZMA allows dictionaries up to one byte less than 4 GiB. This
* implementation supports only 16 bytes less than 2 GiB. This
* limitation is due to Java using signed 32-bit integers for array
* indexing. The limitation shouldn't matter much in practice since so
* huge dictionaries are not normally used.
*/
public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15;
private InputStream in;
private LZDecoder lz;
private RangeDecoderFromStream rc;
private LZMADecoder lzma;
private boolean endReached = false;
private final byte[] tempBuf = new byte[1];
/**
* Number of uncompressed bytes left to be decompressed, or -1 if
* the end marker is used.
*/
private long remainingSize;
private IOException exception = null;
/**
 * Gets approximate decompressor memory requirements as kibibytes for
 * the given dictionary size and LZMA properties byte (lc, lp, and pb).
 *
 * @param dictSize LZMA dictionary size as bytes, should be
 * in the range [<code>0</code>,
 * <code>DICT_SIZE_MAX</code>]
 *
 * @param propsByte LZMA properties byte that encodes the values
 * of lc, lp, and pb
 *
 * @return approximate memory requirements as kibibytes (KiB)
 *
 * @throws UnsupportedOptionsException
 * if <code>dictSize</code> is outside
 * the range [<code>0</code>,
 * <code>DICT_SIZE_MAX</code>]
 *
 * @throws CorruptedInputException
 * if <code>propsByte</code> is invalid
 */
public static int getMemoryUsage(int dictSize, byte propsByte)
        throws UnsupportedOptionsException, CorruptedInputException {
    if (dictSize < 0 || dictSize > DICT_SIZE_MAX)
        throw new UnsupportedOptionsException(
                "LZMA dictionary is too big for this implementation");

    int props = propsByte & 0xFF;

    // The largest valid encoding of (pb * 5 + lp) * 9 + lc is 224.
    if (props > (4 * 5 + 4) * 9 + 8)
        throw new CorruptedInputException("Invalid LZMA properties byte");

    // pb doesn't affect memory usage, so only lc and lp are unpacked.
    int rem = props % (9 * 5);
    int lp = rem / 9;
    int lc = rem % 9;

    return getMemoryUsage(dictSize, lc, lp);
}
/**
 * Gets approximate decompressor memory requirements as kibibytes for
 * the given dictionary size, lc, and lp. Note that pb isn't needed.
 *
 * @param dictSize LZMA dictionary size as bytes, must be
 * in the range [<code>0</code>,
 * <code>DICT_SIZE_MAX</code>]
 *
 * @param lc number of literal context bits, must be
 * in the range [0, 8]
 *
 * @param lp number of literal position bits, must be
 * in the range [0, 4]
 *
 * @return approximate memory requirements as kibibytes (KiB)
 */
public static int getMemoryUsage(int dictSize, int lc, int lp) {
    if (lc < 0 || lc > 8 || lp < 0 || lp > 4)
        throw new IllegalArgumentException("Invalid lc or lp");

    // Each literal subcoder holds 0x300 (768) "short" probability
    // variables, and there are 2^(lc + lp) subcoders.
    int literalCoderBytes = (2 * 0x300) << (lc + lp);

    // Roughly 10 KiB for the base state + the LZ decoder's dictionary
    // buffer + the literal subcoders.
    return 10 + getDictSize(dictSize) / 1024 + literalCoderBytes / 1024;
}
/**
 * Normalizes a requested dictionary size to the size actually allocated:
 * at least 4 KiB, rounded up to a multiple of 16.
 */
private static int getDictSize(int dictSize) {
    if (dictSize < 0 || dictSize > DICT_SIZE_MAX)
        throw new IllegalArgumentException(
                "LZMA dictionary is too big for this implementation");

    // For performance reasons, use a 4 KiB dictionary if something
    // smaller was requested. It's a rare situation and the performance
    // difference isn't huge, and it starts to matter mostly when the
    // dictionary is just a few bytes. This also handles the allowed
    // special case of dictSize == 0, which in practice means a one-byte
    // dictionary.
    //
    // Note that using a dictionary bigger than specified in the headers
    // can hide errors if there is a reference to data beyond the original
    // dictionary size but is still within 4 KiB.
    int size = Math.max(dictSize, 4096);

    // Round upward to a multiple of 16 so LZMA can use
    // LZDecoder.getPos() for calculating LZMA's posMask.
    return (size + 15) & ~15;
}
/**
* Creates a new .lzma file format decompressor without
* a memory usage limit.
*
* @param in input stream from which .lzma data is read;
* it might be a good idea to wrap it in
* <code>BufferedInputStream</code>, see the
* note at the top of this page
*
* @throws CorruptedInputException
* file is corrupt or perhaps not in
* the .lzma format at all
*
* @throws UnsupportedOptionsException
* dictionary size or uncompressed size is too
* big for this implementation
*
* @throws EOFException
* file is truncated or perhaps not in
* the .lzma format at all
*
* @throws IOException may be thrown by <code>in</code>
*/
public LZMAInputStream(InputStream in) throws IOException {
    // -1 means no decoder memory usage limit.
    this(in, -1);
}
/**
* Creates a new .lzma file format decompressor with an optional
* memory usage limit.
*
* @param in input stream from which .lzma data is read;
* it might be a good idea to wrap it in
* <code>BufferedInputStream</code>, see the
* note at the top of this page
*
* @param memoryLimit memory usage limit in kibibytes (KiB)
* or <code>-1</code> to impose no
* memory usage limit
*
* @throws CorruptedInputException
* file is corrupt or perhaps not in
* the .lzma format at all
*
* @throws UnsupportedOptionsException
* dictionary size or uncompressed size is too
* big for this implementation
*
* @throws MemoryLimitException
* memory usage limit was exceeded
*
* @throws EOFException
* file is truncated or perhaps not in
* the .lzma format at all
*
* @throws IOException may be thrown by <code>in</code>
*/
// Parses the 13-byte .lzma header: 1 properties byte, 4-byte dictionary
// size, and 8-byte uncompressed size (both little endian), then hands
// off to initialize().
public LZMAInputStream(InputStream in, int memoryLimit)
        throws IOException {
    DataInputStream inData = new DataInputStream(in);
    // Properties byte (lc, lp, and pb)
    byte propsByte = inData.readByte();
    // Dictionary size is an unsigned 32-bit little endian integer.
    int dictSize = 0;
    for (int i = 0; i < 4; ++i)
        dictSize |= inData.readUnsignedByte() << (8 * i);
    // Uncompressed size is an unsigned 64-bit little endian integer.
    // The maximum 64-bit value is a special case (becomes -1 here)
    // which indicates that the end marker is used instead of knowing
    // the uncompressed size beforehand.
    long uncompSize = 0;
    for (int i = 0; i < 8; ++i)
        uncompSize |= (long)inData.readUnsignedByte() << (8 * i);
    // Check the memory usage limit.
    int memoryNeeded = getMemoryUsage(dictSize, propsByte);
    if (memoryLimit != -1 && memoryNeeded > memoryLimit)
        throw new MemoryLimitException(memoryNeeded, memoryLimit);
    initialize(in, uncompSize, propsByte, dictSize, null);
}
/**
* Creates a new input stream that decompresses raw LZMA data (no .lzma
* header) from <code>in</code>.
* <p>
* The caller needs to know if the "end of payload marker (EOPM)" alias
* "end of stream marker (EOS marker)" alias "end marker" present.
* If the end marker isn't used, the caller must know the exact
* uncompressed size of the stream.
* <p>
* The caller also needs to provide the LZMA properties byte that encodes
* the number of literal context bits (lc), literal position bits (lp),
* and position bits (pb).
* <p>
* The dictionary size used when compressing is also needed. Specifying
* a too small dictionary size will prevent decompressing the stream.
* Specifying a too big dictionary is waste of memory but decompression
* will work.
* <p>
* There is no need to specify a dictionary bigger than
* the uncompressed size of the data even if a bigger dictionary
* was used when compressing. If you know the uncompressed size
* of the data, this might allow saving some memory.
*
* @param in input stream from which compressed
* data is read
*
* @param uncompSize uncompressed size of the LZMA stream or -1
* if the end marker is used in the LZMA stream
*
* @param propsByte LZMA properties byte that has the encoded
* values for literal context bits (lc), literal
* position bits (lp), and position bits (pb)
*
* @param dictSize dictionary size as bytes, must be in the range
* [<code>0</code>, <code>DICT_SIZE_MAX</code>]
*
* @throws CorruptedInputException
* if <code>propsByte</code> is invalid or
* the first input byte is not 0x00
*
* @throws UnsupportedOptionsException
* dictionary size or uncompressed size is too
* big for this implementation
*
*
*/
public LZMAInputStream(InputStream in, long uncompSize, byte propsByte,
                       int dictSize) throws IOException {
    // Raw LZMA stream with no preset dictionary.
    initialize(in, uncompSize, propsByte, dictSize, null);
}
/**
* Creates a new input stream that decompresses raw LZMA data (no .lzma
* header) from <code>in</code> optionally with a preset dictionary.
*
* @param in input stream from which LZMA-compressed
* data is read
*
* @param uncompSize uncompressed size of the LZMA stream or -1
* if the end marker is used in the LZMA stream
*
* @param propsByte LZMA properties byte that has the encoded
* values for literal context bits (lc), literal
* position bits (lp), and position bits (pb)
*
* @param dictSize dictionary size as bytes, must be in the range
* [<code>0</code>, <code>DICT_SIZE_MAX</code>]
*
* @param presetDict preset dictionary or <code>null</code>
* to use no preset dictionary
*
* @throws CorruptedInputException
* if <code>propsByte</code> is invalid or
* the first input byte is not 0x00
*
* @throws UnsupportedOptionsException
* dictionary size or uncompressed size is too
* big for this implementation
*
* @throws EOFException file is truncated or corrupt
*
* @throws IOException may be thrown by <code>in</code>
*/
public LZMAInputStream(InputStream in, long uncompSize, byte propsByte,
                       int dictSize, byte[] presetDict)
        throws IOException {
    // Delegate to the shared initializer, passing the optional preset
    // dictionary through (null means no preset dictionary).
    initialize(in, uncompSize, propsByte, dictSize, presetDict);
}
/**
* Creates a new input stream that decompresses raw LZMA data (no .lzma
* header) from <code>in</code> optionally with a preset dictionary.
*
* @param in input stream from which LZMA-compressed
* data is read
*
* @param uncompSize uncompressed size of the LZMA stream or -1
* if the end marker is used in the LZMA stream
*
* @param lc number of literal context bits, must be
* in the range [0, 8]
*
* @param lp number of literal position bits, must be
* in the range [0, 4]
*
* @param pb number position bits, must be
* in the range [0, 4]
*
* @param dictSize dictionary size as bytes, must be in the range
* [<code>0</code>, <code>DICT_SIZE_MAX</code>]
*
* @param presetDict preset dictionary or <code>null</code>
* to use no preset dictionary
*
* @throws CorruptedInputException
* if the first input byte is not 0x00
*
* @throws EOFException file is truncated or corrupt
*
* @throws IOException may be thrown by <code>in</code>
*/
public LZMAInputStream(InputStream in, long uncompSize,
                       int lc, int lp, int pb,
                       int dictSize, byte[] presetDict)
        throws IOException {
    // Delegate to the shared initializer with explicit lc/lp/pb values
    // instead of a packed properties byte.
    initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict);
}
private void initialize(InputStream in, long uncompSize, byte propsByte,
                        int dictSize, byte[] presetDict)
        throws IOException {
    // Reject uncompressed sizes below -1 here; the overload called at the
    // end would report them only with a bare IllegalArgumentException.
    if (uncompSize < -1)
        throw new UnsupportedOptionsException(
                "Uncompressed size is too big");

    // Unpack the properties byte. The encoding is
    // props = (pb * 5 + lp) * 9 + lc, and in contrast to LZMA2
    // there is no limit of lc + lp <= 4.
    int encodedProps = propsByte & 0xFF;
    if (encodedProps > (4 * 5 + 4) * 9 + 8)
        throw new CorruptedInputException("Invalid LZMA properties byte");

    int pb = encodedProps / (9 * 5);
    int lp = (encodedProps % (9 * 5)) / 9;
    int lc = encodedProps % 9;

    // Validate the dictionary size here so the caller gets a descriptive
    // exception instead of the IllegalArgumentException thrown by
    // the other "initialize".
    if (dictSize < 0 || dictSize > DICT_SIZE_MAX)
        throw new UnsupportedOptionsException(
                "LZMA dictionary is too big for this implementation");

    initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict);
}
private void initialize(InputStream in, long uncompSize,
                        int lc, int lp, int pb,
                        int dictSize, byte[] presetDict)
        throws IOException {
    // getDictSize validates dictSize and gives a message in
    // the exception too, so skip validating dictSize here.
    if (uncompSize < -1 || lc < 0 || lc > 8 || lp < 0 || lp > 4
            || pb < 0 || pb > 4)
        throw new IllegalArgumentException();

    this.in = in;

    // If uncompressed size is known, use it to avoid wasting memory for
    // a uselessly large dictionary buffer.
    dictSize = getDictSize(dictSize);
    if (uncompSize >= 0 && dictSize > uncompSize)
        dictSize = getDictSize((int)uncompSize);

    // NOTE(review): dictSize was already normalized by getDictSize above,
    // so this inner getDictSize call looks redundant (presumably
    // getDictSize is idempotent) — confirm before simplifying.
    lz = new LZDecoder(getDictSize(dictSize), presetDict);
    rc = new RangeDecoderFromStream(in);
    lzma = new LZMADecoder(lz, rc, lc, lp, pb);

    // remainingSize == -1 means the stream ends with an end marker
    // instead of a known uncompressed size.
    remainingSize = uncompSize;
}
/**
 * Decompresses and returns the next byte from this input stream.
 * <p>
 * Reading one byte at a time with <code>read()</code> can be
 * inefficient; wrap this stream in
 * <code>java.io.BufferedInputStream</code> when doing so in bulk.
 *
 * @return the next decompressed byte, or <code>-1</code>
 *         to indicate the end of the compressed stream
 *
 * @throws CorruptedInputException
 *
 * @throws XZIOException if the stream has been closed
 *
 * @throws EOFException
 *                      compressed input is truncated or corrupt
 *
 * @throws IOException may be thrown by <code>in</code>
 */
public int read() throws IOException {
    // Delegate to the array variant via the single-byte scratch buffer.
    if (read(tempBuf, 0, 1) == -1)
        return -1;

    return tempBuf[0] & 0xFF;
}
/**
* Decompresses into an array of bytes.
* <p>
* If <code>len</code> is zero, no bytes are read and <code>0</code>
* is returned. Otherwise this will block until <code>len</code>
* bytes have been decompressed, the end of the LZMA stream is reached,
* or an exception is thrown.
*
* @param buf target buffer for uncompressed data
* @param off start offset in <code>buf</code>
* @param len maximum number of uncompressed bytes to read
*
* @return number of bytes read, or <code>-1</code> to indicate
* the end of the compressed stream
*
* @throws CorruptedInputException
*
* @throws XZIOException if the stream has been closed
*
* @throws EOFException compressed input is truncated or corrupt
*
* @throws IOException may be thrown by <code>in</code>
*/
public int read(byte[] buf, int off, int len) throws IOException {
    // Standard range check; "off + len < 0" catches int overflow.
    if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
        throw new IndexOutOfBoundsException();

    if (len == 0)
        return 0;

    if (in == null)
        throw new XZIOException("Stream closed");

    // Fail fast if an earlier call already hit an I/O error.
    if (exception != null)
        throw exception;

    if (endReached)
        return -1;

    try {
        int size = 0;

        while (len > 0) {
            // If uncompressed size is known and thus no end marker will
            // be present, set the limit so that the uncompressed size
            // won't be exceeded.
            int copySizeMax = len;
            if (remainingSize >= 0 && remainingSize < len)
                copySizeMax = (int)remainingSize;

            lz.setLimit(copySizeMax);

            // Decode into the dictionary buffer.
            try {
                lzma.decode();
            } catch (CorruptedInputException e) {
                // The end marker is encoded with a LZMA symbol that
                // indicates maximum match distance. This is larger
                // than any supported dictionary and thus causes
                // CorruptedInputException from LZDecoder.repeat.
                if (remainingSize != -1 || !lzma.endMarkerDetected())
                    throw e;

                endReached = true;

                // The exception makes lzma.decode() miss the last range
                // decoder normalization, so do it here. This might
                // cause an IOException if it needs to read a byte
                // from the input stream.
                rc.normalize();
            }

            // Copy from the dictionary to buf.
            int copiedSize = lz.flush(buf, off);
            off += copiedSize;
            len -= copiedSize;
            size += copiedSize;

            if (remainingSize >= 0) {
                // Update the number of bytes left to be decompressed.
                remainingSize -= copiedSize;
                assert remainingSize >= 0;

                if (remainingSize == 0)
                    endReached = true;
            }

            if (endReached) {
                // Checking these helps a lot when catching corrupt
                // or truncated .lzma files. LZMA Utils doesn't do
                // the first check and thus it accepts many invalid
                // files that this implementation and XZ Utils don't.
                if (!rc.isFinished() || lz.hasPending())
                    throw new CorruptedInputException();

                return size == 0 ? -1 : size;
            }
        }

        return size;

    } catch (IOException e) {
        // Remember the error so that later calls fail fast, then rethrow.
        exception = e;
        throw e;
    }
}
/**
 * Closes the stream and calls <code>in.close()</code>.
 * Closing an already-closed stream is a no-op.
 *
 * @throws IOException if thrown by <code>in.close()</code>
 */
public void close() throws IOException {
    if (in == null)
        return;

    // Mark the stream closed even if in.close() throws.
    try {
        in.close();
    } finally {
        in = null;
    }
}
}

View File

@@ -0,0 +1,60 @@
/*
* MemoryLimitException
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
/**
 * Signals that decompression was aborted because the decoder would need
 * more memory than the caller-specified limit allows.
 * <p>
 * Both the required amount and the limit are reported in kibibytes (KiB)
 * and are included in the detail message in human-readable form.
 */
public class MemoryLimitException extends XZIOException {
    private static final long serialVersionUID = 3L;

    // Both values are stored as kibibytes (KiB).
    private final int memoryNeeded;
    private final int memoryLimit;

    /**
     * Creates a new MemoryLimitException.
     *
     * @param memoryNeeded amount of memory needed as kibibytes (KiB)
     * @param memoryLimit  specified memory usage limit as kibibytes (KiB)
     */
    public MemoryLimitException(int memoryNeeded, int memoryLimit) {
        super("" + memoryNeeded + " KiB of memory would be needed; limit was "
              + memoryLimit + " KiB");
        this.memoryNeeded = memoryNeeded;
        this.memoryLimit = memoryLimit;
    }

    /**
     * Returns how much memory is required to decompress the data,
     * as kibibytes (KiB).
     */
    public int getMemoryNeeded() {
        return memoryNeeded;
    }

    /**
     * Returns the memory usage limit that was in effect when this
     * exception was created, as kibibytes (KiB).
     */
    public int getMemoryLimit() {
        return memoryLimit;
    }
}

View File

@@ -0,0 +1,36 @@
/*
* PowerPCOptions
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
import org.tukaani.xz.simple.PowerPC;
/**
 * BCJ filter for big endian PowerPC instructions.
 */
public class PowerPCOptions extends BCJOptions {
    // PowerPC instructions are four bytes wide; the BCJ filter operates
    // on this alignment.
    private static final int ALIGNMENT = 4;

    public PowerPCOptions() {
        super(ALIGNMENT);
    }

    public FinishableOutputStream getOutputStream(FinishableOutputStream out) {
        // The boolean picks the transform direction: true on the output
        // (compression) path — presumably "encode"; confirm in PowerPC.
        return new SimpleOutputStream(out, new PowerPC(true, startOffset));
    }

    public InputStream getInputStream(InputStream in) {
        // false on the input (decompression) path — presumably "decode".
        return new SimpleInputStream(in, new PowerPC(false, startOffset));
    }

    FilterEncoder getFilterEncoder() {
        return new BCJEncoder(this, BCJCoder.POWERPC_FILTER_ID);
    }
}

View File

@@ -0,0 +1,33 @@
/*
* RawCoder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
class RawCoder {
    /**
     * Sanity-checks a raw filter chain: every filter except the last must
     * be usable in a non-last position, the last filter must be usable as
     * the last one, and at most three filters may change the data size.
     *
     * @throws UnsupportedOptionsException if the chain violates any rule
     */
    static void validate(FilterCoder[] filters)
            throws UnsupportedOptionsException {
        int last = filters.length - 1;

        for (int i = 0; i < last; ++i)
            if (!filters[i].nonLastOK())
                throw new UnsupportedOptionsException(
                        "Unsupported XZ filter chain");

        if (!filters[last].lastOK())
            throw new UnsupportedOptionsException(
                    "Unsupported XZ filter chain");

        int changesSizeCount = 0;
        for (int i = 0; i <= last; ++i)
            if (filters[i].changesSize())
                ++changesSizeCount;

        if (changesSizeCount > 3)
            throw new UnsupportedOptionsException(
                    "Unsupported XZ filter chain");
    }
}

View File

@@ -0,0 +1,36 @@
/*
* SPARCOptions
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
import org.tukaani.xz.simple.SPARC;
/**
 * BCJ filter for SPARC.
 */
public class SPARCOptions extends BCJOptions {
    // SPARC instructions are four bytes wide; the BCJ filter operates
    // on this alignment.
    private static final int ALIGNMENT = 4;

    public SPARCOptions() {
        super(ALIGNMENT);
    }

    public FinishableOutputStream getOutputStream(FinishableOutputStream out) {
        // The boolean picks the transform direction: true on the output
        // (compression) path — presumably "encode"; confirm in SPARC.
        return new SimpleOutputStream(out, new SPARC(true, startOffset));
    }

    public InputStream getInputStream(InputStream in) {
        // false on the input (decompression) path — presumably "decode".
        return new SimpleInputStream(in, new SPARC(false, startOffset));
    }

    FilterEncoder getFilterEncoder() {
        return new BCJEncoder(this, BCJCoder.SPARC_FILTER_ID);
    }
}

View File

@@ -0,0 +1,102 @@
/*
* SeekableFileInputStream
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.File;
import java.io.RandomAccessFile;
import java.io.IOException;
import java.io.FileNotFoundException;
/**
 * Adapts a {@link java.io.RandomAccessFile RandomAccessFile} to the
 * {@link SeekableInputStream} interface by delegating every operation
 * to the wrapped file.
 */
public class SeekableFileInputStream extends SeekableInputStream {
    /**
     * The underlying {@link RandomAccessFile} that all operations
     * are delegated to. Exposed for subclasses.
     */
    protected RandomAccessFile randomAccessFile;

    /**
     * Creates a new seekable input stream that reads from the specified
     * file, opened read-only.
     */
    public SeekableFileInputStream(File file) throws FileNotFoundException {
        this.randomAccessFile = new RandomAccessFile(file, "r");
    }

    /**
     * Creates a new seekable input stream that reads from a file with
     * the specified name, opened read-only.
     */
    public SeekableFileInputStream(String name) throws FileNotFoundException {
        this.randomAccessFile = new RandomAccessFile(name, "r");
    }

    /**
     * Creates a new seekable input stream around an existing
     * <code>RandomAccessFile</code> object.
     */
    public SeekableFileInputStream(RandomAccessFile randomAccessFile) {
        this.randomAccessFile = randomAccessFile;
    }

    /** Delegates to {@link RandomAccessFile#read() randomAccessFile.read()}. */
    public int read() throws IOException {
        return randomAccessFile.read();
    }

    /**
     * Delegates to {@link RandomAccessFile#read(byte[])
     * randomAccessFile.read(buf)}.
     */
    public int read(byte[] buf) throws IOException {
        return randomAccessFile.read(buf);
    }

    /**
     * Delegates to {@link RandomAccessFile#read(byte[],int,int)
     * randomAccessFile.read(buf, off, len)}.
     */
    public int read(byte[] buf, int off, int len) throws IOException {
        return randomAccessFile.read(buf, off, len);
    }

    /** Delegates to {@link RandomAccessFile#close() randomAccessFile.close()}. */
    public void close() throws IOException {
        randomAccessFile.close();
    }

    /** Delegates to {@link RandomAccessFile#length() randomAccessFile.length()}. */
    public long length() throws IOException {
        return randomAccessFile.length();
    }

    /**
     * Delegates to {@link RandomAccessFile#getFilePointer()
     * randomAccessFile.getFilePointer()}.
     */
    public long position() throws IOException {
        return randomAccessFile.getFilePointer();
    }

    /** Delegates to {@link RandomAccessFile#seek(long) randomAccessFile.seek(long)}. */
    public void seek(long pos) throws IOException {
        randomAccessFile.seek(pos);
    }
}

View File

@@ -0,0 +1,81 @@
/*
* SeekableInputStream
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
import java.io.IOException;
/**
 * Abstract base class for input streams that support random access.
 */
public abstract class SeekableInputStream extends InputStream {
    /**
     * Seeks <code>n</code> bytes forward in this stream.
     * <p>
     * Seeking never goes past the end of the stream: if fewer than
     * <code>n</code> bytes remain, only the remaining bytes are skipped
     * (equivalent to <code>seek(length())</code>) and the return value
     * is reduced accordingly. If the current position is already at or
     * past the end, nothing is skipped and <code>0</code> is returned.
     * <p>
     * A negative <code>n</code> never seeks backward; it is a no-op
     * returning <code>0</code>, matching the contract of
     * {@link java.io.InputStream#skip(long) InputStream.skip}.
     *
     * @return <code>0</code> if <code>n</code> is negative,
     *         less than <code>n</code> if skipping <code>n</code>
     *         bytes would seek past the end of the file,
     *         <code>n</code> otherwise
     *
     * @throws IOException might be thrown by {@link #seek(long)}
     */
    public long skip(long n) throws IOException {
        if (n <= 0)
            return 0;

        long size = length();
        long pos = position();
        if (pos >= size)
            return 0;

        // Clamp the skip amount to the number of bytes left.
        long amount = Math.min(n, size - pos);
        seek(pos + amount);
        return amount;
    }

    /**
     * Gets the size of the stream.
     */
    public abstract long length() throws IOException;

    /**
     * Gets the current position in the stream.
     */
    public abstract long position() throws IOException;

    /**
     * Seeks to the specified absolute position in the stream.
     * <p>
     * Subclasses should allow seeking past the end of the stream unless
     * there is a good reason not to. After such a seek, <code>read</code>
     * returns <code>-1</code> to indicate end of stream.
     *
     * @param pos new read position in the stream
     *
     * @throws IOException if <code>pos</code> is negative or if
     *                     a stream-specific I/O error occurs
     */
    public abstract void seek(long pos) throws IOException;
}

View File

@@ -0,0 +1,896 @@
/*
* SeekableXZInputStream
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.util.Arrays;
import java.util.ArrayList;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.EOFException;
import org.tukaani.xz.common.DecoderUtil;
import org.tukaani.xz.common.StreamFlags;
import org.tukaani.xz.check.Check;
import org.tukaani.xz.index.IndexDecoder;
import org.tukaani.xz.index.BlockInfo;
/**
* Decompresses a .xz file in random access mode.
* This supports decompressing concatenated .xz files.
* <p>
* Each .xz file consists of one or more Streams. Each Stream consists of zero
* or more Blocks. Each Stream contains an Index of Streams' Blocks.
* The Indexes from all Streams are loaded in RAM by a constructor of this
* class. A typical .xz file has only one Stream, and parsing its Index will
* need only three or four seeks.
* <p>
* To make random access possible, the data in a .xz file must be split
* into multiple Blocks of reasonable size. Decompression can only start at
* a Block boundary. When seeking to an uncompressed position that is not at
* a Block boundary, decompression starts at the beginning of the Block and
* throws away data until the target position is reached. Thus, smaller Blocks
* mean faster seeks to arbitrary uncompressed positions. On the other hand,
* smaller Blocks mean worse compression. So one has to make a compromise
* between random access speed and compression ratio.
* <p>
* Implementation note: This class uses linear search to locate the correct
* Stream from the data structures in RAM. It was the simplest to implement
* and should be fine as long as there aren't too many Streams. The correct
* Block inside a Stream is located using binary search and thus is fast
* even with a huge number of Blocks.
*
* <h4>Memory usage</h4>
* <p>
* The amount of memory needed for the Indexes is taken into account when
* checking the memory usage limit. Each Stream is calculated to need at
* least 1&nbsp;KiB of memory and each Block 16 bytes of memory, rounded up
* to the next kibibyte. So unless the file has a huge number of Streams or
* Blocks, these don't take significant amount of memory.
*
* <h4>Creating random-accessible .xz files</h4>
* <p>
* When using {@link XZOutputStream}, a new Block can be started by calling
* its {@link XZOutputStream#endBlock() endBlock} method. If you know
* that the decompressor will only need to seek to certain uncompressed
* positions, it can be a good idea to start a new Block at (some of) these
* positions (and only at these positions to get better compression ratio).
* <p>
* liblzma in XZ Utils supports starting a new Block with
* <code>LZMA_FULL_FLUSH</code>. XZ Utils 5.1.1alpha added threaded
* compression which creates multi-Block .xz files. XZ Utils 5.1.1alpha
* also added the option <code>--block-size=SIZE</code> to the xz command
* line tool. XZ Utils 5.1.2alpha added a partial implementation of
* <code>--block-list=SIZES</code> which allows specifying sizes of
* individual Blocks.
*
* @see SeekableFileInputStream
* @see XZInputStream
* @see XZOutputStream
*/
public class SeekableXZInputStream extends SeekableInputStream {
/**
* The input stream containing XZ compressed data.
*/
private SeekableInputStream in;
/**
* Memory usage limit after the memory usage of the IndexDecoders have
* been subtracted.
*/
private final int memoryLimit;
/**
* Memory usage of the IndexDecoders.
* <code>memoryLimit + indexMemoryUsage</code> equals the original
* memory usage limit that was passed to the constructor.
*/
private int indexMemoryUsage = 0;
/**
* List of IndexDecoders, one for each Stream in the file.
* The list is in reverse order: The first element is
* the last Stream in the file.
*/
private final ArrayList streams = new ArrayList();
/**
* Bitmask of all Check IDs seen.
*/
private int checkTypes = 0;
/**
* Uncompressed size of the file (all Streams).
*/
private long uncompressedSize = 0;
/**
* Uncompressed size of the largest XZ Block in the file.
*/
private long largestBlockSize = 0;
/**
* Number of XZ Blocks in the file.
*/
private int blockCount = 0;
/**
* Size and position information about the current Block.
* If there are no Blocks, all values will be <code>-1</code>.
*/
private final BlockInfo curBlockInfo;
/**
* Temporary (and cached) information about the Block whose information
* is queried via <code>getBlockPos</code> and related functions.
*/
private final BlockInfo queriedBlockInfo;
/**
* Integrity Check in the current XZ Stream. The constructor leaves
* this to point to the Check of the first Stream.
*/
private Check check;
/**
* Decoder of the current XZ Block, if any.
*/
private BlockInputStream blockDecoder = null;
/**
* Current uncompressed position.
*/
private long curPos = 0;
/**
* Target position for seeking.
*/
private long seekPos;
/**
* True when <code>seek(long)</code> has been called but the actual
* seeking hasn't been done yet.
*/
private boolean seekNeeded = false;
/**
* True when end of the file was reached. This can be cleared by
* calling <code>seek(long)</code>.
*/
private boolean endReached = false;
/**
* Pending exception from an earlier error.
*/
private IOException exception = null;
/**
* Temporary buffer for read(). This avoids reallocating memory
* on every read() call.
*/
private final byte[] tempBuf = new byte[1];
/**
* Creates a new seekable XZ decompressor without a memory usage limit.
*
* @param in seekable input stream containing one or more
* XZ Streams; the whole input stream is used
*
* @throws XZFormatException
* input is not in the XZ format
*
* @throws CorruptedInputException
* XZ data is corrupt or truncated
*
* @throws UnsupportedOptionsException
* XZ headers seem valid but they specify
* options not supported by this implementation
*
* @throws EOFException
* less than 6 bytes of input was available
* from <code>in</code>, or (unlikely) the size
* of the underlying stream got smaller while
* this was reading from it
*
* @throws IOException may be thrown by <code>in</code>
*/
public SeekableXZInputStream(SeekableInputStream in)
        throws IOException {
    // Delegate with -1, meaning no memory usage limit.
    this(in, -1);
}
/**
* Creates a new seekable XZ decompressor with an optional
* memory usage limit.
*
* @param in seekable input stream containing one or more
* XZ Streams; the whole input stream is used
*
* @param memoryLimit memory usage limit in kibibytes (KiB)
* or <code>-1</code> to impose no
* memory usage limit
*
* @throws XZFormatException
* input is not in the XZ format
*
* @throws CorruptedInputException
* XZ data is corrupt or truncated
*
* @throws UnsupportedOptionsException
* XZ headers seem valid but they specify
* options not supported by this implementation
*
* @throws MemoryLimitException
* decoded XZ Indexes would need more memory
* than allowed by the memory usage limit
*
* @throws EOFException
* less than 6 bytes of input was available
* from <code>in</code>, or (unlikely) the size
* of the underlying stream got smaller while
* this was reading from it
*
* @throws IOException may be thrown by <code>in</code>
*/
public SeekableXZInputStream(SeekableInputStream in, int memoryLimit)
        throws IOException {
    this.in = in;
    DataInputStream inData = new DataInputStream(in);

    // Check the magic bytes in the beginning of the file.
    {
        in.seek(0);
        byte[] buf = new byte[XZ.HEADER_MAGIC.length];
        inData.readFully(buf);
        if (!Arrays.equals(buf, XZ.HEADER_MAGIC))
            throw new XZFormatException();
    }

    // Get the file size and verify that it is a multiple of 4 bytes.
    long pos = in.length();
    if ((pos & 3) != 0)
        throw new CorruptedInputException(
                "XZ file size is not a multiple of 4 bytes");

    // Parse the headers starting from the end of the file.
    byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
    long streamPadding = 0;

    while (pos > 0) {
        if (pos < DecoderUtil.STREAM_HEADER_SIZE)
            throw new CorruptedInputException();

        // Read the potential Stream Footer.
        in.seek(pos - DecoderUtil.STREAM_HEADER_SIZE);
        inData.readFully(buf);

        // Skip Stream Padding four bytes at a time.
        // Skipping more at once would be faster,
        // but usually there isn't much Stream Padding.
        if (buf[8] == 0x00 && buf[9] == 0x00 && buf[10] == 0x00
                && buf[11] == 0x00) {
            streamPadding += 4;
            pos -= 4;
            continue;
        }

        // It's not Stream Padding. Update pos.
        pos -= DecoderUtil.STREAM_HEADER_SIZE;

        // Decode the Stream Footer and check if Backward Size
        // looks reasonable.
        StreamFlags streamFooter = DecoderUtil.decodeStreamFooter(buf);
        if (streamFooter.backwardSize >= pos)
            throw new CorruptedInputException(
                    "Backward Size in XZ Stream Footer is too big");

        // Check that the Check ID is supported. Store it in case this
        // is the first Stream in the file.
        check = Check.getInstance(streamFooter.checkType);

        // Remember which Check IDs have been seen.
        checkTypes |= 1 << streamFooter.checkType;

        // Seek to the beginning of the Index.
        in.seek(pos - streamFooter.backwardSize);

        // Decode the Index field.
        IndexDecoder index;
        try {
            index = new IndexDecoder(in, streamFooter, streamPadding,
                                     memoryLimit);
        } catch (MemoryLimitException e) {
            // IndexDecoder doesn't know how much memory we had
            // already needed so we need to recreate the exception.
            assert memoryLimit >= 0;
            throw new MemoryLimitException(
                    e.getMemoryNeeded() + indexMemoryUsage,
                    memoryLimit + indexMemoryUsage);
        }

        // Update the memory usage and limit counters.
        // Note: the memoryLimit *parameter* is decremented here; the
        // remaining budget is stored into the final field near the end.
        indexMemoryUsage += index.getMemoryUsage();
        if (memoryLimit >= 0) {
            memoryLimit -= index.getMemoryUsage();
            assert memoryLimit >= 0;
        }

        // Remember the uncompressed size of the largest Block.
        if (largestBlockSize < index.getLargestBlockSize())
            largestBlockSize = index.getLargestBlockSize();

        // Calculate the offset to the beginning of this XZ Stream and
        // check that it looks sane.
        long off = index.getStreamSize() - DecoderUtil.STREAM_HEADER_SIZE;
        if (pos < off)
            throw new CorruptedInputException("XZ Index indicates "
                    + "too big compressed size for the XZ Stream");

        // Seek to the beginning of this Stream.
        pos -= off;
        in.seek(pos);

        // Decode the Stream Header.
        inData.readFully(buf);
        StreamFlags streamHeader = DecoderUtil.decodeStreamHeader(buf);

        // Verify that the Stream Header matches the Stream Footer.
        if (!DecoderUtil.areStreamFlagsEqual(streamHeader, streamFooter))
            throw new CorruptedInputException(
                    "XZ Stream Footer does not match Stream Header");

        // Update the total uncompressed size of the file and check that
        // it doesn't overflow.
        uncompressedSize += index.getUncompressedSize();
        if (uncompressedSize < 0)
            throw new UnsupportedOptionsException("XZ file is too big");

        // Update the Block count and check that it fits into an int.
        blockCount += index.getRecordCount();
        if (blockCount < 0)
            throw new UnsupportedOptionsException(
                    "XZ file has over " + Integer.MAX_VALUE + " Blocks");

        // Add this Stream to the list of Streams.
        // (The list ends up in reverse order: last Stream first.)
        streams.add(index);

        // Reset to be ready to parse the next Stream.
        streamPadding = 0;
    }

    assert pos == 0;

    // Save it now that indexMemoryUsage has been subtracted from it.
    this.memoryLimit = memoryLimit;

    // Store the relative offsets of the Streams. This way we don't
    // need to recalculate them in this class when seeking; the
    // IndexDecoder instances will handle them.
    IndexDecoder prev = (IndexDecoder)streams.get(streams.size() - 1);
    for (int i = streams.size() - 2; i >= 0; --i) {
        IndexDecoder cur = (IndexDecoder)streams.get(i);
        cur.setOffsets(prev);
        prev = cur;
    }

    // Initialize curBlockInfo to point to the first Stream.
    // The blockNumber will be left to -1 so that .hasNext()
    // and .setNext() work to get the first Block when starting
    // to decompress from the beginning of the file.
    IndexDecoder first = (IndexDecoder)streams.get(streams.size() - 1);
    curBlockInfo = new BlockInfo(first);

    // queriedBlockInfo needs to be allocated too. The Stream used for
    // initialization doesn't matter though.
    queriedBlockInfo = new BlockInfo(first);
}
/**
* Gets the types of integrity checks used in the .xz file.
* Multiple checks are possible only if there are multiple
* concatenated XZ Streams.
* <p>
* The returned value has a bit set for every check type that is present.
* For example, if CRC64 and SHA-256 were used, the return value is
* <code>(1&nbsp;&lt;&lt;&nbsp;XZ.CHECK_CRC64)
* | (1&nbsp;&lt;&lt;&nbsp;XZ.CHECK_SHA256)</code>.
*/
public int getCheckTypes() {
    // Bitmask built in the constructor: bit i is set if Check ID i
    // was seen in any Stream Footer.
    return checkTypes;
}
/**
* Gets the amount of memory in kibibytes (KiB) used by
* the data structures needed to locate the XZ Blocks.
* This is usually useless information but since it is calculated
* for the memory usage limit anyway, it is nice to make it available too.
*/
public int getIndexMemoryUsage() {
    // Accumulated in the constructor from each Stream's IndexDecoder.
    return indexMemoryUsage;
}
/**
* Gets the uncompressed size of the largest XZ Block in bytes.
* This can be useful if you want to check that the file doesn't
* have huge XZ Blocks which could make seeking to arbitrary offsets
* very slow. Note that huge Blocks don't automatically mean that
* seeking would be slow, for example, seeking to the beginning of
* any Block is always fast.
*/
public long getLargestBlockSize() {
    // Maximum over all Indexes, computed while parsing in the constructor.
    return largestBlockSize;
}
/**
* Gets the number of Streams in the .xz file.
*
* @since 1.3
*/
public int getStreamCount() {
    // One IndexDecoder per Stream was added while parsing the file.
    return streams.size();
}
/**
* Gets the number of Blocks in the .xz file.
*
* @since 1.3
*/
public int getBlockCount() {
    // Sum of the record counts of all Indexes; overflow was checked
    // in the constructor.
    return blockCount;
}
/**
* Gets the uncompressed start position of the given Block.
*
* @throws IndexOutOfBoundsException if
* <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
* <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
*
* @since 1.3
*/
public long getBlockPos(int blockNumber) {
    // locateBlockByNumber fills queriedBlockInfo and throws
    // IndexOutOfBoundsException for an invalid blockNumber.
    locateBlockByNumber(queriedBlockInfo, blockNumber);
    return queriedBlockInfo.uncompressedOffset;
}
/**
* Gets the uncompressed size of the given Block.
*
* @throws IndexOutOfBoundsException if
* <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
* <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
*
* @since 1.3
*/
public long getBlockSize(int blockNumber) {
    // locateBlockByNumber fills queriedBlockInfo and throws
    // IndexOutOfBoundsException for an invalid blockNumber.
    locateBlockByNumber(queriedBlockInfo, blockNumber);
    return queriedBlockInfo.uncompressedSize;
}
/**
* Gets the position where the given compressed Block starts in
* the underlying .xz file.
* This information is rarely useful to the users of this class.
*
* @throws IndexOutOfBoundsException if
* <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
* <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
*
* @since 1.3
*/
public long getBlockCompPos(int blockNumber) {
    // locateBlockByNumber fills queriedBlockInfo and throws
    // IndexOutOfBoundsException for an invalid blockNumber.
    locateBlockByNumber(queriedBlockInfo, blockNumber);
    return queriedBlockInfo.compressedOffset;
}
/**
* Gets the compressed size of the given Block.
* This together with the uncompressed size can be used to calculate
* the compression ratio of the specific Block.
*
* @throws IndexOutOfBoundsException if
* <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
* <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
*
* @since 1.3
*/
public long getBlockCompSize(int blockNumber) {
    locateBlockByNumber(queriedBlockInfo, blockNumber);
    // Round Unpadded Size up to the next multiple of four to include
    // the Block Padding.
    return (queriedBlockInfo.unpaddedSize + 3) & ~3;
}
/**
* Gets integrity check type (Check ID) of the given Block.
*
* @throws IndexOutOfBoundsException if
* <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
* <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
*
* @see #getCheckTypes()
*
* @since 1.3
*/
public int getBlockCheckType(int blockNumber) {
    // The Check ID comes from the Stream that contains the Block;
    // different concatenated Streams may use different check types.
    locateBlockByNumber(queriedBlockInfo, blockNumber);
    return queriedBlockInfo.getCheckType();
}
/**
* Gets the number of the Block that contains the byte at the given
* uncompressed position.
*
* @throws IndexOutOfBoundsException if
* <code>pos&nbsp;&lt;&nbsp;0</code> or
* <code>pos&nbsp;&gt;=&nbsp;length()</code>.
*
* @since 1.3
*/
public int getBlockNumber(long pos) {
    // locateBlockByPos validates pos and fills queriedBlockInfo
    // with the Block that covers the given uncompressed offset.
    locateBlockByPos(queriedBlockInfo, pos);
    return queriedBlockInfo.blockNumber;
}
/**
* Decompresses the next byte from this input stream.
*
* @return the next decompressed byte, or <code>-1</code>
* to indicate the end of the compressed stream
*
* @throws CorruptedInputException
* @throws UnsupportedOptionsException
* @throws MemoryLimitException
*
* @throws XZIOException if the stream has been closed
*
* @throws IOException may be thrown by <code>in</code>
*/
public int read() throws IOException {
    // Delegate to the array variant through the one-byte scratch
    // buffer; all error handling lives there.
    int ret = read(tempBuf, 0, 1);
    if (ret == -1)
        return -1;
    return tempBuf[0] & 0xFF;
}
/**
* Decompresses into an array of bytes.
* <p>
* If <code>len</code> is zero, no bytes are read and <code>0</code>
* is returned. Otherwise this will try to decompress <code>len</code>
* bytes of uncompressed data. Less than <code>len</code> bytes may
* be read only in the following situations:
* <ul>
* <li>The end of the compressed data was reached successfully.</li>
* <li>An error is detected after at least one but less than
* <code>len</code> bytes have already been successfully
* decompressed. The next call with non-zero <code>len</code>
* will immediately throw the pending exception.</li>
* <li>An exception is thrown.</li>
* </ul>
*
* @param buf target buffer for uncompressed data
* @param off start offset in <code>buf</code>
* @param len maximum number of uncompressed bytes to read
*
* @return number of bytes read, or <code>-1</code> to indicate
* the end of the compressed stream
*
* @throws CorruptedInputException
* @throws UnsupportedOptionsException
* @throws MemoryLimitException
*
* @throws XZIOException if the stream has been closed
*
* @throws IOException may be thrown by <code>in</code>
*/
public int read(byte[] buf, int off, int len) throws IOException {
    // Standard InputStream argument validation; off + len < 0
    // catches int overflow of the sum.
    if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
        throw new IndexOutOfBoundsException();
    if (len == 0)
        return 0;
    // in == null means the stream was closed; a latched exception
    // from an earlier call is rethrown before any new work.
    if (in == null)
        throw new XZIOException("Stream closed");
    if (exception != null)
        throw exception;
    int size = 0;
    try {
        // A pending seek(long)/seekToBlock is performed lazily here,
        // on the first read after the position was changed.
        if (seekNeeded)
            seek();
        if (endReached)
            return -1;
        while (len > 0) {
            // No active Block decoder: advance to the next Block
            // (or detect end of file).
            if (blockDecoder == null) {
                seek();
                if (endReached)
                    break;
            }
            int ret = blockDecoder.read(buf, off, len);
            if (ret > 0) {
                curPos += ret;
                size += ret;
                off += ret;
                len -= ret;
            } else if (ret == -1) {
                // Current Block is fully decoded; the next loop
                // iteration will move on to the following Block.
                blockDecoder = null;
            }
        }
    } catch (IOException e) {
        // We know that the file isn't simply truncated because we could
        // parse the Indexes in the constructor. So convert EOFException
        // to CorruptedInputException.
        if (e instanceof EOFException)
            e = new CorruptedInputException();
        // Latch the exception: if some bytes were already produced,
        // return them now and rethrow on the next call.
        exception = e;
        if (size == 0)
            throw e;
    }
    return size;
}
/**
* Returns the number of uncompressed bytes that can be read
* without blocking. The value is returned with an assumption
* that the compressed input data will be valid. If the compressed
* data is corrupt, <code>CorruptedInputException</code> may get
* thrown before the number of bytes claimed to be available have
* been read from this input stream.
*
* @return the number of uncompressed bytes that can be read
* without blocking
*/
public int available() throws IOException {
    // Report 0 after close via an exception, and rethrow any
    // exception latched by an earlier read.
    if (in == null)
        throw new XZIOException("Stream closed");
    if (exception != null)
        throw exception;
    // Nothing can be read without blocking when at end of stream,
    // when a seek is pending, or when no Block is being decoded.
    boolean nothingBuffered
            = endReached || seekNeeded || blockDecoder == null;
    return nothingBuffered ? 0 : blockDecoder.available();
}
/**
* Closes the stream and calls <code>in.close()</code>.
* If the stream was already closed, this does nothing.
*
* @throws IOException if thrown by <code>in.close()</code>
*/
public void close() throws IOException {
    // Closing an already-closed stream is a no-op.
    if (in == null)
        return;
    try {
        in.close();
    } finally {
        // Mark this stream closed even if in.close() throws.
        in = null;
    }
}
/**
* Gets the uncompressed size of this input stream. If there are multiple
* XZ Streams, the total uncompressed size of all XZ Streams is returned.
*/
public long length() {
    // Sum of the uncompressed sizes of all concatenated Streams,
    // computed when the Indexes were parsed.
    return uncompressedSize;
}
/**
* Gets the current uncompressed position in this input stream.
*
* @throws XZIOException if the stream has been closed
*/
public long position() throws IOException {
    if (in == null)
        throw new XZIOException("Stream closed");
    // A pending (not yet performed) seek already defines the
    // logical read position.
    if (seekNeeded)
        return seekPos;
    return curPos;
}
/**
* Seeks to the specified absolute uncompressed position in the stream.
* This only stores the new position, so this function itself is always
* very fast. The actual seek is done when <code>read</code> is called
* to read at least one byte.
* <p>
* Seeking past the end of the stream is possible. In that case
* <code>read</code> will return <code>-1</code> to indicate
* the end of the stream.
*
* @param pos new uncompressed read position
*
* @throws XZIOException
* if <code>pos</code> is negative, or
* if stream has been closed
*/
public void seek(long pos) throws IOException {
    // Closed-stream check must come before the argument check so a
    // closed stream always reports "Stream closed".
    if (in == null)
        throw new XZIOException("Stream closed");
    if (pos < 0)
        throw new XZIOException("Negative seek position: " + pos);
    // Only record the target; the actual seek happens lazily on
    // the next read.
    seekPos = pos;
    seekNeeded = true;
}
/**
* Seeks to the beginning of the given XZ Block.
*
* @throws XZIOException
* if <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
* <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>,
* or if stream has been closed
*
* @since 1.3
*/
public void seekToBlock(int blockNumber) throws IOException {
    if (in == null)
        throw new XZIOException("Stream closed");
    if (blockNumber < 0 || blockNumber >= blockCount)
        throw new XZIOException("Invalid XZ Block number: " + blockNumber);
    // This is a bit silly implementation. Here we locate the uncompressed
    // offset of the specified Block, then when doing the actual seek in
    // seek(), we need to find the Block number based on seekPos.
    seekPos = getBlockPos(blockNumber);
    seekNeeded = true;
}
/**
* Does the actual seeking. This is also called when <code>read</code>
* needs a new Block to decode.
*/
private void seek() throws IOException {
    // If seek(long) wasn't called, we simply need to get the next Block
    // from the same Stream. If there are no more Blocks in this Stream,
    // then we behave as if seek(long) had been called.
    if (!seekNeeded) {
        if (curBlockInfo.hasNext()) {
            curBlockInfo.setNext();
            initBlockDecoder();
            return;
        }
        seekPos = curPos;
    }
    seekNeeded = false;
    // Check if we are seeking to or past the end of the file.
    if (seekPos >= uncompressedSize) {
        curPos = seekPos;
        blockDecoder = null;
        endReached = true;
        return;
    }
    endReached = false;
    // Locate the Block that contains the uncompressed target position.
    locateBlockByPos(curBlockInfo, seekPos);
    // Seek in the underlying stream and create a new Block decoder
    // only if really needed. We can skip it if the current position
    // is already in the correct Block and the target position hasn't
    // been decompressed yet.
    //
    // NOTE: If curPos points to the beginning of this Block, it's
    // because it was left there after decompressing an earlier Block.
    // In that case, decoding of the current Block hasn't been started
    // yet. (Decoding of a Block won't be started until at least one
    // byte will also be read from it.)
    if (!(curPos > curBlockInfo.uncompressedOffset && curPos <= seekPos)) {
        // Seek to the beginning of the Block.
        in.seek(curBlockInfo.compressedOffset);
        // Since it is possible that this Block is from a different
        // Stream than the previous Block, initialize a new Check.
        check = Check.getInstance(curBlockInfo.getCheckType());
        // Create a new Block decoder.
        initBlockDecoder();
        curPos = curBlockInfo.uncompressedOffset;
    }
    // If the target wasn't at a Block boundary, decompress and throw
    // away data to reach the target position.
    if (seekPos > curPos) {
        // NOTE: The "if" below is there just in case. In this situation,
        // blockDecoder.skip will always skip the requested amount
        // or throw an exception.
        long skipAmount = seekPos - curPos;
        if (blockDecoder.skip(skipAmount) != skipAmount)
            throw new CorruptedInputException();
        curPos = seekPos;
    }
}
/**
* Locates the Block that contains the given uncompressed position.
*/
private void locateBlockByPos(BlockInfo info, long pos) {
    if (pos < 0 || pos >= uncompressedSize)
        throw new IndexOutOfBoundsException(
                "Invalid uncompressed position: " + pos);
    // Locate the Stream that contains the target position. The loop
    // has no exit condition of its own because pos was validated
    // above, so some Stream is guaranteed to contain it.
    IndexDecoder index;
    for (int i = 0; ; ++i) {
        index = (IndexDecoder)streams.get(i);
        if (index.hasUncompressedOffset(pos))
            break;
    }
    // Locate the Block from the Stream that contains the target position.
    index.locateBlock(info, pos);
    // Sanity checks on the Index data (compressed offsets are
    // 4-byte aligned and the Block must actually cover pos).
    assert (info.compressedOffset & 3) == 0;
    assert info.uncompressedSize > 0;
    assert pos >= info.uncompressedOffset;
    assert pos < info.uncompressedOffset + info.uncompressedSize;
}
/**
* Locates the given Block and stores information about it
* to <code>info</code>.
*/
private void locateBlockByNumber(BlockInfo info, int blockNumber) {
    // Validate.
    if (blockNumber < 0 || blockNumber >= blockCount)
        throw new IndexOutOfBoundsException(
                "Invalid XZ Block number: " + blockNumber);
    // Skip the search if info already points to the correct Block.
    if (info.blockNumber == blockNumber)
        return;
    // Search the Stream that contains the given Block and then
    // search the Block from that Stream. blockNumber was validated
    // above, so the loop always terminates via the return.
    for (int i = 0; ; ++i) {
        IndexDecoder index = (IndexDecoder)streams.get(i);
        if (index.hasRecord(blockNumber)) {
            index.setBlockInfo(info, blockNumber);
            return;
        }
    }
}
/**
* Initializes a new BlockInputStream. This is a helper function for
* <code>seek()</code>.
*/
private void initBlockDecoder() throws IOException {
    try {
        // Set it to null first so that GC can collect it if memory
        // runs tight when initializing a new BlockInputStream.
        blockDecoder = null;
        blockDecoder = new BlockInputStream(in, check, memoryLimit,
                curBlockInfo.unpaddedSize, curBlockInfo.uncompressedSize);
    } catch (MemoryLimitException e) {
        // BlockInputStream doesn't know how much memory we had
        // already needed so we need to recreate the exception.
        assert memoryLimit >= 0;
        throw new MemoryLimitException(
                e.getMemoryNeeded() + indexMemoryUsage,
                memoryLimit + indexMemoryUsage);
    } catch (IndexIndicatorException e) {
        // It cannot be Index so the file must be corrupt.
        throw new CorruptedInputException();
    }
}
}

View File

@@ -0,0 +1,138 @@
/*
* SimpleInputStream
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
import java.io.IOException;
import org.tukaani.xz.simple.SimpleFilter;
/**
 * Decodes a stream filtered with a BCJ (branch/call/jump) simple filter.
 * Raw data is read from <code>in</code> into <code>filterBuf</code>,
 * run through <code>simpleFilter</code>, and handed out to the caller.
 */
class SimpleInputStream extends InputStream {
    private static final int FILTER_BUF_SIZE = 4096;

    // Underlying stream; null once this stream has been closed.
    private InputStream in;
    private final SimpleFilter simpleFilter;

    // Layout of filterBuf: [0, pos) already consumed,
    // [pos, pos + filtered) filtered and ready for the caller,
    // [pos + filtered, pos + filtered + unfiltered) read but not
    // yet filtered.
    private final byte[] filterBuf = new byte[FILTER_BUF_SIZE];
    private int pos = 0;
    private int filtered = 0;
    private int unfiltered = 0;

    private boolean endReached = false;

    // Exception latched by an earlier read; rethrown on later calls.
    private IOException exception = null;

    // Scratch buffer for the single-byte read().
    private final byte[] tempBuf = new byte[1];

    // Memory usage of this stream in kibibytes (buffer + overhead).
    static int getMemoryUsage() {
        return 1 + FILTER_BUF_SIZE / 1024;
    }

    SimpleInputStream(InputStream in, SimpleFilter simpleFilter) {
        // Check for null because otherwise a null in wouldn't be
        // detected in this constructor.
        if (in == null)
            throw new NullPointerException();

        // The simpleFilter argument comes from this package
        // so it is known to be non-null already.
        assert simpleFilter != null;

        this.in = in;
        this.simpleFilter = simpleFilter;
    }

    public int read() throws IOException {
        return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
    }

    public int read(byte[] buf, int off, int len) throws IOException {
        // Standard argument validation; off + len < 0 catches overflow.
        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
            throw new IndexOutOfBoundsException();

        if (len == 0)
            return 0;

        if (in == null)
            throw new XZIOException("Stream closed");

        if (exception != null)
            throw exception;

        try {
            int size = 0;

            while (true) {
                // Copy filtered data into the caller-provided buffer.
                int copySize = Math.min(filtered, len);
                System.arraycopy(filterBuf, pos, buf, off, copySize);
                pos += copySize;
                filtered -= copySize;
                off += copySize;
                len -= copySize;
                size += copySize;

                // If end of filterBuf was reached, move the pending data to
                // the beginning of the buffer so that more data can be
                // copied into filterBuf on the next loop iteration.
                if (pos + filtered + unfiltered == FILTER_BUF_SIZE) {
                    System.arraycopy(filterBuf, pos, filterBuf, 0,
                                     filtered + unfiltered);
                    pos = 0;
                }

                if (len == 0 || endReached)
                    return size > 0 ? size : -1;

                // We only get here when all filtered bytes were consumed.
                assert filtered == 0;

                // Get more data into the temporary buffer.
                int inSize = FILTER_BUF_SIZE - (pos + filtered + unfiltered);
                inSize = in.read(filterBuf, pos + filtered + unfiltered,
                                 inSize);

                if (inSize == -1) {
                    // Mark the remaining unfiltered bytes to be ready
                    // to be copied out.
                    endReached = true;
                    filtered = unfiltered;
                    unfiltered = 0;
                } else {
                    // Filter the data in filterBuf.
                    unfiltered += inSize;
                    filtered = simpleFilter.code(filterBuf, pos, unfiltered);
                    assert filtered <= unfiltered;
                    unfiltered -= filtered;
                }
            }
        } catch (IOException e) {
            // Latch the exception so later calls fail the same way.
            exception = e;
            throw e;
        }
    }

    public int available() throws IOException {
        if (in == null)
            throw new XZIOException("Stream closed");

        if (exception != null)
            throw exception;

        // Only already-filtered bytes can be returned without blocking.
        return filtered;
    }

    public void close() throws IOException {
        if (in != null) {
            try {
                in.close();
            } finally {
                in = null;
            }
        }
    }
}

View File

@@ -0,0 +1,151 @@
/*
* SimpleOutputStream
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.IOException;
import org.tukaani.xz.simple.SimpleFilter;
/**
 * Encodes data with a BCJ (branch/call/jump) simple filter before
 * writing it to <code>out</code>. Unfiltered bytes are accumulated
 * in <code>filterBuf</code> until the filter can process them.
 */
class SimpleOutputStream extends FinishableOutputStream {
    private static final int FILTER_BUF_SIZE = 4096;

    // Downstream sink; null once this stream has been closed.
    private FinishableOutputStream out;
    private final SimpleFilter simpleFilter;

    // [pos, pos + unfiltered) holds bytes not yet filtered.
    private final byte[] filterBuf = new byte[FILTER_BUF_SIZE];
    private int pos = 0;
    private int unfiltered = 0;

    // Exception latched by an earlier write; rethrown on later calls.
    private IOException exception = null;
    private boolean finished = false;

    // Scratch buffer for the single-byte write(int).
    private final byte[] tempBuf = new byte[1];

    // Memory usage of this stream in kibibytes (buffer + overhead).
    static int getMemoryUsage() {
        return 1 + FILTER_BUF_SIZE / 1024;
    }

    SimpleOutputStream(FinishableOutputStream out,
                       SimpleFilter simpleFilter) {
        if (out == null)
            throw new NullPointerException();

        this.out = out;
        this.simpleFilter = simpleFilter;
    }

    public void write(int b) throws IOException {
        tempBuf[0] = (byte)b;
        write(tempBuf, 0, 1);
    }

    public void write(byte[] buf, int off, int len) throws IOException {
        // Standard argument validation; off + len < 0 catches overflow.
        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
            throw new IndexOutOfBoundsException();

        if (exception != null)
            throw exception;

        if (finished)
            throw new XZIOException("Stream finished or closed");

        while (len > 0) {
            // Copy more unfiltered data into filterBuf.
            int copySize = Math.min(len, FILTER_BUF_SIZE - (pos + unfiltered));
            System.arraycopy(buf, off, filterBuf, pos + unfiltered, copySize);
            off += copySize;
            len -= copySize;
            unfiltered += copySize;

            // Filter the data in filterBuf.
            int filtered = simpleFilter.code(filterBuf, pos, unfiltered);
            assert filtered <= unfiltered;
            unfiltered -= filtered;

            // Write out the filtered data.
            try {
                out.write(filterBuf, pos, filtered);
            } catch (IOException e) {
                exception = e;
                throw e;
            }

            pos += filtered;

            // If end of filterBuf was reached, move the pending unfiltered
            // data to the beginning of the buffer so that more data can
            // be copied into filterBuf on the next loop iteration.
            if (pos + unfiltered == FILTER_BUF_SIZE) {
                System.arraycopy(filterBuf, pos, filterBuf, 0, unfiltered);
                pos = 0;
            }
        }
    }

    // Flushes the tail of the data that the filter could not process
    // (BCJ filters leave a few trailing bytes unfiltered) and marks
    // the stream finished.
    private void writePending() throws IOException {
        assert !finished;

        if (exception != null)
            throw exception;

        try {
            out.write(filterBuf, pos, unfiltered);
        } catch (IOException e) {
            exception = e;
            throw e;
        }

        finished = true;
    }

    public void flush() throws IOException {
        // A partial flush could desynchronize the filter state, so
        // flushing is rejected outright.
        throw new UnsupportedOptionsException("Flushing is not supported");
    }

    public void finish() throws IOException {
        if (!finished) {
            // If it fails, don't call out.finish().
            writePending();

            try {
                out.finish();
            } catch (IOException e) {
                exception = e;
                throw e;
            }
        }
    }

    public void close() throws IOException {
        if (out != null) {
            if (!finished) {
                // out.close() must be called even if writePending() fails.
                // writePending() saves the possible exception so we can
                // ignore exceptions here.
                try {
                    writePending();
                } catch (IOException e) {}
            }

            try {
                out.close();
            } catch (IOException e) {
                // If there is an earlier exception, the exception
                // from out.close() is lost.
                if (exception == null)
                    exception = e;
            }

            out = null;
        }

        if (exception != null)
            throw exception;
    }
}

View File

@@ -0,0 +1,318 @@
/*
* SingleXZInputStream
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.EOFException;
import org.tukaani.xz.common.DecoderUtil;
import org.tukaani.xz.common.StreamFlags;
import org.tukaani.xz.index.IndexHash;
import org.tukaani.xz.check.Check;
/**
* Decompresses exactly one XZ Stream in streamed mode (no seeking).
* The decompression stops after the first XZ Stream has been decompressed,
* and the read position in the input stream is left at the first byte
* after the end of the XZ Stream. This can be useful when XZ data has
* been stored inside some other file format or protocol.
* <p>
* Unless you know what you are doing, don't use this class to decompress
* standalone .xz files. For that purpose, use <code>XZInputStream</code>.
*
* <h4>When uncompressed size is known beforehand</h4>
* <p>
* If you are decompressing complete XZ streams and your application knows
* exactly how much uncompressed data there should be, it is good to try
* reading one more byte by calling <code>read()</code> and checking
* that it returns <code>-1</code>. This way the decompressor will parse the
* file footers and verify the integrity checks, giving the caller more
* confidence that the uncompressed data is valid.
*
* @see XZInputStream
*/
public class SingleXZInputStream extends InputStream {
    // Underlying stream; null once this stream has been closed.
    private InputStream in;

    // Memory usage limit in KiB, or -1 for no limit.
    private int memoryLimit;

    // Stream Flags parsed from the 12-byte Stream Header.
    private StreamFlags streamHeaderFlags;

    // Integrity check implementation selected by the Stream Header.
    private Check check;

    // Decoder for the current Block, or null between Blocks.
    private BlockInputStream blockDecoder = null;

    // Accumulates Block sizes to validate against the Index
    // at the end of the Stream.
    private final IndexHash indexHash = new IndexHash();

    private boolean endReached = false;

    // Exception latched by an earlier read; rethrown on later calls.
    private IOException exception = null;

    // Scratch buffer for the single-byte read().
    private final byte[] tempBuf = new byte[1];

    /**
     * Creates a new XZ decompressor that decompresses exactly one
     * XZ Stream from <code>in</code> without a memory usage limit.
     * <p>
     * This constructor reads and parses the XZ Stream Header (12 bytes)
     * from <code>in</code>. The header of the first Block is not read
     * until <code>read</code> is called.
     *
     * @param in input stream from which XZ-compressed
     * data is read
     *
     * @throws XZFormatException
     * input is not in the XZ format
     *
     * @throws CorruptedInputException
     * XZ header CRC32 doesn't match
     *
     * @throws UnsupportedOptionsException
     * XZ header is valid but specifies options
     * not supported by this implementation
     *
     * @throws EOFException
     * less than 12 bytes of input was available
     * from <code>in</code>
     *
     * @throws IOException may be thrown by <code>in</code>
     */
    public SingleXZInputStream(InputStream in) throws IOException {
        initialize(in, -1);
    }

    /**
     * Creates a new XZ decompressor that decompresses exactly one
     * XZ Stream from <code>in</code> with an optional memory usage limit.
     * <p>
     * This is identical to <code>SingleXZInputStream(InputStream)</code>
     * except that this takes also the <code>memoryLimit</code> argument.
     *
     * @param in input stream from which XZ-compressed
     * data is read
     *
     * @param memoryLimit memory usage limit in kibibytes (KiB)
     * or <code>-1</code> to impose no
     * memory usage limit
     *
     * @throws XZFormatException
     * input is not in the XZ format
     *
     * @throws CorruptedInputException
     * XZ header CRC32 doesn't match
     *
     * @throws UnsupportedOptionsException
     * XZ header is valid but specifies options
     * not supported by this implementation
     *
     * @throws EOFException
     * less than 12 bytes of input was available
     * from <code>in</code>
     *
     * @throws IOException may be thrown by <code>in</code>
     */
    public SingleXZInputStream(InputStream in, int memoryLimit)
            throws IOException {
        initialize(in, memoryLimit);
    }

    // Package-private constructor used when the caller has already
    // read the 12-byte Stream Header from in.
    SingleXZInputStream(InputStream in, int memoryLimit,
                        byte[] streamHeader) throws IOException {
        initialize(in, memoryLimit, streamHeader);
    }

    // Reads the 12-byte Stream Header from in and delegates to the
    // three-argument initialize.
    private void initialize(InputStream in, int memoryLimit)
            throws IOException {
        byte[] streamHeader = new byte[DecoderUtil.STREAM_HEADER_SIZE];
        new DataInputStream(in).readFully(streamHeader);
        initialize(in, memoryLimit, streamHeader);
    }

    // Parses the given Stream Header and sets up the integrity check.
    private void initialize(InputStream in, int memoryLimit,
                            byte[] streamHeader) throws IOException {
        this.in = in;
        this.memoryLimit = memoryLimit;
        streamHeaderFlags = DecoderUtil.decodeStreamHeader(streamHeader);
        check = Check.getInstance(streamHeaderFlags.checkType);
    }

    /**
     * Gets the ID of the integrity check used in this XZ Stream.
     *
     * @return the Check ID specified in the XZ Stream Header
     */
    public int getCheckType() {
        return streamHeaderFlags.checkType;
    }

    /**
     * Gets the name of the integrity check used in this XZ Stream.
     *
     * @return the name of the check specified in the XZ Stream Header
     */
    public String getCheckName() {
        return check.getName();
    }

    /**
     * Decompresses the next byte from this input stream.
     * <p>
     * Reading lots of data with <code>read()</code> from this input stream
     * may be inefficient. Wrap it in {@link java.io.BufferedInputStream}
     * if you need to read lots of data one byte at a time.
     *
     * @return the next decompressed byte, or <code>-1</code>
     * to indicate the end of the compressed stream
     *
     * @throws CorruptedInputException
     * @throws UnsupportedOptionsException
     * @throws MemoryLimitException
     *
     * @throws XZIOException if the stream has been closed
     *
     * @throws EOFException
     * compressed input is truncated or corrupt
     *
     * @throws IOException may be thrown by <code>in</code>
     */
    public int read() throws IOException {
        return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
    }

    /**
     * Decompresses into an array of bytes.
     * <p>
     * If <code>len</code> is zero, no bytes are read and <code>0</code>
     * is returned. Otherwise this will try to decompress <code>len</code>
     * bytes of uncompressed data. Less than <code>len</code> bytes may
     * be read only in the following situations:
     * <ul>
     * <li>The end of the compressed data was reached successfully.</li>
     * <li>An error is detected after at least one but less than
     * <code>len</code> bytes have already been successfully
     * decompressed. The next call with non-zero <code>len</code>
     * will immediately throw the pending exception.</li>
     * <li>An exception is thrown.</li>
     * </ul>
     *
     * @param buf target buffer for uncompressed data
     * @param off start offset in <code>buf</code>
     * @param len maximum number of uncompressed bytes to read
     *
     * @return number of bytes read, or <code>-1</code> to indicate
     * the end of the compressed stream
     *
     * @throws CorruptedInputException
     * @throws UnsupportedOptionsException
     * @throws MemoryLimitException
     *
     * @throws XZIOException if the stream has been closed
     *
     * @throws EOFException
     * compressed input is truncated or corrupt
     *
     * @throws IOException may be thrown by <code>in</code>
     */
    public int read(byte[] buf, int off, int len) throws IOException {
        // Standard argument validation; off + len < 0 catches overflow.
        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
            throw new IndexOutOfBoundsException();

        if (len == 0)
            return 0;

        if (in == null)
            throw new XZIOException("Stream closed");

        if (exception != null)
            throw exception;

        if (endReached)
            return -1;

        int size = 0;

        try {
            while (len > 0) {
                if (blockDecoder == null) {
                    try {
                        // -1 sizes mean they aren't known in advance;
                        // BlockInputStream reads them from the header.
                        blockDecoder = new BlockInputStream(
                                in, check, memoryLimit, -1, -1);
                    } catch (IndexIndicatorException e) {
                        // The Index marks the end of the Blocks:
                        // verify it and the Stream Footer, then stop.
                        indexHash.validate(in);
                        validateStreamFooter();
                        endReached = true;
                        return size > 0 ? size : -1;
                    }
                }

                int ret = blockDecoder.read(buf, off, len);

                if (ret > 0) {
                    size += ret;
                    off += ret;
                    len -= ret;
                } else if (ret == -1) {
                    // Block finished; record its sizes for the later
                    // comparison against the Index.
                    indexHash.add(blockDecoder.getUnpaddedSize(),
                                  blockDecoder.getUncompressedSize());
                    blockDecoder = null;
                }
            }
        } catch (IOException e) {
            // Latch the exception; return partial data if any was
            // produced and rethrow on the next call.
            exception = e;
            if (size == 0)
                throw e;
        }

        return size;
    }

    // Reads and checks the 12-byte Stream Footer against the Stream
    // Header flags and the Index size accumulated in indexHash.
    private void validateStreamFooter() throws IOException {
        byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
        new DataInputStream(in).readFully(buf);
        StreamFlags streamFooterFlags = DecoderUtil.decodeStreamFooter(buf);

        if (!DecoderUtil.areStreamFlagsEqual(streamHeaderFlags,
                                             streamFooterFlags)
                || indexHash.getIndexSize() != streamFooterFlags.backwardSize)
            throw new CorruptedInputException(
                    "XZ Stream Footer does not match Stream Header");
    }

    /**
     * Returns the number of uncompressed bytes that can be read
     * without blocking. The value is returned with an assumption
     * that the compressed input data will be valid. If the compressed
     * data is corrupt, <code>CorruptedInputException</code> may get
     * thrown before the number of bytes claimed to be available have
     * been read from this input stream.
     *
     * @return the number of uncompressed bytes that can be read
     * without blocking
     */
    public int available() throws IOException {
        if (in == null)
            throw new XZIOException("Stream closed");

        if (exception != null)
            throw exception;

        return blockDecoder == null ? 0 : blockDecoder.available();
    }

    /**
     * Closes the stream and calls <code>in.close()</code>.
     * If the stream was already closed, this does nothing.
     *
     * @throws IOException if thrown by <code>in.close()</code>
     */
    public void close() throws IOException {
        if (in != null) {
            try {
                in.close();
            } finally {
                in = null;
            }
        }
    }
}

View File

@@ -0,0 +1,153 @@
/*
* UncompressedLZMA2OutputStream
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.DataOutputStream;
import java.io.IOException;
/**
 * Writes data as uncompressed LZMA2 chunks (chunk types 0x01/0x02),
 * used when storing data without actual LZMA compression.
 */
class UncompressedLZMA2OutputStream extends FinishableOutputStream {
    // Downstream sink; null once this stream has been closed.
    private FinishableOutputStream out;
    private final DataOutputStream outData;

    // Buffers up to one LZMA2 chunk's worth of uncompressed data.
    private final byte[] uncompBuf
            = new byte[LZMA2OutputStream.COMPRESSED_SIZE_MAX];
    private int uncompPos = 0;

    // The first chunk must use control byte 0x01 (dictionary reset).
    private boolean dictResetNeeded = true;

    private boolean finished = false;

    // Exception latched by an earlier write; rethrown on later calls.
    private IOException exception = null;

    // Scratch buffer for the single-byte write(int).
    private final byte[] tempBuf = new byte[1];

    static int getMemoryUsage() {
        // uncompBuf + a little extra
        return 70;
    }

    UncompressedLZMA2OutputStream(FinishableOutputStream out) {
        if (out == null)
            throw new NullPointerException();

        this.out = out;
        outData = new DataOutputStream(out);
    }

    public void write(int b) throws IOException {
        tempBuf[0] = (byte)b;
        write(tempBuf, 0, 1);
    }

    public void write(byte[] buf, int off, int len) throws IOException {
        // Standard argument validation; off + len < 0 catches overflow.
        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
            throw new IndexOutOfBoundsException();

        if (exception != null)
            throw exception;

        if (finished)
            throw new XZIOException("Stream finished or closed");

        try {
            while (len > 0) {
                // Accumulate into uncompBuf and emit a chunk each
                // time the buffer fills up.
                int copySize = Math.min(uncompBuf.length - uncompPos, len);
                System.arraycopy(buf, off, uncompBuf, uncompPos, copySize);
                len -= copySize;
                uncompPos += copySize;

                if (uncompPos == uncompBuf.length)
                    writeChunk();
            }
        } catch (IOException e) {
            exception = e;
            throw e;
        }
    }

    // Emits one uncompressed LZMA2 chunk: control byte (0x01 resets
    // the dictionary, 0x02 doesn't), size minus one as 16 bits, data.
    private void writeChunk() throws IOException {
        outData.writeByte(dictResetNeeded ? 0x01 : 0x02);
        outData.writeShort(uncompPos - 1);
        outData.write(uncompBuf, 0, uncompPos);
        uncompPos = 0;
        dictResetNeeded = false;
    }

    // Flushes any buffered data and writes the LZMA2 end marker (0x00).
    private void writeEndMarker() throws IOException {
        if (exception != null)
            throw exception;

        if (finished)
            throw new XZIOException("Stream finished or closed");

        try {
            if (uncompPos > 0)
                writeChunk();

            out.write(0x00);
        } catch (IOException e) {
            exception = e;
            throw e;
        }
    }

    public void flush() throws IOException {
        if (exception != null)
            throw exception;

        if (finished)
            throw new XZIOException("Stream finished or closed");

        try {
            // A partially filled buffer is emitted as its own chunk
            // so all written data reaches the downstream stream.
            if (uncompPos > 0)
                writeChunk();

            out.flush();
        } catch (IOException e) {
            exception = e;
            throw e;
        }
    }

    public void finish() throws IOException {
        if (!finished) {
            writeEndMarker();

            try {
                out.finish();
            } catch (IOException e) {
                exception = e;
                throw e;
            }

            finished = true;
        }
    }

    public void close() throws IOException {
        if (out != null) {
            if (!finished) {
                // Best effort: the end marker failure is latched in
                // exception and rethrown below after out is closed.
                try {
                    writeEndMarker();
                } catch (IOException e) {}
            }

            try {
                out.close();
            } catch (IOException e) {
                // Keep the earlier latched exception if there is one.
                if (exception == null)
                    exception = e;
            }

            out = null;
        }

        if (exception != null)
            throw exception;
    }
}

View File

@@ -0,0 +1,34 @@
/*
* UnsupportedOptionsException
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
/**
* Thrown when compression options not supported by this implementation
* are detected. Some other implementation might support those options.
*/
public class UnsupportedOptionsException extends XZIOException {
    // Fixed serial version; kept stable across library releases.
    private static final long serialVersionUID = 3L;

    /**
     * Creates a new UnsupportedOptionsException with null
     * as its error detail message.
     */
    public UnsupportedOptionsException() {}

    /**
     * Creates a new UnsupportedOptionsException with the given
     * error detail message.
     *
     * @param s error detail message
     */
    public UnsupportedOptionsException(String s) {
        super(s);
    }
}

View File

@@ -0,0 +1,36 @@
/*
* X86Options
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
import org.tukaani.xz.simple.X86;
/**
* BCJ filter for x86 (32-bit and 64-bit) instructions.
*/
public class X86Options extends BCJOptions {
    // x86 instructions are not aligned, so the filter alignment is 1.
    private static final int ALIGNMENT = 1;

    public X86Options() {
        super(ALIGNMENT);
    }

    /**
     * Wraps <code>out</code> in an encoding x86 BCJ filter stream.
     */
    public FinishableOutputStream getOutputStream(FinishableOutputStream out) {
        // true selects encoder mode; startOffset comes from BCJOptions.
        return new SimpleOutputStream(out, new X86(true, startOffset));
    }

    /**
     * Wraps <code>in</code> in a decoding x86 BCJ filter stream.
     */
    public InputStream getInputStream(InputStream in) {
        // false selects decoder mode.
        return new SimpleInputStream(in, new X86(false, startOffset));
    }

    FilterEncoder getFilterEncoder() {
        return new BCJEncoder(this, BCJCoder.X86_FILTER_ID);
    }
}

View File

@@ -0,0 +1,53 @@
/*
* XZ
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
/**
* XZ constants.
*/
public class XZ {
    /**
     * XZ Header Magic Bytes begin a XZ file.
     * This can be useful to detect XZ compressed data.
     */
    public static final byte[] HEADER_MAGIC = {
            (byte)0xFD, '7', 'z', 'X', 'Z', '\0' };

    /**
     * XZ Footer Magic Bytes are the last bytes of a XZ Stream.
     */
    public static final byte[] FOOTER_MAGIC = { 'Y', 'Z' };

    /**
     * Integrity check ID indicating that no integrity check is calculated.
     * <p>
     * Omitting the integrity check is strongly discouraged except when
     * the integrity of the data will be verified by other means anyway,
     * and calculating the check twice would be useless.
     */
    public static final int CHECK_NONE = 0;

    /**
     * Integrity check ID for CRC32.
     */
    public static final int CHECK_CRC32 = 1;

    /**
     * Integrity check ID for CRC64.
     */
    public static final int CHECK_CRC64 = 4;

    /**
     * Integrity check ID for SHA-256.
     */
    public static final int CHECK_SHA256 = 10;

    // Constants-only holder class; not instantiable.
    private XZ() {}
}

View File

@@ -0,0 +1,24 @@
/*
* XZFormatException
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
/**
* Thrown when the input data is not in the XZ format.
*/
public class XZFormatException extends XZIOException {
    // Fixed serial version; kept stable across library releases.
    private static final long serialVersionUID = 3L;

    /**
     * Creates a new exception with the default error detail message.
     */
    public XZFormatException() {
        super("Input is not in the XZ format");
    }
}

View File

@@ -0,0 +1,27 @@
/*
* XZIOException
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
/**
 * Base {@link java.io.IOException IOException} type for this package.
 * Every other IOException defined by this package is a subclass of
 * <code>XZIOException</code>.
 */
public class XZIOException extends java.io.IOException {
    private static final long serialVersionUID = 3L;

    /** Constructs an exception with no detail message. */
    public XZIOException() {
        super();
    }

    /**
     * Constructs an exception with the given detail message.
     *
     * @param s detail message
     */
    public XZIOException(String s) {
        super(s);
    }
}

View File

@@ -0,0 +1,313 @@
/*
* XZInputStream
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.EOFException;
import org.tukaani.xz.common.DecoderUtil;
/**
* Decompresses a .xz file in streamed mode (no seeking).
* <p>
* Use this to decompress regular standalone .xz files. This reads from
* its input stream until the end of the input or until an error occurs.
* This supports decompressing concatenated .xz files.
*
* <h4>Typical use cases</h4>
* <p>
* Getting an input stream to decompress a .xz file:
* <p><blockquote><pre>
* InputStream infile = new FileInputStream("foo.xz");
* XZInputStream inxz = new XZInputStream(infile);
* </pre></blockquote>
* <p>
* It's important to keep in mind that decompressor memory usage depends
* on the settings used to compress the file. The worst-case memory usage
* of XZInputStream is currently 1.5&nbsp;GiB. Still, very few files will
* require more than about 65&nbsp;MiB because that's how much decompressing
* a file created with the highest preset level will need, and only a few
* people use settings other than the predefined presets.
* <p>
* It is possible to specify a memory usage limit for
* <code>XZInputStream</code>. If decompression requires more memory than
* the specified limit, MemoryLimitException will be thrown when reading
* from the stream. For example, the following sets the memory usage limit
* to 100&nbsp;MiB:
* <p><blockquote><pre>
* InputStream infile = new FileInputStream("foo.xz");
* XZInputStream inxz = new XZInputStream(infile, 100 * 1024);
* </pre></blockquote>
*
* <h4>When uncompressed size is known beforehand</h4>
* <p>
* If you are decompressing complete files and your application knows
* exactly how much uncompressed data there should be, it is good to try
* reading one more byte by calling <code>read()</code> and checking
* that it returns <code>-1</code>. This way the decompressor will parse the
* file footers and verify the integrity checks, giving the caller more
* confidence that the uncompressed data is valid. (This advice seems to
* apply to
* {@link java.util.zip.GZIPInputStream java.util.zip.GZIPInputStream} too.)
*
* @see SingleXZInputStream
*/
public class XZInputStream extends InputStream {
    // Memory usage limit in KiB, or -1 for no limit. Kept so that every
    // concatenated .xz Stream found in the input is decoded under the
    // same limit.
    private final int memoryLimit;

    // Underlying compressed input; set to null by close().
    private InputStream in;

    // Decoder of the current .xz Stream; null between Streams (after the
    // previous Stream ended and before the next one has been opened).
    private SingleXZInputStream xzIn;

    // True once the end of the input (after optional Stream Padding) has
    // been reached.
    private boolean endReached = false;

    // First pending I/O error; rethrown by subsequent calls so a failed
    // stream cannot be silently read past.
    private IOException exception = null;

    // One-byte scratch buffer backing the single-byte read().
    private final byte[] tempBuf = new byte[1];

    /**
     * Creates a new XZ decompressor without a memory usage limit.
     * <p>
     * This constructor reads and parses the XZ Stream Header (12 bytes)
     * from <code>in</code>. The header of the first Block is not read
     * until <code>read</code> is called.
     *
     * @param       in          input stream from which XZ-compressed
     *                          data is read
     *
     * @throws      XZFormatException
     *                          input is not in the XZ format
     *
     * @throws      CorruptedInputException
     *                          XZ header CRC32 doesn't match
     *
     * @throws      UnsupportedOptionsException
     *                          XZ header is valid but specifies options
     *                          not supported by this implementation
     *
     * @throws      EOFException
     *                          less than 12 bytes of input was available
     *                          from <code>in</code>
     *
     * @throws      IOException may be thrown by <code>in</code>
     */
    public XZInputStream(InputStream in) throws IOException {
        this(in, -1);
    }

    /**
     * Creates a new XZ decompressor with an optional memory usage limit.
     * <p>
     * This is identical to <code>XZInputStream(InputStream)</code> except
     * that this takes also the <code>memoryLimit</code> argument.
     *
     * @param       in          input stream from which XZ-compressed
     *                          data is read
     *
     * @param       memoryLimit memory usage limit in kibibytes (KiB)
     *                          or <code>-1</code> to impose no
     *                          memory usage limit
     *
     * @throws      XZFormatException
     *                          input is not in the XZ format
     *
     * @throws      CorruptedInputException
     *                          XZ header CRC32 doesn't match
     *
     * @throws      UnsupportedOptionsException
     *                          XZ header is valid but specifies options
     *                          not supported by this implementation
     *
     * @throws      EOFException
     *                          less than 12 bytes of input was available
     *                          from <code>in</code>
     *
     * @throws      IOException may be thrown by <code>in</code>
     */
    public XZInputStream(InputStream in, int memoryLimit) throws IOException {
        this.in = in;
        this.memoryLimit = memoryLimit;
        // This parses the Stream Header of the first Stream right away.
        this.xzIn = new SingleXZInputStream(in, memoryLimit);
    }

    /**
     * Decompresses the next byte from this input stream.
     * <p>
     * Reading lots of data with <code>read()</code> from this input stream
     * may be inefficient. Wrap it in {@link java.io.BufferedInputStream}
     * if you need to read lots of data one byte at a time.
     *
     * @return      the next decompressed byte, or <code>-1</code>
     *              to indicate the end of the compressed stream
     *
     * @throws      CorruptedInputException
     * @throws      UnsupportedOptionsException
     * @throws      MemoryLimitException
     *
     * @throws      XZIOException if the stream has been closed
     *
     * @throws      EOFException
     *                          compressed input is truncated or corrupt
     *
     * @throws      IOException may be thrown by <code>in</code>
     */
    public int read() throws IOException {
        // Delegate to the array variant via the one-byte scratch buffer.
        return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
    }

    /**
     * Decompresses into an array of bytes.
     * <p>
     * If <code>len</code> is zero, no bytes are read and <code>0</code>
     * is returned. Otherwise this will try to decompress <code>len</code>
     * bytes of uncompressed data. Less than <code>len</code> bytes may
     * be read only in the following situations:
     * <ul>
     *   <li>The end of the compressed data was reached successfully.</li>
     *   <li>An error is detected after at least one but less <code>len</code>
     *       bytes have already been successfully decompressed.
     *       The next call with non-zero <code>len</code> will immediately
     *       throw the pending exception.</li>
     *   <li>An exception is thrown.</li>
     * </ul>
     *
     * @param       buf         target buffer for uncompressed data
     * @param       off         start offset in <code>buf</code>
     * @param       len         maximum number of uncompressed bytes to read
     *
     * @return      number of bytes read, or <code>-1</code> to indicate
     *              the end of the compressed stream
     *
     * @throws      CorruptedInputException
     * @throws      UnsupportedOptionsException
     * @throws      MemoryLimitException
     *
     * @throws      XZIOException if the stream has been closed
     *
     * @throws      EOFException
     *                          compressed input is truncated or corrupt
     *
     * @throws      IOException may be thrown by <code>in</code>
     */
    public int read(byte[] buf, int off, int len) throws IOException {
        // off + len < 0 catches int overflow of the sum.
        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
            throw new IndexOutOfBoundsException();
        if (len == 0)
            return 0;
        if (in == null)
            throw new XZIOException("Stream closed");
        // Re-throw a previously latched error instead of reading on.
        if (exception != null)
            throw exception;
        if (endReached)
            return -1;
        int size = 0;
        try {
            while (len > 0) {
                if (xzIn == null) {
                    // Previous Stream ended; look for another
                    // concatenated Stream or the end of the file.
                    prepareNextStream();
                    if (endReached)
                        return size == 0 ? -1 : size;
                }
                int ret = xzIn.read(buf, off, len);
                if (ret > 0) {
                    size += ret;
                    off += ret;
                    len -= ret;
                } else if (ret == -1) {
                    // Current Stream is finished; loop back to probe
                    // for the next one.
                    xzIn = null;
                }
            }
        } catch (IOException e) {
            // Latch the error. Return the bytes already produced and
            // let the next call throw the pending exception.
            exception = e;
            if (size == 0)
                throw e;
        }
        return size;
    }

    // Skips Stream Padding and either detects end of input or starts
    // decoding the next concatenated .xz Stream. Sets endReached or xzIn.
    private void prepareNextStream() throws IOException {
        DataInputStream inData = new DataInputStream(in);
        byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
        // The size of Stream Padding must be a multiple of four bytes,
        // all bytes zero.
        do {
            // First try to read one byte to see if we have reached the end
            // of the file.
            int ret = inData.read(buf, 0, 1);
            if (ret == -1) {
                endReached = true;
                return;
            }
            // Since we got one byte of input, there must be at least
            // three more available in a valid file.
            inData.readFully(buf, 1, 3);
        } while (buf[0] == 0 && buf[1] == 0 && buf[2] == 0 && buf[3] == 0);
        // Not all bytes are zero. In a valid Stream it indicates the
        // beginning of the next Stream. Read the rest of the Stream Header
        // and initialize the XZ decoder.
        inData.readFully(buf, 4, DecoderUtil.STREAM_HEADER_SIZE - 4);
        try {
            xzIn = new SingleXZInputStream(in, memoryLimit, buf);
        } catch (XZFormatException e) {
            // Since this isn't the first .xz Stream, it is more
            // logical to tell that the data is corrupt.
            throw new CorruptedInputException(
                    "Garbage after a valid XZ Stream");
        }
    }

    /**
     * Returns the number of uncompressed bytes that can be read
     * without blocking. The value is returned with an assumption
     * that the compressed input data will be valid. If the compressed
     * data is corrupt, <code>CorruptedInputException</code> may get
     * thrown before the number of bytes claimed to be available have
     * been read from this input stream.
     *
     * @return      the number of uncompressed bytes that can be read
     *              without blocking
     */
    public int available() throws IOException {
        if (in == null)
            throw new XZIOException("Stream closed");
        if (exception != null)
            throw exception;
        // Between Streams there is nothing buffered to hand out.
        return xzIn == null ? 0 : xzIn.available();
    }

    /**
     * Closes the stream and calls <code>in.close()</code>.
     * If the stream was already closed, this does nothing.
     *
     * @throws  IOException if thrown by <code>in.close()</code>
     */
    public void close() throws IOException {
        if (in != null) {
            try {
                in.close();
            } finally {
                // Mark closed even if in.close() threw.
                in = null;
            }
        }
    }
}

View File

@@ -0,0 +1,488 @@
/*
* XZOutputStream
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.OutputStream;
import java.io.IOException;
import org.tukaani.xz.common.EncoderUtil;
import org.tukaani.xz.common.StreamFlags;
import org.tukaani.xz.check.Check;
import org.tukaani.xz.index.IndexEncoder;
/**
* Compresses into the .xz file format.
*
* <h4>Examples</h4>
* <p>
* Getting an output stream to compress with LZMA2 using the default
* settings and the default integrity check type (CRC64):
* <p><blockquote><pre>
* FileOutputStream outfile = new FileOutputStream("foo.xz");
* XZOutputStream outxz = new XZOutputStream(outfile, new LZMA2Options());
* </pre></blockquote>
* <p>
* Using the preset level <code>8</code> for LZMA2 (the default
* is <code>6</code>) and SHA-256 instead of CRC64 for integrity checking:
* <p><blockquote><pre>
* XZOutputStream outxz = new XZOutputStream(outfile, new LZMA2Options(8),
* XZ.CHECK_SHA256);
* </pre></blockquote>
* <p>
* Using the x86 BCJ filter together with LZMA2 to compress x86 executables
* and printing the memory usage information before creating the
* XZOutputStream:
* <p><blockquote><pre>
* X86Options x86 = new X86Options();
* LZMA2Options lzma2 = new LZMA2Options();
* FilterOptions[] options = { x86, lzma2 };
* System.out.println("Encoder memory usage: "
* + FilterOptions.getEncoderMemoryUsage(options)
* + " KiB");
* System.out.println("Decoder memory usage: "
* + FilterOptions.getDecoderMemoryUsage(options)
* + " KiB");
* XZOutputStream outxz = new XZOutputStream(outfile, options);
* </pre></blockquote>
*/
public class XZOutputStream extends FinishableOutputStream {
    // Underlying output; set to null by close().
    private OutputStream out;

    // Stream Flags (check type) written to the Stream Header and Footer.
    private final StreamFlags streamFlags = new StreamFlags();

    // Integrity check instance shared by all Blocks of this Stream.
    private final Check check;

    // Collects (unpadded size, uncompressed size) records of finished
    // Blocks; encoded as the Index field in finish().
    private final IndexEncoder index = new IndexEncoder();

    // Encoder of the currently open Block; null when no Block is open.
    private BlockOutputStream blockEncoder = null;

    // Current filter chain; applied to each new Block.
    private FilterEncoder[] filters;

    /**
     * True if the current filter chain supports flushing.
     * If it doesn't support flushing, <code>flush()</code>
     * will use <code>endBlock()</code> as a fallback.
     */
    private boolean filtersSupportFlushing;

    // First pending I/O error; rethrown by later calls.
    private IOException exception = null;

    // True once finish() has completed successfully.
    private boolean finished = false;

    // One-byte scratch buffer backing the single-byte write().
    private final byte[] tempBuf = new byte[1];

    /**
     * Creates a new XZ compressor using one filter and CRC64 as
     * the integrity check. This constructor is equivalent to passing
     * a single-member FilterOptions array to
     * <code>XZOutputStream(OutputStream, FilterOptions[])</code>.
     *
     * @param       out         output stream to which the compressed data
     *                          will be written
     *
     * @param       filterOptions
     *                          filter options to use
     *
     * @throws      UnsupportedOptionsException
     *                          invalid filter chain
     *
     * @throws      IOException may be thrown from <code>out</code>
     */
    public XZOutputStream(OutputStream out, FilterOptions filterOptions)
            throws IOException {
        this(out, filterOptions, XZ.CHECK_CRC64);
    }

    /**
     * Creates a new XZ compressor using one filter and the specified
     * integrity check type. This constructor is equivalent to
     * passing a single-member FilterOptions array to
     * <code>XZOutputStream(OutputStream, FilterOptions[], int)</code>.
     *
     * @param       out         output stream to which the compressed data
     *                          will be written
     *
     * @param       filterOptions
     *                          filter options to use
     *
     * @param       checkType   type of the integrity check,
     *                          for example XZ.CHECK_CRC32
     *
     * @throws      UnsupportedOptionsException
     *                          invalid filter chain
     *
     * @throws      IOException may be thrown from <code>out</code>
     */
    public XZOutputStream(OutputStream out, FilterOptions filterOptions,
                          int checkType) throws IOException {
        this(out, new FilterOptions[] { filterOptions }, checkType);
    }

    /**
     * Creates a new XZ compressor using 1-4 filters and CRC64 as
     * the integrity check. This constructor is equivalent
     * <code>XZOutputStream(out, filterOptions, XZ.CHECK_CRC64)</code>.
     *
     * @param       out         output stream to which the compressed data
     *                          will be written
     *
     * @param       filterOptions
     *                          array of filter options to use
     *
     * @throws      UnsupportedOptionsException
     *                          invalid filter chain
     *
     * @throws      IOException may be thrown from <code>out</code>
     */
    public XZOutputStream(OutputStream out, FilterOptions[] filterOptions)
            throws IOException {
        this(out, filterOptions, XZ.CHECK_CRC64);
    }

    /**
     * Creates a new XZ compressor using 1-4 filters and the specified
     * integrity check type.
     *
     * @param       out         output stream to which the compressed data
     *                          will be written
     *
     * @param       filterOptions
     *                          array of filter options to use
     *
     * @param       checkType   type of the integrity check,
     *                          for example XZ.CHECK_CRC32
     *
     * @throws      UnsupportedOptionsException
     *                          invalid filter chain
     *
     * @throws      IOException may be thrown from <code>out</code>
     */
    public XZOutputStream(OutputStream out, FilterOptions[] filterOptions,
                          int checkType) throws IOException {
        this.out = out;
        updateFilters(filterOptions);
        streamFlags.checkType = checkType;
        check = Check.getInstance(checkType);
        // The 12-byte Stream Header is written immediately.
        encodeStreamHeader();
    }

    /**
     * Updates the filter chain with a single filter.
     * This is equivalent to passing a single-member FilterOptions array
     * to <code>updateFilters(FilterOptions[])</code>.
     *
     * @param       filterOptions
     *                          new filter to use
     *
     * @throws      UnsupportedOptionsException
     *                          unsupported filter chain, or trying to change
     *                          the filter chain in the middle of a Block
     */
    public void updateFilters(FilterOptions filterOptions)
            throws XZIOException {
        FilterOptions[] opts = new FilterOptions[1];
        opts[0] = filterOptions;
        updateFilters(opts);
    }

    /**
     * Updates the filter chain with 1-4 filters.
     * <p>
     * Currently this cannot be used to update e.g. LZMA2 options in the
     * middle of a XZ Block. Use <code>endBlock()</code> to finish the
     * current XZ Block before calling this function. The new filter chain
     * will then be used for the next XZ Block.
     *
     * @param       filterOptions
     *                          new filter chain to use
     *
     * @throws      UnsupportedOptionsException
     *                          unsupported filter chain, or trying to change
     *                          the filter chain in the middle of a Block
     */
    public void updateFilters(FilterOptions[] filterOptions)
            throws XZIOException {
        if (blockEncoder != null)
            throw new UnsupportedOptionsException("Changing filter options "
                    + "in the middle of a XZ Block not implemented");
        if (filterOptions.length < 1 || filterOptions.length > 4)
            throw new UnsupportedOptionsException(
                    "XZ filter chain must be 1-4 filters");
        // The chain supports flushing only if every filter in it does.
        filtersSupportFlushing = true;
        FilterEncoder[] newFilters = new FilterEncoder[filterOptions.length];
        for (int i = 0; i < filterOptions.length; ++i) {
            newFilters[i] = filterOptions[i].getFilterEncoder();
            filtersSupportFlushing &= newFilters[i].supportsFlushing();
        }
        // Validate before committing so an invalid chain leaves the old
        // chain in place.
        RawCoder.validate(newFilters);
        filters = newFilters;
    }

    /**
     * Writes one byte to be compressed.
     *
     * @throws      XZIOException
     *                          XZ Stream has grown too big
     *
     * @throws      XZIOException
     *                          <code>finish()</code> or <code>close()</code>
     *                          was already called
     *
     * @throws      IOException may be thrown by the underlying output stream
     */
    public void write(int b) throws IOException {
        tempBuf[0] = (byte)b;
        write(tempBuf, 0, 1);
    }

    /**
     * Writes an array of bytes to be compressed.
     * The compressors tend to do internal buffering and thus the written
     * data won't be readable from the compressed output immediately.
     * Use <code>flush()</code> to force everything written so far to
     * be written to the underlaying output stream, but be aware that
     * flushing reduces compression ratio.
     *
     * @param       buf         buffer of bytes to be written
     * @param       off         start offset in <code>buf</code>
     * @param       len         number of bytes to write
     *
     * @throws      XZIOException
     *                          XZ Stream has grown too big: total file size
     *                          about 8&nbsp;EiB or the Index field exceeds
     *                          16&nbsp;GiB; you shouldn't reach these sizes
     *                          in practice
     *
     * @throws      XZIOException
     *                          <code>finish()</code> or <code>close()</code>
     *                          was already called and len &gt; 0
     *
     * @throws      IOException may be thrown by the underlying output stream
     */
    public void write(byte[] buf, int off, int len) throws IOException {
        // off + len < 0 catches int overflow of the sum.
        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
            throw new IndexOutOfBoundsException();
        if (exception != null)
            throw exception;
        if (finished)
            throw new XZIOException("Stream finished or closed");
        try {
            // Lazily open a new Block on the first write after a
            // constructor, endBlock(), or fallback flush.
            if (blockEncoder == null)
                blockEncoder = new BlockOutputStream(out, filters, check);
            blockEncoder.write(buf, off, len);
        } catch (IOException e) {
            exception = e;
            throw e;
        }
    }

    /**
     * Finishes the current XZ Block (but not the whole XZ Stream).
     * This doesn't flush the stream so it's possible that not all data will
     * be decompressible from the output stream when this function returns.
     * Call also <code>flush()</code> if flushing is wanted in addition to
     * finishing the current XZ Block.
     * <p>
     * If there is no unfinished Block open, this function will do nothing.
     * (No empty XZ Block will be created.)
     * <p>
     * This function can be useful, for example, to create
     * random-accessible .xz files.
     * <p>
     * Starting a new XZ Block means that the encoder state is reset.
     * Doing this very often will increase the size of the compressed
     * file a lot (more than plain <code>flush()</code> would do).
     *
     * @throws      XZIOException
     *                          XZ Stream has grown too big
     *
     * @throws      XZIOException
     *                          stream finished or closed
     *
     * @throws      IOException may be thrown by the underlying output stream
     */
    public void endBlock() throws IOException {
        if (exception != null)
            throw exception;
        if (finished)
            throw new XZIOException("Stream finished or closed");
        // NOTE: Once there is threading with multiple Blocks, it's possible
        // that this function will be more like a barrier that returns
        // before the last Block has been finished.
        if (blockEncoder != null) {
            try {
                blockEncoder.finish();
                // Record the Block's sizes for the Index field written
                // by finish().
                index.add(blockEncoder.getUnpaddedSize(),
                          blockEncoder.getUncompressedSize());
                blockEncoder = null;
            } catch (IOException e) {
                exception = e;
                throw e;
            }
        }
    }

    /**
     * Flushes the encoder and calls <code>out.flush()</code>.
     * All buffered pending data will then be decompressible from
     * the output stream.
     * <p>
     * Calling this function very often may increase the compressed
     * file size a lot. The filter chain options may affect the size
     * increase too. For example, with LZMA2 the HC4 match finder has
     * smaller penalty with flushing than BT4.
     * <p>
     * Some filters don't support flushing. If the filter chain has
     * such a filter, <code>flush()</code> will call <code>endBlock()</code>
     * before flushing.
     *
     * @throws      XZIOException
     *                          XZ Stream has grown too big
     *
     * @throws      XZIOException
     *                          stream finished or closed
     *
     * @throws      IOException may be thrown by the underlying output stream
     */
    public void flush() throws IOException {
        if (exception != null)
            throw exception;
        if (finished)
            throw new XZIOException("Stream finished or closed");
        try {
            if (blockEncoder != null) {
                if (filtersSupportFlushing) {
                    // This will eventually call out.flush() so
                    // no need to do it here again.
                    blockEncoder.flush();
                } else {
                    // Fallback: close the Block so its data becomes
                    // decompressible, then flush the raw stream.
                    endBlock();
                    out.flush();
                }
            } else {
                out.flush();
            }
        } catch (IOException e) {
            exception = e;
            throw e;
        }
    }

    /**
     * Finishes compression without closing the underlying stream.
     * No more data can be written to this stream after finishing
     * (calling <code>write</code> with an empty buffer is OK).
     * <p>
     * Repeated calls to <code>finish()</code> do nothing unless
     * an exception was thrown by this stream earlier. In that case
     * the same exception is thrown again.
     * <p>
     * After finishing, the stream may be closed normally with
     * <code>close()</code>. If the stream will be closed anyway, there
     * usually is no need to call <code>finish()</code> separately.
     *
     * @throws      XZIOException
     *                          XZ Stream has grown too big
     *
     * @throws      IOException may be thrown by the underlying output stream
     */
    public void finish() throws IOException {
        if (!finished) {
            // This checks for pending exceptions so we don't need to
            // worry about it here.
            endBlock();
            try {
                index.encode(out);
                encodeStreamFooter();
            } catch (IOException e) {
                exception = e;
                throw e;
            }
            // Set it to true only if everything goes fine. Setting it earlier
            // would cause repeated calls to finish() do nothing instead of
            // throwing an exception to indicate an earlier error.
            finished = true;
        }
    }

    /**
     * Finishes compression and closes the underlying stream.
     * The underlying stream <code>out</code> is closed even if finishing
     * fails. If both finishing and closing fail, the exception thrown
     * by <code>finish()</code> is thrown and the exception from the failed
     * <code>out.close()</code> is lost.
     *
     * @throws      XZIOException
     *                          XZ Stream has grown too big
     *
     * @throws      IOException may be thrown by the underlying output stream
     */
    public void close() throws IOException {
        if (out != null) {
            // If finish() throws an exception, it stores the exception to
            // the variable "exception". So we can ignore the possible
            // exception here.
            try {
                finish();
            } catch (IOException e) {}
            try {
                out.close();
            } catch (IOException e) {
                // Remember the exception but only if there is no previous
                // pending exception.
                if (exception == null)
                    exception = e;
            }
            out = null;
        }
        if (exception != null)
            throw exception;
    }

    // Serializes the two Stream Flags bytes (first byte reserved as zero,
    // second byte is the check type) into buf at off.
    private void encodeStreamFlags(byte[] buf, int off) {
        buf[off] = 0x00;
        buf[off + 1] = (byte)streamFlags.checkType;
    }

    // Writes the Stream Header: magic bytes, Stream Flags, CRC32 of the
    // flags (CRC comes after the flags here, unlike in the footer).
    private void encodeStreamHeader() throws IOException {
        out.write(XZ.HEADER_MAGIC);
        byte[] buf = new byte[2];
        encodeStreamFlags(buf, 0);
        out.write(buf);
        EncoderUtil.writeCRC32(out, buf);
    }

    // Writes the Stream Footer: CRC32, Backward Size (stored as
    // realSize / 4 - 1, little endian), Stream Flags, footer magic.
    private void encodeStreamFooter() throws IOException {
        byte[] buf = new byte[6];
        long backwardSize = index.getIndexSize() / 4 - 1;
        for (int i = 0; i < 4; ++i)
            buf[i] = (byte)(backwardSize >>> (i * 8));
        encodeStreamFlags(buf, 4);
        // In the footer the CRC32 precedes the data it covers.
        EncoderUtil.writeCRC32(out, buf);
        out.write(buf);
        out.write(XZ.FOOTER_MAGIC);
    }
}

View File

@@ -0,0 +1,33 @@
/*
* CRC32
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.check;
/**
 * CRC32 integrity check, backed by {@link java.util.zip.CRC32}.
 */
public class CRC32 extends Check {
    private final java.util.zip.CRC32 state = new java.util.zip.CRC32();

    public CRC32() {
        size = 4;
        name = "CRC32";
    }

    /** Feeds a slice of data into the running checksum. */
    public void update(byte[] buf, int off, int len) {
        state.update(buf, off, len);
    }

    /**
     * Returns the four checksum bytes in little-endian order and
     * resets the state for the next use.
     */
    public byte[] finish() {
        long value = state.getValue();
        byte[] buf = new byte[4];
        for (int i = 0; i < buf.length; ++i)
            buf[i] = (byte)(value >>> (8 * i));
        state.reset();
        return buf;
    }
}

View File

@@ -0,0 +1,54 @@
/*
* CRC64
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.check;
/**
 * CRC64 integrity check implemented with a single precomputed
 * 256-entry lookup table (byte-at-a-time, reflected polynomial).
 */
public class CRC64 extends Check {
    private static final long poly = 0xC96C5795D7870F42L;
    private static final long[] crcTable = new long[256];

    // Running CRC value; starts as (and is reset to) all ones.
    private long crc = -1;

    static {
        // Each table entry is the CRC contribution of one byte value.
        for (int b = 0; b < crcTable.length; ++b) {
            long r = b;
            for (int i = 0; i < 8; ++i)
                r = ((r & 1) == 1) ? (r >>> 1) ^ poly : r >>> 1;
            crcTable[b] = r;
        }
    }

    public CRC64() {
        size = 8;
        name = "CRC64";
    }

    /** Feeds a slice of data into the running checksum. */
    public void update(byte[] buf, int off, int len) {
        for (int i = off; i < off + len; ++i)
            crc = crcTable[(buf[i] ^ (int)crc) & 0xFF] ^ (crc >>> 8);
    }

    /**
     * Returns the eight checksum bytes in little-endian order and
     * resets the state for the next use.
     */
    public byte[] finish() {
        long value = ~crc;
        crc = -1;
        byte[] buf = new byte[8];
        for (int i = 0; i < buf.length; ++i)
            buf[i] = (byte)(value >> (i * 8));
        return buf;
    }
}

View File

@@ -0,0 +1,57 @@
/*
* Check
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.check;
import org.tukaani.xz.XZ;
import org.tukaani.xz.UnsupportedOptionsException;
/**
 * Base class of the integrity checks stored in .xz files.
 */
public abstract class Check {
    // Size of the stored check field in bytes; set by subclasses.
    int size;
    // Human-readable name of the check; set by subclasses.
    String name;

    public abstract void update(byte[] buf, int off, int len);
    public abstract byte[] finish();

    /** Convenience overload that hashes the whole array. */
    public void update(byte[] buf) {
        update(buf, 0, buf.length);
    }

    public int getSize() {
        return size;
    }

    public String getName() {
        return name;
    }

    /**
     * Creates a Check instance matching the given Check ID.
     *
     * @throws UnsupportedOptionsException
     *          the ID is unknown, or the algorithm (SHA-256) is not
     *          provided by this JRE
     */
    public static Check getInstance(int checkType)
            throws UnsupportedOptionsException {
        if (checkType == XZ.CHECK_NONE)
            return new None();
        if (checkType == XZ.CHECK_CRC32)
            return new CRC32();
        if (checkType == XZ.CHECK_CRC64)
            return new CRC64();
        if (checkType == XZ.CHECK_SHA256) {
            try {
                return new SHA256();
            } catch (java.security.NoSuchAlgorithmException e) {
                // Fall through to the unsupported-ID error below.
            }
        }
        throw new UnsupportedOptionsException(
                "Unsupported Check ID " + checkType);
    }
}

View File

@@ -0,0 +1,24 @@
/*
* None
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.check;
/**
 * Dummy integrity check that stores nothing and computes nothing,
 * used when the Check ID is XZ.CHECK_NONE.
 */
public class None extends Check {
    public None() {
        size = 0;
        name = "None";
    }

    public void update(byte[] buf, int off, int len) {
        // Intentionally a no-op: there is nothing to compute.
    }

    public byte[] finish() {
        // A zero-length check field.
        return new byte[0];
    }
}

View File

@@ -0,0 +1,30 @@
/*
* SHA256
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.check;
/**
 * SHA-256 integrity check, delegating to the JCA
 * {@link java.security.MessageDigest} API.
 */
public class SHA256 extends Check {
    private final java.security.MessageDigest sha256;

    /**
     * @throws java.security.NoSuchAlgorithmException
     *          this runtime has no SHA-256 provider
     */
    public SHA256() throws java.security.NoSuchAlgorithmException {
        size = 32;
        name = "SHA-256";
        sha256 = java.security.MessageDigest.getInstance("SHA-256");
    }

    /** Feeds a slice of data into the running digest. */
    public void update(byte[] buf, int off, int len) {
        sha256.update(buf, off, len);
    }

    /** Returns the 32-byte digest and resets the state for reuse. */
    public byte[] finish() {
        byte[] hash = sha256.digest();
        sha256.reset();
        return hash;
    }
}

View File

@@ -0,0 +1,121 @@
/*
* DecoderUtil
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.common;
import java.io.InputStream;
import java.io.IOException;
import java.io.EOFException;
import java.util.zip.CRC32;
import org.tukaani.xz.XZ;
import org.tukaani.xz.XZFormatException;
import org.tukaani.xz.CorruptedInputException;
import org.tukaani.xz.UnsupportedOptionsException;
public class DecoderUtil extends Util {
    /**
     * Returns true if the CRC32 of buf[off .. off+len) equals the four
     * little-endian reference bytes stored at buf[ref_off .. ref_off+4).
     */
    public static boolean isCRC32Valid(byte[] buf, int off, int len,
                                       int ref_off) {
        CRC32 crc32 = new CRC32();
        crc32.update(buf, off, len);
        long value = crc32.getValue();
        for (int i = 0; i < 4; ++i)
            if ((byte)(value >>> (i * 8)) != buf[ref_off + i])
                return false;
        return true;
    }

    /**
     * Parses a 12-byte XZ Stream Header: magic bytes, Stream Flags,
     * and the CRC32 of the flags.
     *
     * @throws XZFormatException          magic bytes don't match
     * @throws CorruptedInputException    CRC32 mismatch
     * @throws UnsupportedOptionsException unknown Stream Flags
     */
    public static StreamFlags decodeStreamHeader(byte[] buf)
            throws IOException {
        for (int i = 0; i < XZ.HEADER_MAGIC.length; ++i)
            if (buf[i] != XZ.HEADER_MAGIC[i])
                throw new XZFormatException();
        // In the header the CRC32 is stored after the two flag bytes.
        if (!isCRC32Valid(buf, XZ.HEADER_MAGIC.length, 2,
                          XZ.HEADER_MAGIC.length + 2))
            throw new CorruptedInputException("XZ Stream Header is corrupt");
        try {
            return decodeStreamFlags(buf, XZ.HEADER_MAGIC.length);
        } catch (UnsupportedOptionsException e) {
            // Rethrow with a message that names the location.
            throw new UnsupportedOptionsException(
                    "Unsupported options in XZ Stream Header");
        }
    }

    /**
     * Parses a 12-byte XZ Stream Footer: CRC32, Backward Size,
     * Stream Flags, and the footer magic bytes. The returned flags
     * carry the decoded backwardSize.
     */
    public static StreamFlags decodeStreamFooter(byte[] buf)
            throws IOException {
        if (buf[10] != XZ.FOOTER_MAGIC[0] || buf[11] != XZ.FOOTER_MAGIC[1]) {
            // NOTE: The exception could be XZFormatException too.
            // It depends on the situation which one is better.
            throw new CorruptedInputException("XZ Stream Footer is corrupt");
        }
        // In the footer the CRC32 (at offset 0) covers bytes 4..9.
        if (!isCRC32Valid(buf, 4, 6, 0))
            throw new CorruptedInputException("XZ Stream Footer is corrupt");
        StreamFlags streamFlags;
        try {
            streamFlags = decodeStreamFlags(buf, 8);
        } catch (UnsupportedOptionsException e) {
            throw new UnsupportedOptionsException(
                    "Unsupported options in XZ Stream Footer");
        }
        // Backward Size is stored little endian as (realSize / 4) - 1.
        streamFlags.backwardSize = 0;
        for (int i = 0; i < 4; ++i)
            streamFlags.backwardSize |= (buf[i + 4] & 0xFF) << (i * 8);
        streamFlags.backwardSize = (streamFlags.backwardSize + 1) * 4;
        return streamFlags;
    }

    /**
     * Decodes the two Stream Flags bytes at buf[off]. The first byte is
     * reserved and must be zero; the low nibble of the second byte is
     * the check type.
     */
    private static StreamFlags decodeStreamFlags(byte[] buf, int off)
            throws UnsupportedOptionsException {
        if (buf[off] != 0x00 || (buf[off + 1] & 0xFF) >= 0x10)
            throw new UnsupportedOptionsException();
        StreamFlags streamFlags = new StreamFlags();
        streamFlags.checkType = buf[off + 1];
        return streamFlags;
    }

    /** Returns true if both flag sets use the same check type. */
    public static boolean areStreamFlagsEqual(StreamFlags a, StreamFlags b) {
        // backwardSize is intentionally not compared.
        return a.checkType == b.checkType;
    }

    /**
     * Reads a variable-length integer (seven payload bits per byte,
     * high bit set = more bytes follow) from <code>in</code>.
     *
     * @throws EOFException             input ended mid-number
     * @throws CorruptedInputException  number longer than VLI_SIZE_MAX
     *                                  bytes, or a redundant trailing
     *                                  zero byte
     */
    public static long decodeVLI(InputStream in) throws IOException {
        int b = in.read();
        if (b == -1)
            throw new EOFException();
        long num = b & 0x7F;
        int i = 0;
        while ((b & 0x80) != 0x00) {
            if (++i >= VLI_SIZE_MAX)
                throw new CorruptedInputException();
            b = in.read();
            if (b == -1)
                throw new EOFException();
            // A continuation byte of zero would be a non-canonical
            // (redundant) encoding.
            if (b == 0x00)
                throw new CorruptedInputException();
            num |= (long)(b & 0x7F) << (i * 7);
        }
        return num;
    }
}

View File

@@ -0,0 +1,36 @@
/*
* EncoderUtil
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.common;
import java.io.OutputStream;
import java.io.IOException;
import java.util.zip.CRC32;
/**
 * Helper routines shared by the .xz encoder classes.
 */
public class EncoderUtil extends Util {
    /**
     * Computes the CRC32 of <code>buf</code> and writes it to
     * <code>out</code> as four little-endian bytes.
     */
    public static void writeCRC32(OutputStream out, byte[] buf)
            throws IOException {
        CRC32 crc32 = new CRC32();
        crc32.update(buf);
        long value = crc32.getValue();
        for (int shift = 0; shift < 32; shift += 8)
            out.write((byte)(value >>> shift));
    }

    /**
     * Writes <code>num</code> in the variable-length integer encoding
     * used by the .xz format: seven payload bits per byte, high bit
     * set on every byte except the last.
     */
    public static void encodeVLI(OutputStream out, long num)
            throws IOException {
        for (;;) {
            if (num < 0x80) {
                out.write((byte)num);
                return;
            }
            out.write((byte)(num | 0x80));
            num >>>= 7;
        }
    }
}

View File

@@ -0,0 +1,15 @@
/*
* StreamFlags
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.common;
/**
 * Holds the decoded fields of the XZ Stream Flags
 * (shared by the Stream Header and Stream Footer).
 */
public class StreamFlags {
    // Integrity check ID (one of the XZ.CHECK_* constants);
    // -1 means not yet set.
    public int checkType = -1;
    // Backward Size value from the Stream Footer, in bytes;
    // -1 means not yet set (the Stream Header doesn't carry it).
    public long backwardSize = -1;
}

View File

@@ -0,0 +1,28 @@
/*
* Util
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.common;
/**
 * Constants and helpers shared by the .xz decoder and encoder.
 */
public class Util {
    /** Size of the XZ Stream Header and Stream Footer in bytes. */
    public static final int STREAM_HEADER_SIZE = 12;

    /** Maximum value of the Backward Size field (16 GiB). */
    public static final long BACKWARD_SIZE_MAX = 1L << 34;

    /** Maximum allowed size of a Block Header in bytes. */
    public static final int BLOCK_HEADER_SIZE_MAX = 1024;

    /** Largest value representable as a variable-length integer. */
    public static final long VLI_MAX = Long.MAX_VALUE;

    /** Maximum encoded size of a variable-length integer in bytes. */
    public static final int VLI_SIZE_MAX = 9;

    /**
     * Returns how many bytes the variable-length integer encoding of
     * <code>num</code> takes (seven payload bits per byte).
     */
    public static int getVLISize(long num) {
        int size = 1;
        while ((num >>= 7) != 0)
            ++size;
        return size;
    }
}

View File

@@ -0,0 +1,27 @@
/*
* DeltaCoder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.delta;
/**
 * State shared by the Delta filter encoder and decoder: the configured
 * distance and a ring buffer of the most recently seen bytes.
 */
abstract class DeltaCoder {
    static final int DISTANCE_MIN = 1;
    static final int DISTANCE_MAX = 256;
    // DISTANCE_MAX is a power of two, so this mask implements
    // "mod DISTANCE_MAX" for the ring-buffer indices.
    static final int DISTANCE_MASK = DISTANCE_MAX - 1;
    // Delta distance in bytes, in [DISTANCE_MIN, DISTANCE_MAX].
    final int distance;
    // Ring buffer of previously processed bytes, indexed via DISTANCE_MASK.
    final byte[] history = new byte[DISTANCE_MAX];
    // Current ring-buffer position; decremented by the subclasses and
    // wrapped with DISTANCE_MASK.
    int pos = 0;

    /**
     * @throws IllegalArgumentException distance outside
     *          [DISTANCE_MIN, DISTANCE_MAX]
     */
    DeltaCoder(int distance) {
        if (distance < DISTANCE_MIN || distance > DISTANCE_MAX)
            throw new IllegalArgumentException();
        this.distance = distance;
    }
}

View File

@@ -0,0 +1,24 @@
/*
* DeltaDecoder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.delta;
/**
 * Decoder for the Delta filter: adds back the byte that was seen
 * <code>distance</code> positions earlier in the stream.
 */
public class DeltaDecoder extends DeltaCoder {
    public DeltaDecoder(int distance) {
        super(distance);
    }

    /**
     * Undoes delta encoding of <code>len</code> bytes of
     * <code>buf</code> in place, starting at <code>off</code>.
     */
    public void decode(byte[] buf, int off, int len) {
        for (int i = off; i < off + len; ++i) {
            // Reconstruct the original byte, then store it into the
            // history ring buffer for later references.
            buf[i] += history[(distance + pos) & DISTANCE_MASK];
            history[pos & DISTANCE_MASK] = buf[i];
            --pos;
        }
    }
}

View File

@@ -0,0 +1,24 @@
/*
* DeltaEncoder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.delta;
/**
 * Applies the Delta filter: each output byte is the difference between
 * the current input byte and the input byte <code>distance</code>
 * positions earlier.
 */
public class DeltaEncoder extends DeltaCoder {
    public DeltaEncoder(int distance) {
        super(distance);
    }

    /**
     * Encodes <code>len</code> bytes from <code>in</code> (starting at
     * <code>in_off</code>) into <code>out</code> (starting at 0).
     */
    public void encode(byte[] in, int in_off, int len, byte[] out) {
        for (int i = 0; i < len; ++i) {
            byte earlier = history[(distance + pos) & DISTANCE_MASK];
            byte current = in[in_off + i];
            history[pos-- & DISTANCE_MASK] = current;
            out[i] = (byte)(current - earlier);
        }
    }
}

View File

@@ -0,0 +1,38 @@
/*
* BlockInfo
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.index;
import org.tukaani.xz.common.StreamFlags;
/**
 * Mutable holder describing a single Block of an .xz file, filled in
 * from a decoded Index via {@link IndexDecoder#setBlockInfo}. Starts
 * out pointing at the Index of the first Stream; {@link #setNext()}
 * advances it one Block at a time.
 */
public class BlockInfo {
    public int blockNumber = -1;          // Block number, or -1 if unset
    public long compressedOffset = -1;    // offset within the .xz file
    public long uncompressedOffset = -1;  // offset in uncompressed data
    public long unpaddedSize = -1;        // size without Block Padding
    public long uncompressedSize = -1;    // uncompressed size of the Block

    // Index of the Stream whose Records currently back this object.
    IndexDecoder index;

    public BlockInfo(IndexDecoder indexOfFirstStream) {
        index = indexOfFirstStream;
    }

    /** Returns the Check ID of the Stream containing this Block. */
    public int getCheckType() {
        return index.getStreamFlags().checkType;
    }

    /**
     * Returns true if the current Index has a Record for the Block
     * after this one. NOTE(review): this queries only the current
     * Stream's Index; advancing across Streams is presumably handled
     * by the caller — confirm against SeekableXZInputStream.
     */
    public boolean hasNext() {
        return index.hasRecord(blockNumber + 1);
    }

    /** Advances this object to describe the next Block. */
    public void setNext() {
        index.setBlockInfo(this, blockNumber + 1);
    }
}

View File

@@ -0,0 +1,56 @@
/*
* IndexBase
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.index;
import org.tukaani.xz.common.Util;
import org.tukaani.xz.XZIOException;
/**
 * Base class for Index handling. Accumulates per-Record totals (Block
 * sizes, Record count, serialized Record-list size) and derives from
 * them the size of the Index field and of the whole Stream.
 */
abstract class IndexBase {
    // Thrown by add() when the accumulated sizes overflow or exceed
    // format limits; the concrete subclass supplies the instance.
    private final XZIOException invalidIndexException;

    // Total size of the Blocks, each rounded up to a multiple of four.
    long blocksSum = 0;

    // Total uncompressed size of the Blocks.
    long uncompressedSum = 0;

    // Size in bytes of the serialized List of Records.
    long indexListSize = 0;

    // Number of Records added so far.
    long recordCount = 0;

    IndexBase(XZIOException invalidIndexException) {
        this.invalidIndexException = invalidIndexException;
    }

    private long getUnpaddedIndexSize() {
        // Index Indicator + Number of Records + List of Records + CRC32
        return 1 + Util.getVLISize(recordCount) + indexListSize + 4;
    }

    /** Returns the size of the Index field including Index Padding. */
    public long getIndexSize() {
        // Round up to the next multiple of four.
        return (getUnpaddedIndexSize() + 3) & ~3;
    }

    /** Returns the total size of the Stream described by this Index. */
    public long getStreamSize() {
        // Stream Header + Blocks + Index + Stream Footer (the Footer
        // has the same size as the Header).
        return Util.STREAM_HEADER_SIZE + blocksSum + getIndexSize()
               + Util.STREAM_HEADER_SIZE;
    }

    /** Number of padding bytes needed after the List of Records. */
    int getIndexPaddingSize() {
        return (int)((4 - getUnpaddedIndexSize()) & 3);
    }

    /**
     * Accounts for one Block.
     *
     * @throws XZIOException if the totals overflow or violate the
     *                       format's size limits
     */
    void add(long unpaddedSize, long uncompressedSize) throws XZIOException {
        blocksSum += (unpaddedSize + 3) & ~3;
        uncompressedSum += uncompressedSize;
        indexListSize += Util.getVLISize(unpaddedSize)
                         + Util.getVLISize(uncompressedSize);
        ++recordCount;

        // A negative sum indicates long overflow; also enforce the
        // Backward Size limit on the Index field.
        if (blocksSum < 0 || uncompressedSum < 0
                || getIndexSize() > Util.BACKWARD_SIZE_MAX
                || getStreamSize() < 0)
            throw invalidIndexException;
    }
}

View File

@@ -0,0 +1,223 @@
/*
* IndexDecoder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.index;
import java.io.IOException;
import java.io.EOFException;
import java.util.zip.CheckedInputStream;
import org.tukaani.xz.common.DecoderUtil;
import org.tukaani.xz.common.StreamFlags;
import org.tukaani.xz.SeekableInputStream;
import org.tukaani.xz.CorruptedInputException;
import org.tukaani.xz.MemoryLimitException;
import org.tukaani.xz.UnsupportedOptionsException;
/**
 * Decodes and validates the Index field of one .xz Stream, then
 * answers Block lookup queries against it. The constructor reads the
 * whole Index (Record list, Index Padding, and CRC32); after that the
 * object is read-only except for {@link #setOffsets}, which positions
 * this Stream relative to preceding Streams in the file.
 */
public class IndexDecoder extends IndexBase {
    private final StreamFlags streamFlags;
    private final long streamPadding;
    private final int memoryUsage;

    // Unpadded Size and Uncompressed Size fields, stored as running
    // (cumulative) sums so locateBlock can binary search them.
    private final long[] unpadded;
    private final long[] uncompressed;

    // Uncompressed size of the largest Block. It is used by
    // SeekableXZInputStream to find out the largest Block of the .xz file.
    private long largestBlockSize = 0;

    // Offsets relative to the beginning of the .xz file. These are all zero
    // for the first Stream in the file.
    private int recordOffset = 0;
    private long compressedOffset = 0;
    private long uncompressedOffset = 0;

    /**
     * Reads the Index from <code>in</code>, which must be positioned
     * at the start of the Index field.
     *
     * @throws CorruptedInputException      if the Index is invalid
     * @throws UnsupportedOptionsException  if the Record count doesn't
     *                                      fit into an int
     * @throws MemoryLimitException         if decoding would exceed
     *                                      <code>memoryLimit</code>
     */
    public IndexDecoder(SeekableInputStream in, StreamFlags streamFooterFlags,
                        long streamPadding, int memoryLimit)
            throws IOException {
        super(new CorruptedInputException("XZ Index is corrupt"));
        this.streamFlags = streamFooterFlags;
        this.streamPadding = streamPadding;

        // If endPos is exceeded before the CRC32 field has been decoded,
        // the Index is corrupt.
        long endPos = in.position() + streamFooterFlags.backwardSize - 4;

        java.util.zip.CRC32 crc32 = new java.util.zip.CRC32();
        CheckedInputStream inChecked = new CheckedInputStream(in, crc32);

        // Index Indicator
        if (inChecked.read() != 0x00)
            throw new CorruptedInputException("XZ Index is corrupt");

        try {
            // Number of Records
            long count = DecoderUtil.decodeVLI(inChecked);

            // Catch Record counts that are obviously too high to be valid.
            // This test isn't exact because it ignores Index Indicator,
            // Number of Records, and CRC32 fields, but this is good enough
            // to catch the most obvious problems.
            if (count >= streamFooterFlags.backwardSize / 2)
                throw new CorruptedInputException("XZ Index is corrupt");

            // If the Record count doesn't fit into an int, we cannot
            // allocate the arrays to hold the Records.
            if (count > Integer.MAX_VALUE)
                throw new UnsupportedOptionsException("XZ Index has over "
                        + Integer.MAX_VALUE + " Records");

            // Calculate approximate memory requirements and check the
            // memory usage limit.
            memoryUsage = 1 + (int)((16L * count + 1023) / 1024);
            if (memoryLimit >= 0 && memoryUsage > memoryLimit)
                throw new MemoryLimitException(memoryUsage, memoryLimit);

            // Allocate the arrays for the Records.
            unpadded = new long[(int)count];
            uncompressed = new long[(int)count];
            int record = 0;

            // Decode the Records.
            for (int i = (int)count; i > 0; --i) {
                // Get the next Record.
                long unpaddedSize = DecoderUtil.decodeVLI(inChecked);
                long uncompressedSize = DecoderUtil.decodeVLI(inChecked);

                // Check that the input position stays sane. Since this is
                // checked only once per loop iteration instead of for
                // every input byte read, it's still possible that
                // EOFException gets thrown with corrupt input.
                if (in.position() > endPos)
                    throw new CorruptedInputException("XZ Index is corrupt");

                // Add the new Record as cumulative sums.
                unpadded[record] = blocksSum + unpaddedSize;
                uncompressed[record] = uncompressedSum + uncompressedSize;
                ++record;
                super.add(unpaddedSize, uncompressedSize);
                assert record == recordCount;

                // Remember the uncompressed size of the largest Block.
                if (largestBlockSize < uncompressedSize)
                    largestBlockSize = uncompressedSize;
            }
        } catch (EOFException e) {
            // EOFException is caught just in case a corrupt input causes
            // DecoderUtil.decodeVLI to read too much at once.
            throw new CorruptedInputException("XZ Index is corrupt");
        }

        // Validate that the size of the Index field matches
        // Backward Size.
        int indexPaddingSize = getIndexPaddingSize();
        if (in.position() + indexPaddingSize != endPos)
            throw new CorruptedInputException("XZ Index is corrupt");

        // Index Padding (must be all zero bytes)
        while (indexPaddingSize-- > 0)
            if (inChecked.read() != 0x00)
                throw new CorruptedInputException("XZ Index is corrupt");

        // CRC32: compare the stored little-endian value byte by byte
        // against the checksum calculated over the decoded fields.
        long value = crc32.getValue();
        for (int i = 0; i < 4; ++i)
            if (((value >>> (i * 8)) & 0xFF) != in.read())
                throw new CorruptedInputException("XZ Index is corrupt");
    }

    /**
     * Positions this Stream's Records after those of <code>prev</code>,
     * the Index of the preceding Stream in the file.
     */
    public void setOffsets(IndexDecoder prev) {
        // NOTE: SeekableXZInputStream checks that the total number of Blocks
        // in concatenated Streams fits into an int.
        recordOffset = prev.recordOffset + (int)prev.recordCount;
        compressedOffset = prev.compressedOffset
                           + prev.getStreamSize() + prev.streamPadding;
        assert (compressedOffset & 3) == 0;
        uncompressedOffset = prev.uncompressedOffset + prev.uncompressedSum;
    }

    /** Returns the approximate decoder memory usage in kibibytes. */
    public int getMemoryUsage() {
        return memoryUsage;
    }

    /** Returns the Stream Footer flags this Index was decoded with. */
    public StreamFlags getStreamFlags() {
        return streamFlags;
    }

    public int getRecordCount() {
        // It was already checked in the constructor that it fits into an int.
        // Otherwise we couldn't have allocated the arrays.
        return (int)recordCount;
    }

    /** Returns the total uncompressed size of this Stream's Blocks. */
    public long getUncompressedSize() {
        return uncompressedSum;
    }

    /** Returns the uncompressed size of the largest Block. */
    public long getLargestBlockSize() {
        return largestBlockSize;
    }

    /**
     * Returns true if the given uncompressed position falls inside
     * this Stream.
     */
    public boolean hasUncompressedOffset(long pos) {
        return pos >= uncompressedOffset
               && pos < uncompressedOffset + uncompressedSum;
    }

    /** Returns true if the given Block number belongs to this Stream. */
    public boolean hasRecord(int blockNumber) {
        return blockNumber >= recordOffset
               && blockNumber < recordOffset + recordCount;
    }

    /**
     * Fills <code>info</code> with the Block that contains the given
     * uncompressed position (binary search over the cumulative sums).
     * The caller must have verified hasUncompressedOffset(target).
     */
    public void locateBlock(BlockInfo info, long target) {
        assert target >= uncompressedOffset;
        target -= uncompressedOffset;
        assert target < uncompressedSum;

        int left = 0;
        int right = unpadded.length - 1;

        while (left < right) {
            int i = left + (right - left) / 2;

            if (uncompressed[i] <= target)
                left = i + 1;
            else
                right = i;
        }

        setBlockInfo(info, recordOffset + left);
    }

    /**
     * Fills <code>info</code> with the sizes and offsets of the given
     * Block, converting the stored cumulative sums back to per-Block
     * values and adding this Stream's file-relative offsets.
     */
    public void setBlockInfo(BlockInfo info, int blockNumber) {
        // The caller has checked that the given Block number is inside
        // this Index.
        assert blockNumber >= recordOffset;
        assert blockNumber - recordOffset < recordCount;

        info.index = this;
        info.blockNumber = blockNumber;

        int pos = blockNumber - recordOffset;

        if (pos == 0) {
            info.compressedOffset = 0;
            info.uncompressedOffset = 0;
        } else {
            // Offsets within the Stream come from the previous Record's
            // cumulative sums; compressed offsets are four-aligned.
            info.compressedOffset = (unpadded[pos - 1] + 3) & ~3;
            info.uncompressedOffset = uncompressed[pos - 1];
        }

        info.unpaddedSize = unpadded[pos] - info.compressedOffset;
        info.uncompressedSize = uncompressed[pos] - info.uncompressedOffset;

        info.compressedOffset += compressedOffset
                                 + DecoderUtil.STREAM_HEADER_SIZE;
        info.uncompressedOffset += uncompressedOffset;
    }
}

View File

@@ -0,0 +1,59 @@
/*
* IndexEncoder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.index;
import java.io.OutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.zip.CheckedOutputStream;
import org.tukaani.xz.common.EncoderUtil;
import org.tukaani.xz.XZIOException;
/**
 * Collects a Record for every Block written to a Stream and serializes
 * them as the Index field: Index Indicator, Number of Records, List of
 * Records, Index Padding, and CRC32.
 */
public class IndexEncoder extends IndexBase {
    /** Records in the order their Blocks were written. */
    private final ArrayList records = new ArrayList();

    public IndexEncoder() {
        super(new XZIOException("XZ Stream or its Index has grown too big"));
    }

    /** Accounts for one Block and remembers its Record. */
    public void add(long unpaddedSize, long uncompressedSize)
            throws XZIOException {
        super.add(unpaddedSize, uncompressedSize);
        records.add(new IndexRecord(unpaddedSize, uncompressedSize));
    }

    /** Serializes the complete Index field to <code>out</code>. */
    public void encode(OutputStream out) throws IOException {
        java.util.zip.CRC32 crc32 = new java.util.zip.CRC32();
        CheckedOutputStream outChecked = new CheckedOutputStream(out, crc32);

        // Index Indicator
        outChecked.write(0x00);

        // Number of Records
        EncoderUtil.encodeVLI(outChecked, recordCount);

        // List of Records
        Iterator it = records.iterator();
        while (it.hasNext()) {
            IndexRecord record = (IndexRecord)it.next();
            EncoderUtil.encodeVLI(outChecked, record.unpadded);
            EncoderUtil.encodeVLI(outChecked, record.uncompressed);
        }

        // Index Padding
        int padding = getIndexPaddingSize();
        while (padding-- > 0)
            outChecked.write(0x00);

        // CRC32 of everything above, little endian. Written straight to
        // the underlying stream because the CRC doesn't cover itself.
        long value = crc32.getValue();
        for (int i = 0; i < 4; ++i)
            out.write((byte)(value >>> (i * 8)));
    }
}

View File

@@ -0,0 +1,94 @@
/*
* IndexHash
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.index;
import java.io.InputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.zip.CheckedInputStream;
import org.tukaani.xz.common.DecoderUtil;
import org.tukaani.xz.XZIOException;
import org.tukaani.xz.CorruptedInputException;
/**
 * Hashes the Records of the Blocks actually decoded from a Stream and
 * validates the stored Index field against them. Used to verify that
 * the Index matches the Blocks that were really read.
 */
public class IndexHash extends IndexBase {
    // Running hash of the (unpaddedSize, uncompressedSize) pairs.
    private org.tukaani.xz.check.Check hash;

    public IndexHash() {
        super(new CorruptedInputException());

        // Prefer SHA-256; fall back to CRC32 if the JRE lacks it.
        try {
            hash = new org.tukaani.xz.check.SHA256();
        } catch (java.security.NoSuchAlgorithmException e) {
            hash = new org.tukaani.xz.check.CRC32();
        }
    }

    /** Accounts for one decoded Block and folds it into the hash. */
    public void add(long unpaddedSize, long uncompressedSize)
            throws XZIOException {
        super.add(unpaddedSize, uncompressedSize);

        // Hash the two sizes as big-endian 8-byte values.
        ByteBuffer buf = ByteBuffer.allocate(2 * 8);
        buf.putLong(unpaddedSize);
        buf.putLong(uncompressedSize);
        hash.update(buf.array());
    }

    /**
     * Reads the stored Index from <code>in</code> and validates it
     * against the Blocks seen via {@link #add}.
     *
     * @throws CorruptedInputException if the stored Index doesn't match
     */
    public void validate(InputStream in) throws IOException {
        // Index Indicator (0x00) has already been read by BlockInputStream
        // so add 0x00 to the CRC32 here.
        java.util.zip.CRC32 crc32 = new java.util.zip.CRC32();
        crc32.update('\0');
        CheckedInputStream inChecked = new CheckedInputStream(in, crc32);

        // Get and validate the Number of Records field.
        long storedRecordCount = DecoderUtil.decodeVLI(inChecked);
        if (storedRecordCount != recordCount)
            throw new CorruptedInputException("XZ Index is corrupt");

        // Decode and hash the Index field and compare it to
        // the hash value calculated from the decoded Blocks.
        IndexHash stored = new IndexHash();
        for (long i = 0; i < recordCount; ++i) {
            long unpaddedSize = DecoderUtil.decodeVLI(inChecked);
            long uncompressedSize = DecoderUtil.decodeVLI(inChecked);

            try {
                stored.add(unpaddedSize, uncompressedSize);
            } catch (XZIOException e) {
                throw new CorruptedInputException("XZ Index is corrupt");
            }

            // Fail fast if the stored totals already exceed ours.
            if (stored.blocksSum > blocksSum
                    || stored.uncompressedSum > uncompressedSum
                    || stored.indexListSize > indexListSize)
                throw new CorruptedInputException("XZ Index is corrupt");
        }

        if (stored.blocksSum != blocksSum
                || stored.uncompressedSum != uncompressedSum
                || stored.indexListSize != indexListSize
                || !Arrays.equals(stored.hash.finish(), hash.finish()))
            throw new CorruptedInputException("XZ Index is corrupt");

        // Index Padding (must be all zero bytes)
        DataInputStream inData = new DataInputStream(inChecked);
        for (int i = getIndexPaddingSize(); i > 0; --i)
            if (inData.readUnsignedByte() != 0x00)
                throw new CorruptedInputException("XZ Index is corrupt");

        // CRC32: compare the stored little-endian value byte by byte.
        long value = crc32.getValue();
        for (int i = 0; i < 4; ++i)
            if (((value >>> (i * 8)) & 0xFF) != inData.readUnsignedByte())
                throw new CorruptedInputException("XZ Index is corrupt");
    }
}

View File

@@ -0,0 +1,20 @@
/*
* IndexRecord
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.index;
/**
 * One Index Record: the Unpadded Size and Uncompressed Size of a
 * single Block. Immutable value holder used by IndexEncoder.
 */
class IndexRecord {
    final long unpadded;
    final long uncompressed;

    IndexRecord(long unpadded, long uncompressed) {
        this.unpadded = unpadded;
        this.uncompressed = uncompressed;
    }
}

View File

@@ -0,0 +1,255 @@
/*
* Binary Tree match finder with 2-, 3-, and 4-byte hashing
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.lz;
/**
 * Binary tree match finder with 2-, 3-, and 4-byte hashing. Short
 * candidates come from the hash tables; longer matches are searched
 * in a binary tree of previous positions, bounded by depthLimit.
 */
final class BT4 extends LZEncoder {
    private final Hash234 hash;

    // Binary tree stored as a flat array: two child slots per
    // cyclic-buffer position.
    private final int[] tree;
    private final Matches matches;
    private final int depthLimit;

    private final int cyclicSize;
    private int cyclicPos = -1;
    private int lzPos;

    /** Gets approximate memory usage of the match finder as kibibytes. */
    static int getMemoryUsage(int dictSize) {
        return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 8) + 10;
    }

    /**
     * Creates a new LZEncoder with the BT4 match finder.
     * See <code>LZEncoder.getInstance</code> for parameter descriptions.
     */
    BT4(int dictSize, int beforeSizeMin, int readAheadMax,
            int niceLen, int matchLenMax, int depthLimit) {
        super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax);

        // +1 because we need dictSize bytes of history + the current byte.
        cyclicSize = dictSize + 1;
        lzPos = cyclicSize;

        hash = new Hash234(dictSize);
        tree = new int[cyclicSize * 2];

        // Subtracting 1 because the shortest match that this match
        // finder can find is 2 bytes, so there's no need to reserve
        // space for one-byte matches.
        matches = new Matches(niceLen - 1);

        // Use a default depth limit if none was specified.
        this.depthLimit = depthLimit > 0 ? depthLimit : 16 + niceLen / 2;
    }

    /**
     * Moves to the next byte and normalizes the hash tables and the
     * tree when lzPos would overflow.
     *
     * @return number of bytes available, including the current byte
     */
    private int movePos() {
        int avail = movePos(niceLen, 4);

        if (avail != 0) {
            if (++lzPos == Integer.MAX_VALUE) {
                int normalizationOffset = Integer.MAX_VALUE - cyclicSize;
                hash.normalize(normalizationOffset);
                normalize(tree, normalizationOffset);
                lzPos -= normalizationOffset;
            }

            if (++cyclicPos == cyclicSize)
                cyclicPos = 0;
        }

        return avail;
    }

    /**
     * Finds matches at the current position and returns them ordered
     * by increasing length (and distance).
     */
    public Matches getMatches() {
        matches.count = 0;

        int matchLenLimit = matchLenMax;
        int niceLenLimit = niceLen;
        int avail = movePos();

        if (avail < matchLenLimit) {
            if (avail == 0)
                return matches;

            matchLenLimit = avail;
            if (niceLenLimit > avail)
                niceLenLimit = avail;
        }

        hash.calcHashes(buf, readPos);
        int delta2 = lzPos - hash.getHash2Pos();
        int delta3 = lzPos - hash.getHash3Pos();
        int currentMatch = hash.getHash4Pos();
        hash.updateTables(lzPos);

        int lenBest = 0;

        // See if the hash from the first two bytes found a match.
        // The hashing algorithm guarantees that if the first byte
        // matches, also the second byte does, so there's no need to
        // test the second byte.
        if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) {
            lenBest = 2;
            matches.len[0] = 2;
            matches.dist[0] = delta2 - 1;
            matches.count = 1;
        }

        // See if the hash from the first three bytes found a match that
        // is different from the match possibly found by the two-byte hash.
        // Also here the hashing algorithm guarantees that if the first byte
        // matches, also the next two bytes do.
        if (delta2 != delta3 && delta3 < cyclicSize
                && buf[readPos - delta3] == buf[readPos]) {
            lenBest = 3;
            matches.dist[matches.count++] = delta3 - 1;
            delta2 = delta3;
        }

        // If a match was found, see how long it is.
        if (matches.count > 0) {
            while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2]
                    == buf[readPos + lenBest])
                ++lenBest;

            matches.len[matches.count - 1] = lenBest;

            // Return if it is long enough (niceLen or reached the end of
            // the dictionary).
            if (lenBest >= niceLenLimit) {
                skip(niceLenLimit, currentMatch);
                return matches;
            }
        }

        // Long enough match wasn't found so easily. Look for better matches
        // from the binary tree.
        if (lenBest < 3)
            lenBest = 3;

        int depth = depthLimit;

        int ptr0 = (cyclicPos << 1) + 1;
        int ptr1 = cyclicPos << 1;
        int len0 = 0;
        int len1 = 0;

        while (true) {
            int delta = lzPos - currentMatch;

            // Return if the search depth limit has been reached or
            // if the distance of the potential match exceeds the
            // dictionary size.
            if (depth-- == 0 || delta >= cyclicSize) {
                tree[ptr0] = 0;
                tree[ptr1] = 0;
                return matches;
            }

            int pair = (cyclicPos - delta
                        + (delta > cyclicPos ? cyclicSize : 0)) << 1;
            int len = Math.min(len0, len1);

            if (buf[readPos + len - delta] == buf[readPos + len]) {
                while (++len < matchLenLimit)
                    if (buf[readPos + len - delta] != buf[readPos + len])
                        break;

                if (len > lenBest) {
                    lenBest = len;
                    matches.len[matches.count] = len;
                    matches.dist[matches.count] = delta - 1;
                    ++matches.count;

                    if (len >= niceLenLimit) {
                        tree[ptr1] = tree[pair];
                        tree[ptr0] = tree[pair + 1];
                        return matches;
                    }
                }
            }

            if ((buf[readPos + len - delta] & 0xFF)
                    < (buf[readPos + len] & 0xFF)) {
                tree[ptr1] = currentMatch;
                ptr1 = pair + 1;
                currentMatch = tree[ptr1];
                len1 = len;
            } else {
                tree[ptr0] = currentMatch;
                ptr0 = pair;
                currentMatch = tree[ptr0];
                len0 = len;
            }
        }
    }

    /**
     * Updates the binary tree for the current position without
     * collecting matches.
     */
    private void skip(int niceLenLimit, int currentMatch) {
        int depth = depthLimit;

        int ptr0 = (cyclicPos << 1) + 1;
        int ptr1 = cyclicPos << 1;
        int len0 = 0;
        int len1 = 0;

        while (true) {
            int delta = lzPos - currentMatch;

            if (depth-- == 0 || delta >= cyclicSize) {
                tree[ptr0] = 0;
                tree[ptr1] = 0;
                return;
            }

            int pair = (cyclicPos - delta
                        + (delta > cyclicPos ? cyclicSize : 0)) << 1;
            int len = Math.min(len0, len1);

            if (buf[readPos + len - delta] == buf[readPos + len]) {
                // No need to look for longer matches than niceLenLimit
                // because we only are updating the tree, not returning
                // matches found to the caller.
                do {
                    if (++len == niceLenLimit) {
                        tree[ptr1] = tree[pair];
                        tree[ptr0] = tree[pair + 1];
                        return;
                    }
                } while (buf[readPos + len - delta] == buf[readPos + len]);
            }

            if ((buf[readPos + len - delta] & 0xFF)
                    < (buf[readPos + len] & 0xFF)) {
                tree[ptr1] = currentMatch;
                ptr1 = pair + 1;
                currentMatch = tree[ptr1];
                len1 = len;
            } else {
                tree[ptr0] = currentMatch;
                ptr0 = pair;
                currentMatch = tree[ptr0];
                len0 = len;
            }
        }
    }

    /** Skips the next <code>len</code> bytes, keeping the tree updated. */
    public void skip(int len) {
        while (len-- > 0) {
            int niceLenLimit = niceLen;
            int avail = movePos();

            if (avail < niceLenLimit) {
                if (avail == 0)
                    continue;

                niceLenLimit = avail;
            }

            hash.calcHashes(buf, readPos);
            int currentMatch = hash.getHash4Pos();
            hash.updateTables(lzPos);

            skip(niceLenLimit, currentMatch);
        }
    }
}

View File

@@ -0,0 +1,35 @@
/*
* CRC32Hash
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.lz;
/**
* Provides a CRC32 table using the polynomial from IEEE 802.3.
*/
/**
 * Builds the 256-entry lookup table for the bit-reflected CRC32 with
 * the IEEE 802.3 polynomial, used for hashing by the match finders.
 */
class CRC32Hash {
    private static final int CRC32_POLY = 0xEDB88320;

    static final int[] crcTable = new int[256];

    static {
        // Standard table construction: process each of the eight bits
        // of the index, applying the polynomial when the low bit is set.
        for (int index = 0; index < 256; ++index) {
            int entry = index;

            for (int bit = 8; bit > 0; --bit)
                entry = ((entry & 1) != 0)
                        ? (entry >>> 1) ^ CRC32_POLY
                        : entry >>> 1;

            crcTable[index] = entry;
        }
    }
}

View File

@@ -0,0 +1,200 @@
/*
* Hash Chain match finder with 2-, 3-, and 4-byte hashing
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.lz;
/**
 * Hash Chain match finder with 2-, 3-, and 4-byte hashing. Short
 * candidates come from the hash tables; longer matches are searched
 * along a hash chain of previous positions, bounded by depthLimit.
 */
final class HC4 extends LZEncoder {
    private final Hash234 hash;

    // Hash chain: for each cyclic-buffer position, the previous
    // position that had the same 4-byte hash.
    private final int[] chain;
    private final Matches matches;
    private final int depthLimit;

    private final int cyclicSize;
    private int cyclicPos = -1;
    private int lzPos;

    /**
     * Gets approximate memory usage of the match finder as kibibytes.
     */
    static int getMemoryUsage(int dictSize) {
        return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 4) + 10;
    }

    /**
     * Creates a new LZEncoder with the HC4 match finder.
     * See <code>LZEncoder.getInstance</code> for parameter descriptions.
     */
    HC4(int dictSize, int beforeSizeMin, int readAheadMax,
            int niceLen, int matchLenMax, int depthLimit) {
        super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax);

        hash = new Hash234(dictSize);

        // +1 because we need dictSize bytes of history + the current byte.
        cyclicSize = dictSize + 1;
        chain = new int[cyclicSize];
        lzPos = cyclicSize;

        // Subtracting 1 because the shortest match that this match
        // finder can find is 2 bytes, so there's no need to reserve
        // space for one-byte matches.
        matches = new Matches(niceLen - 1);

        // Use a default depth limit if no other value was specified.
        // The default is just something based on experimentation;
        // it's nothing magic.
        this.depthLimit = (depthLimit > 0) ? depthLimit : 4 + niceLen / 4;
    }

    /**
     * Moves to the next byte, checks that there is enough available space,
     * and possibly normalizes the hash tables and the hash chain.
     *
     * @return number of bytes available, including the current byte
     */
    private int movePos() {
        int avail = movePos(4, 4);

        if (avail != 0) {
            if (++lzPos == Integer.MAX_VALUE) {
                int normalizationOffset = Integer.MAX_VALUE - cyclicSize;
                hash.normalize(normalizationOffset);
                normalize(chain, normalizationOffset);
                lzPos -= normalizationOffset;
            }

            if (++cyclicPos == cyclicSize)
                cyclicPos = 0;
        }

        return avail;
    }

    /**
     * Finds matches at the current position and returns them ordered
     * by increasing length (and distance).
     */
    public Matches getMatches() {
        matches.count = 0;

        int matchLenLimit = matchLenMax;
        int niceLenLimit = niceLen;
        int avail = movePos();

        if (avail < matchLenLimit) {
            if (avail == 0)
                return matches;

            matchLenLimit = avail;
            if (niceLenLimit > avail)
                niceLenLimit = avail;
        }

        hash.calcHashes(buf, readPos);
        int delta2 = lzPos - hash.getHash2Pos();
        int delta3 = lzPos - hash.getHash3Pos();
        int currentMatch = hash.getHash4Pos();
        hash.updateTables(lzPos);

        chain[cyclicPos] = currentMatch;

        int lenBest = 0;

        // See if the hash from the first two bytes found a match.
        // The hashing algorithm guarantees that if the first byte
        // matches, also the second byte does, so there's no need to
        // test the second byte.
        if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) {
            lenBest = 2;
            matches.len[0] = 2;
            matches.dist[0] = delta2 - 1;
            matches.count = 1;
        }

        // See if the hash from the first three bytes found a match that
        // is different from the match possibly found by the two-byte hash.
        // Also here the hashing algorithm guarantees that if the first byte
        // matches, also the next two bytes do.
        if (delta2 != delta3 && delta3 < cyclicSize
                && buf[readPos - delta3] == buf[readPos]) {
            lenBest = 3;
            matches.dist[matches.count++] = delta3 - 1;
            delta2 = delta3;
        }

        // If a match was found, see how long it is.
        if (matches.count > 0) {
            while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2]
                    == buf[readPos + lenBest])
                ++lenBest;

            matches.len[matches.count - 1] = lenBest;

            // Return if it is long enough (niceLen or reached the end of
            // the dictionary).
            if (lenBest >= niceLenLimit)
                return matches;
        }

        // Long enough match wasn't found so easily. Look for better matches
        // from the hash chain.
        if (lenBest < 3)
            lenBest = 3;

        int depth = depthLimit;

        while (true) {
            int delta = lzPos - currentMatch;

            // Return if the search depth limit has been reached or
            // if the distance of the potential match exceeds the
            // dictionary size.
            if (depth-- == 0 || delta >= cyclicSize)
                return matches;

            currentMatch = chain[cyclicPos - delta
                                 + (delta > cyclicPos ? cyclicSize : 0)];

            // Test the first byte and the first new byte that would give us
            // a match that is at least one byte longer than lenBest. This
            // way too-short matches get quickly skipped.
            if (buf[readPos + lenBest - delta] == buf[readPos + lenBest]
                    && buf[readPos - delta] == buf[readPos]) {
                // Calculate the length of the match.
                int len = 0;
                while (++len < matchLenLimit)
                    if (buf[readPos + len - delta] != buf[readPos + len])
                        break;

                // Use the match if and only if it is better than the longest
                // match found so far.
                if (len > lenBest) {
                    lenBest = len;
                    matches.len[matches.count] = len;
                    matches.dist[matches.count] = delta - 1;
                    ++matches.count;

                    // Return if it is long enough (niceLen or reached the
                    // end of the dictionary).
                    if (len >= niceLenLimit)
                        return matches;
                }
            }
        }
    }

    /**
     * Skips the next <code>len</code> bytes, keeping the hash chain
     * and hash tables updated.
     */
    public void skip(int len) {
        assert len >= 0;

        while (len-- > 0) {
            if (movePos() != 0) {
                // Update the hash chain and hash tables.
                hash.calcHashes(buf, readPos);
                chain[cyclicPos] = hash.getHash4Pos();
                hash.updateTables(lzPos);
            }
        }
    }
}

View File

@@ -0,0 +1,89 @@
/*
* 2-, 3-, and 4-byte hashing
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.lz;
/**
 * 2-, 3-, and 4-byte hashing for the match finders. Three tables map
 * a CRC32-based hash of the next 2, 3, or 4 bytes to the most recent
 * position where that hash occurred.
 */
final class Hash234 extends CRC32Hash {
    private static final int HASH_2_SIZE = 1 << 10;
    private static final int HASH_2_MASK = HASH_2_SIZE - 1;

    private static final int HASH_3_SIZE = 1 << 16;
    private static final int HASH_3_MASK = HASH_3_SIZE - 1;

    // Mask for the variable-size 4-byte hash table (length - 1).
    private final int hash4Mask;

    private final int[] hash2Table = new int[HASH_2_SIZE];
    private final int[] hash3Table = new int[HASH_3_SIZE];
    private final int[] hash4Table;

    // Hash values calculated by the most recent calcHashes call.
    private int hash2Value = 0;
    private int hash3Value = 0;
    private int hash4Value = 0;

    /**
     * Derives the 4-byte hash table size from the dictionary size:
     * at least 2^16 entries, reduced for very large dictionaries.
     */
    static int getHash4Size(int dictSize) {
        int h = dictSize - 1;
        h |= h >>> 1;
        h |= h >>> 2;
        h |= h >>> 4;
        h |= h >>> 8;
        h >>>= 1;
        h |= 0xFFFF;
        if (h > (1 << 24))
            h >>>= 1;

        return h + 1;
    }

    static int getMemoryUsage(int dictSize) {
        // Sizes of the hash arrays + a little extra
        return (HASH_2_SIZE + HASH_3_SIZE + getHash4Size(dictSize))
               / (1024 / 4) + 4;
    }

    Hash234(int dictSize) {
        hash4Table = new int[getHash4Size(dictSize)];
        hash4Mask = hash4Table.length - 1;
    }

    /**
     * Calculates the 2-, 3-, and 4-byte hash values for the bytes
     * starting at <code>buf[off]</code> (reads four bytes).
     */
    void calcHashes(byte[] buf, int off) {
        int temp = crcTable[buf[off] & 0xFF] ^ (buf[off + 1] & 0xFF);
        hash2Value = temp & HASH_2_MASK;

        temp ^= (buf[off + 2] & 0xFF) << 8;
        hash3Value = temp & HASH_3_MASK;

        temp ^= crcTable[buf[off + 3] & 0xFF] << 5;
        hash4Value = temp & hash4Mask;
    }

    int getHash2Pos() {
        return hash2Table[hash2Value];
    }

    int getHash3Pos() {
        return hash3Table[hash3Value];
    }

    int getHash4Pos() {
        return hash4Table[hash4Value];
    }

    /** Records <code>pos</code> under the most recent hash values. */
    void updateTables(int pos) {
        hash2Table[hash2Value] = pos;
        hash3Table[hash3Value] = pos;
        hash4Table[hash4Value] = pos;
    }

    /** Rebases all stored positions; see LZEncoder.normalize. */
    void normalize(int normalizeOffset) {
        LZEncoder.normalize(hash2Table, normalizeOffset);
        LZEncoder.normalize(hash3Table, normalizeOffset);
        LZEncoder.normalize(hash4Table, normalizeOffset);
    }
}

View File

@@ -0,0 +1,126 @@
/*
* LZDecoder
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.lz;
import java.io.DataInputStream;
import java.io.IOException;
import org.tukaani.xz.CorruptedInputException;
/**
 * Circular LZ dictionary buffer for the decoder. New bytes are written
 * at <code>pos</code>, matches are copied from earlier in the buffer,
 * and decoded data is handed to the caller via {@link #flush}.
 */
public final class LZDecoder {
    private final byte[] buf;
    private int start = 0;        // first byte not yet flushed
    private int pos = 0;          // next write position
    private int full = 0;         // number of valid bytes in buf
    private int limit = 0;        // write limit for the current call
    private int pendingLen = 0;   // remainder of a repeat that hit limit
    private int pendingDist = 0;  // distance of that pending repeat

    /**
     * @param dictSize   dictionary (buffer) size in bytes
     * @param presetDict initial dictionary content, or null; only its
     *                   last dictSize bytes are kept
     */
    public LZDecoder(int dictSize, byte[] presetDict) {
        buf = new byte[dictSize];

        if (presetDict != null) {
            pos = Math.min(presetDict.length, dictSize);
            full = pos;
            start = pos;
            System.arraycopy(presetDict, presetDict.length - pos, buf, 0, pos);
        }
    }

    /** Resets the dictionary to the empty state. */
    public void reset() {
        start = 0;
        pos = 0;
        full = 0;
        limit = 0;
        buf[buf.length - 1] = 0x00;
    }

    /** Limits how many bytes the next decode calls may produce. */
    public void setLimit(int outMax) {
        if (buf.length - pos <= outMax)
            limit = buf.length;
        else
            limit = pos + outMax;
    }

    public boolean hasSpace() {
        return pos < limit;
    }

    /** True if a repeat was cut short by the limit and must resume. */
    public boolean hasPending() {
        return pendingLen > 0;
    }

    public int getPos() {
        return pos;
    }

    /** Returns the byte <code>dist + 1</code> positions back, with wrap. */
    public int getByte(int dist) {
        int offset = pos - dist - 1;
        if (dist >= pos)
            offset += buf.length;

        return buf[offset] & 0xFF;
    }

    public void putByte(byte b) {
        buf[pos++] = b;

        if (full < pos)
            full = pos;
    }

    /**
     * Copies <code>len</code> bytes from <code>dist + 1</code> positions
     * back. If the limit is hit first, the remainder is recorded as
     * pending for {@link #repeatPending}.
     *
     * @throws CorruptedInputException if the distance points outside
     *                                 the valid data
     */
    public void repeat(int dist, int len) throws IOException {
        if (dist < 0 || dist >= full)
            throw new CorruptedInputException();

        int left = Math.min(limit - pos, len);
        pendingLen = len - left;
        pendingDist = dist;

        int back = pos - dist - 1;
        if (dist >= pos)
            back += buf.length;

        // Byte-by-byte copy: source and destination may overlap.
        do {
            buf[pos++] = buf[back++];
            if (back == buf.length)
                back = 0;
        } while (--left > 0);

        if (full < pos)
            full = pos;
    }

    /** Resumes a repeat that was previously cut short by the limit. */
    public void repeatPending() throws IOException {
        if (pendingLen > 0)
            repeat(pendingDist, pendingLen);
    }

    /** Reads <code>len</code> literal bytes straight into the buffer. */
    public void copyUncompressed(DataInputStream inData, int len)
            throws IOException {
        int copySize = Math.min(buf.length - pos, len);
        inData.readFully(buf, pos, copySize);
        pos += copySize;

        if (full < pos)
            full = pos;
    }

    /**
     * Copies the not-yet-flushed bytes to <code>out</code> and returns
     * how many were copied; wraps <code>pos</code> when the buffer end
     * is reached.
     */
    public int flush(byte[] out, int outOff) {
        int copySize = pos - start;
        if (pos == buf.length)
            pos = 0;

        System.arraycopy(buf, start, out, outOff, copySize);
        start = pos;

        return copySize;
    }
}

View File

@@ -0,0 +1,419 @@
/*
* LZEncoder
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.lz;
import java.io.OutputStream;
import java.io.IOException;
/**
 * Base class of the LZ77 sliding window used by the LZMA encoder.
 * The concrete match finders (HC4 and BT4, selected with
 * <code>getInstance</code>) do the actual match searching; this class
 * manages the window buffer and its filling, flushing, and finishing.
 */
public abstract class LZEncoder {
    /** Match finder ID selecting the HC4 implementation. */
    public static final int MF_HC4 = 0x04;
    /** Match finder ID selecting the BT4 implementation. */
    public static final int MF_BT4 = 0x14;
    /**
     * Number of bytes to keep available before the current byte
     * when moving the LZ window.
     */
    private final int keepSizeBefore;
    /**
     * Number of bytes that must be available, the current byte included,
     * to make hasEnoughData return true. Flushing and finishing are
     * naturally exceptions to this since there cannot be any data after
     * the end of the uncompressed input.
     */
    private final int keepSizeAfter;
    /** Don't test for matches longer than this many bytes. */
    final int matchLenMax;
    /** A match of at least this length is considered good enough. */
    final int niceLen;
    /** The sliding window buffer. */
    final byte[] buf;
    /** Index of the current byte in <code>buf</code>; -1 before any data. */
    int readPos = -1;
    /** Highest index that hasEnoughData allows to be reached. */
    private int readLimit = -1;
    /** Set once setFinishing has been called. */
    private boolean finishing = false;
    /** Index in <code>buf</code> where the next input byte is stored. */
    private int writePos = 0;
    /**
     * Number of bytes that movePos couldn't run through the match finder
     * yet for lack of data; handled later by processPendingBytes.
     */
    private int pendingSize = 0;
    /**
     * Subtracts <code>normalizationOffset</code> from every entry of
     * <code>positions</code>, clamping results at zero. Match finders
     * use this when their position counters grow too large.
     */
    static void normalize(int[] positions, int normalizationOffset) {
        for (int i = 0; i < positions.length; ++i) {
            if (positions[i] <= normalizationOffset)
                positions[i] = 0;
            else
                positions[i] -= normalizationOffset;
        }
    }
    /**
     * Gets the size of the LZ window buffer that needs to be allocated.
     */
    private static int getBufSize(
            int dictSize, int extraSizeBefore, int extraSizeAfter,
            int matchLenMax) {
        int keepSizeBefore = extraSizeBefore + dictSize;
        int keepSizeAfter = extraSizeAfter + matchLenMax;
        // Extra room so the window isn't moved on every fill:
        // half the dictionary, but at least 256 KiB and at most 512 MiB.
        int reserveSize = Math.min(dictSize / 2 + (256 << 10), 512 << 20);
        return keepSizeBefore + keepSizeAfter + reserveSize;
    }
    /**
     * Gets approximate memory usage of the LZEncoder base structure and
     * the match finder as kibibytes.
     */
    public static int getMemoryUsage(
            int dictSize, int extraSizeBefore, int extraSizeAfter,
            int matchLenMax, int mf) {
        // Buffer size + a little extra
        int m = getBufSize(dictSize, extraSizeBefore, extraSizeAfter,
                           matchLenMax) / 1024 + 10;
        switch (mf) {
            case MF_HC4:
                m += HC4.getMemoryUsage(dictSize);
                break;
            case MF_BT4:
                m += BT4.getMemoryUsage(dictSize);
                break;
            default:
                throw new IllegalArgumentException();
        }
        return m;
    }
    /**
     * Creates a new LZEncoder.
     * <p>
     * @param dictSize dictionary size
     *
     * @param extraSizeBefore
     * number of bytes to keep available in the
     * history in addition to dictSize
     *
     * @param extraSizeAfter
     * number of bytes that must be available
     * after current position + matchLenMax
     *
     * @param niceLen if a match of at least <code>niceLen</code>
     * bytes is found, be happy with it and
     * stop looking for longer matches
     *
     * @param matchLenMax don't test for matches longer than
     * <code>matchLenMax</code> bytes
     *
     * @param mf match finder ID
     *
     * @param depthLimit match finder search depth limit
     */
    public static LZEncoder getInstance(
            int dictSize, int extraSizeBefore, int extraSizeAfter,
            int niceLen, int matchLenMax, int mf, int depthLimit) {
        switch (mf) {
            case MF_HC4:
                return new HC4(dictSize, extraSizeBefore, extraSizeAfter,
                               niceLen, matchLenMax, depthLimit);
            case MF_BT4:
                return new BT4(dictSize, extraSizeBefore, extraSizeAfter,
                               niceLen, matchLenMax, depthLimit);
        }
        throw new IllegalArgumentException();
    }
    /**
     * Creates a new LZEncoder. See <code>getInstance</code>.
     */
    LZEncoder(int dictSize, int extraSizeBefore, int extraSizeAfter,
              int niceLen, int matchLenMax) {
        buf = new byte[getBufSize(dictSize, extraSizeBefore, extraSizeAfter,
                                  matchLenMax)];
        keepSizeBefore = extraSizeBefore + dictSize;
        keepSizeAfter = extraSizeAfter + matchLenMax;
        this.matchLenMax = matchLenMax;
        this.niceLen = niceLen;
    }
    /**
     * Sets a preset dictionary. If a preset dictionary is wanted, this
     * function must be called immediately after creating the LZEncoder
     * before any data has been encoded.
     */
    public void setPresetDict(int dictSize, byte[] presetDict) {
        assert !isStarted();
        assert writePos == 0;
        if (presetDict != null) {
            // If the preset dictionary buffer is bigger than the dictionary
            // size, copy only the tail of the preset dictionary.
            int copySize = Math.min(presetDict.length, dictSize);
            int offset = presetDict.length - copySize;
            System.arraycopy(presetDict, offset, buf, 0, copySize);
            writePos += copySize;
            skip(copySize);
        }
    }
    /**
     * Moves data from the end of the buffer to the beginning, discarding
     * old data and making space for new input.
     */
    private void moveWindow() {
        // Align the move to a multiple of 16 bytes. LZMA2 needs this
        // because it uses the lowest bits from readPos to get the
        // alignment of the uncompressed data.
        int moveOffset = (readPos + 1 - keepSizeBefore) & ~15;
        int moveSize = writePos - moveOffset;
        System.arraycopy(buf, moveOffset, buf, 0, moveSize);
        readPos -= moveOffset;
        readLimit -= moveOffset;
        writePos -= moveOffset;
    }
    /**
     * Copies new data into the LZEncoder's buffer.
     *
     * @return the number of bytes actually copied, which may be less
     *         than <code>len</code> if the buffer became full
     */
    public int fillWindow(byte[] in, int off, int len) {
        assert !finishing;
        // Move the sliding window if needed.
        if (readPos >= buf.length - keepSizeAfter)
            moveWindow();
        // Try to fill the dictionary buffer. If it becomes full,
        // some of the input bytes may be left unused.
        if (len > buf.length - writePos)
            len = buf.length - writePos;
        System.arraycopy(in, off, buf, writePos, len);
        writePos += len;
        // Set the new readLimit but only if there's enough data to allow
        // encoding of at least one more byte.
        if (writePos >= keepSizeAfter)
            readLimit = writePos - keepSizeAfter;
        processPendingBytes();
        // Tell the caller how much input we actually copied into
        // the dictionary.
        return len;
    }
    /**
     * Process pending bytes remaining from preset dictionary initialization
     * or encoder flush operation.
     */
    private void processPendingBytes() {
        // After flushing or setting a preset dictionary there will be
        // pending data that hasn't been ran through the match finder yet.
        // Run it through the match finder now if there is enough new data
        // available (readPos < readLimit) that the encoder may encode at
        // least one more input byte. This way we don't waste any time
        // looping in the match finder (and marking the same bytes as
        // pending again) if the application provides very little new data
        // per write call.
        if (pendingSize > 0 && readPos < readLimit) {
            readPos -= pendingSize;
            int oldPendingSize = pendingSize;
            pendingSize = 0;
            skip(oldPendingSize);
            assert pendingSize < oldPendingSize;
        }
    }
    /**
     * Returns true if at least one byte has already been run through
     * the match finder.
     */
    public boolean isStarted() {
        return readPos != -1;
    }
    /**
     * Marks that all the input needs to be made available in
     * the encoded output.
     */
    public void setFlushing() {
        readLimit = writePos - 1;
        processPendingBytes();
    }
    /**
     * Marks that there is no more input remaining. The read position
     * can be advanced until the end of the data.
     */
    public void setFinishing() {
        readLimit = writePos - 1;
        finishing = true;
        processPendingBytes();
    }
    /**
     * Tests if there is enough input available to let the caller encode
     * at least one more byte.
     */
    public boolean hasEnoughData(int alreadyReadLen) {
        return readPos - alreadyReadLen < readLimit;
    }
    /**
     * Writes <code>len</code> already-buffered bytes to <code>out</code>,
     * starting <code>backward</code> bytes before the first unread
     * position of the window.
     */
    public void copyUncompressed(OutputStream out, int backward, int len)
            throws IOException {
        out.write(buf, readPos + 1 - backward, len);
    }
    /**
     * Get the number of bytes available, including the current byte.
     * <p>
     * Note that the result is undefined if <code>getMatches</code> or
     * <code>skip</code> hasn't been called yet and no preset dictionary
     * is being used.
     */
    public int getAvail() {
        assert isStarted();
        return writePos - readPos;
    }
    /**
     * Gets the lowest four bits of the absolute offset of the current byte.
     * Bits other than the lowest four are undefined.
     */
    public int getPos() {
        return readPos;
    }
    /**
     * Gets the byte from the given backward offset.
     * <p>
     * The current byte is at <code>0</code>, the previous byte
     * at <code>1</code> etc. To get a byte at zero-based distance,
     * use <code>getByte(dist + 1)</code>.
     * <p>
     * This function is equivalent to <code>getByte(0, backward)</code>.
     */
    public int getByte(int backward) {
        return buf[readPos - backward] & 0xFF;
    }
    /**
     * Gets the byte from the given forward minus backward offset.
     * The forward offset is added to the current position. This lets
     * one read bytes ahead of the current byte.
     */
    public int getByte(int forward, int backward) {
        return buf[readPos + forward - backward] & 0xFF;
    }
    /**
     * Get the length of a match at the given distance.
     *
     * @param dist zero-based distance of the match to test
     * @param lenLimit don't test for a match longer than this
     *
     * @return length of the match; it is in the range [0, lenLimit]
     */
    public int getMatchLen(int dist, int lenLimit) {
        int backPos = readPos - dist - 1;
        int len = 0;
        while (len < lenLimit && buf[readPos + len] == buf[backPos + len])
            ++len;
        return len;
    }
    /**
     * Get the length of a match at the given distance and forward offset.
     *
     * @param forward forward offset
     * @param dist zero-based distance of the match to test
     * @param lenLimit don't test for a match longer than this
     *
     * @return length of the match; it is in the range [0, lenLimit]
     */
    public int getMatchLen(int forward, int dist, int lenLimit) {
        int curPos = readPos + forward;
        int backPos = curPos - dist - 1;
        int len = 0;
        while (len < lenLimit && buf[curPos + len] == buf[backPos + len])
            ++len;
        return len;
    }
    /**
     * Verifies that the matches returned by the match finder are valid.
     * This is meant to be used in an assert statement. This is totally
     * useless for actual encoding since match finder's results should
     * naturally always be valid if it isn't broken.
     *
     * @param matches return value from <code>getMatches</code>
     *
     * @return true if matches are valid, false if match finder is broken
     */
    public boolean verifyMatches(Matches matches) {
        int lenLimit = Math.min(getAvail(), matchLenMax);
        for (int i = 0; i < matches.count; ++i)
            if (getMatchLen(matches.dist[i], lenLimit) != matches.len[i])
                return false;
        return true;
    }
    /**
     * Moves to the next byte, checks if there is enough input available,
     * and returns the amount of input available.
     *
     * @param requiredForFlushing
     * minimum number of available bytes when
     * flushing; encoding may be continued with
     * new input after flushing
     * @param requiredForFinishing
     * minimum number of available bytes when
     * finishing; encoding must not be continued
     * after finishing or the match finder state
     * may be corrupt
     *
     * @return the number of bytes available or zero if there
     * is not enough input available
     */
    int movePos(int requiredForFlushing, int requiredForFinishing) {
        assert requiredForFlushing >= requiredForFinishing;
        ++readPos;
        int avail = writePos - readPos;
        if (avail < requiredForFlushing) {
            if (avail < requiredForFinishing || !finishing) {
                // Not enough data yet: remember the byte as pending so
                // processPendingBytes can re-run it through the match
                // finder once more input arrives.
                ++pendingSize;
                avail = 0;
            }
        }
        return avail;
    }
    /**
     * Runs match finder for the next byte and returns the matches found.
     */
    public abstract Matches getMatches();
    /**
     * Skips the given number of bytes in the match finder.
     */
    public abstract void skip(int len);
}

View File

@@ -0,0 +1,22 @@
/*
* Matches
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.lz;
/**
 * Container for the matches a match finder reports for one input
 * position. Only the first <code>count</code> entries of
 * <code>len</code> and <code>dist</code> are meaningful:
 * <code>len[i]</code> is the length of the i-th match and
 * <code>dist[i]</code> its zero-based distance.
 */
public final class Matches {
    public final int[] len;
    public final int[] dist;
    public int count = 0;

    /**
     * Allocates room for at most <code>maxCount</code> matches.
     */
    Matches(int maxCount) {
        len = new int[maxCount];
        dist = new int[maxCount];
    }
}

View File

@@ -0,0 +1,140 @@
/*
* LZMACoder
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.lzma;
import org.tukaani.xz.rangecoder.RangeCoder;
/**
 * State shared by the LZMA encoder and decoder: the four most recent
 * match distances (<code>reps</code>), the coder state machine, and
 * the bit-model probability arrays used with the range coder.
 */
abstract class LZMACoder {
    static final int POS_STATES_MAX = 1 << 4;
    static final int MATCH_LEN_MIN = 2;
    static final int MATCH_LEN_MAX = MATCH_LEN_MIN + LengthCoder.LOW_SYMBOLS
                                     + LengthCoder.MID_SYMBOLS
                                     + LengthCoder.HIGH_SYMBOLS - 1;
    static final int DIST_STATES = 4;
    static final int DIST_SLOTS = 1 << 6;
    static final int DIST_MODEL_START = 4;
    static final int DIST_MODEL_END = 14;
    static final int FULL_DISTANCES = 1 << (DIST_MODEL_END / 2);
    static final int ALIGN_BITS = 4;
    static final int ALIGN_SIZE = 1 << ALIGN_BITS;
    static final int ALIGN_MASK = ALIGN_SIZE - 1;
    static final int REPS = 4;
    /** Mask derived from the pb (position bits) setting. */
    final int posMask;
    /** The four most recently used match distances. */
    final int[] reps = new int[REPS];
    final State state = new State();
    // Probability arrays; one probability per context, initialized by
    // RangeCoder.initProbs in reset().
    final short[][] isMatch = new short[State.STATES][POS_STATES_MAX];
    final short[] isRep = new short[State.STATES];
    final short[] isRep0 = new short[State.STATES];
    final short[] isRep1 = new short[State.STATES];
    final short[] isRep2 = new short[State.STATES];
    final short[][] isRep0Long = new short[State.STATES][POS_STATES_MAX];
    final short[][] distSlots = new short[DIST_STATES][DIST_SLOTS];
    final short[][] distSpecial = { new short[2], new short[2],
                                    new short[4], new short[4],
                                    new short[8], new short[8],
                                    new short[16], new short[16],
                                    new short[32], new short[32] };
    final short[] distAlign = new short[ALIGN_SIZE];

    /**
     * Maps a match length to the distance-slot context [0, DIST_STATES - 1].
     */
    static final int getDistState(int len) {
        return len < DIST_STATES + MATCH_LEN_MIN
               ? len - MATCH_LEN_MIN
               : DIST_STATES - 1;
    }

    /**
     * @param pb number of position bits; determines <code>posMask</code>
     */
    LZMACoder(int pb) {
        posMask = (1 << pb) - 1;
    }

    /**
     * Resets the rep distances, the state machine, and every
     * probability array to their initial values.
     */
    void reset() {
        reps[0] = 0;
        reps[1] = 0;
        reps[2] = 0;
        reps[3] = 0;
        state.reset();
        for (int i = 0; i < isMatch.length; ++i)
            RangeCoder.initProbs(isMatch[i]);
        RangeCoder.initProbs(isRep);
        RangeCoder.initProbs(isRep0);
        RangeCoder.initProbs(isRep1);
        RangeCoder.initProbs(isRep2);
        for (int i = 0; i < isRep0Long.length; ++i)
            RangeCoder.initProbs(isRep0Long[i]);
        for (int i = 0; i < distSlots.length; ++i)
            RangeCoder.initProbs(distSlots[i]);
        for (int i = 0; i < distSpecial.length; ++i)
            RangeCoder.initProbs(distSpecial[i]);
        RangeCoder.initProbs(distAlign);
    }

    /**
     * Literal context selection shared by the encoder's and decoder's
     * literal coders. lc selects how many high bits of the previous
     * byte and lp how many low bits of the position form the context.
     */
    abstract class LiteralCoder {
        private final int lc;
        private final int literalPosMask;

        LiteralCoder(int lc, int lp) {
            this.lc = lc;
            this.literalPosMask = (1 << lp) - 1;
        }

        /**
         * Returns the index of the literal subcoder to use for the
         * given previous byte and position.
         */
        final int getSubcoderIndex(int prevByte, int pos) {
            int low = prevByte >> (8 - lc);
            int high = (pos & literalPosMask) << lc;
            return low + high;
        }

        abstract class LiteralSubcoder {
            final short[] probs = new short[0x300];

            void reset() {
                RangeCoder.initProbs(probs);
            }
        }
    }

    /**
     * Probability models for match lengths. A length is coded in one
     * of three ranges (low/mid/high) selected by two choice bits.
     */
    abstract class LengthCoder {
        static final int LOW_SYMBOLS = 1 << 3;
        static final int MID_SYMBOLS = 1 << 3;
        static final int HIGH_SYMBOLS = 1 << 8;
        final short[] choice = new short[2];
        final short[][] low = new short[POS_STATES_MAX][LOW_SYMBOLS];
        final short[][] mid = new short[POS_STATES_MAX][MID_SYMBOLS];
        final short[] high = new short[HIGH_SYMBOLS];

        void reset() {
            RangeCoder.initProbs(choice);
            for (int i = 0; i < low.length; ++i)
                RangeCoder.initProbs(low[i]);
            // Fix: iterate with mid.length instead of low.length. The old
            // code only worked because both arrays share the length
            // POS_STATES_MAX; behavior is unchanged.
            for (int i = 0; i < mid.length; ++i)
                RangeCoder.initProbs(mid[i]);
            RangeCoder.initProbs(high);
        }
    }
}

View File

@@ -0,0 +1,199 @@
/*
* LZMADecoder
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.lzma;
import java.io.IOException;
import org.tukaani.xz.lz.LZDecoder;
import org.tukaani.xz.rangecoder.RangeDecoder;
/**
 * LZMA symbol decoder: reads bits from a range decoder and turns them
 * into literals and matches that are applied to the LZ window.
 */
public final class LZMADecoder extends LZMACoder {
    /** LZ window into which decoded bytes are written. */
    private final LZDecoder lz;
    /** Source of the range-coded bit stream. */
    private final RangeDecoder rc;
    private final LiteralDecoder literalDecoder;
    private final LengthDecoder matchLenDecoder = new LengthDecoder();
    private final LengthDecoder repLenDecoder = new LengthDecoder();
    /**
     * @param lc number of literal context bits
     * @param lp number of literal position bits
     * @param pb number of position bits
     */
    public LZMADecoder(LZDecoder lz, RangeDecoder rc, int lc, int lp, int pb) {
        super(pb);
        this.lz = lz;
        this.rc = rc;
        this.literalDecoder = new LiteralDecoder(lc, lp);
        reset();
    }
    /** Resets the decoder state and all probability models. */
    public void reset() {
        super.reset();
        literalDecoder.reset();
        matchLenDecoder.reset();
        repLenDecoder.reset();
    }
    /**
     * Returns true if LZMA end marker was detected. It is encoded as
     * the maximum match distance which with signed ints becomes -1. This
     * function is needed only for LZMA1. LZMA2 doesn't use the end marker
     * in the LZMA layer.
     */
    public boolean endMarkerDetected() {
        return reps[0] == -1;
    }
    /**
     * Decodes LZMA symbols and writes the resulting bytes to the LZ
     * window until the window has no more space.
     */
    public void decode() throws IOException {
        lz.repeatPending();
        while (lz.hasSpace()) {
            int posState = lz.getPos() & posMask;
            if (rc.decodeBit(isMatch[state.get()], posState) == 0) {
                literalDecoder.decode();
            } else {
                int len = rc.decodeBit(isRep, state.get()) == 0
                          ? decodeMatch(posState)
                          : decodeRepMatch(posState);
                // NOTE: With LZMA1 streams that have the end marker,
                // this will throw CorruptedInputException. LZMAInputStream
                // handles it specially.
                lz.repeat(reps[0], len);
            }
        }
        rc.normalize();
    }
    /**
     * Decodes a normal match: its length, then its distance (stored
     * into reps[0] after rotating the older distances).
     *
     * @return the match length
     */
    private int decodeMatch(int posState) throws IOException {
        state.updateMatch();
        // Rotate the rep distances; the new distance goes to reps[0].
        reps[3] = reps[2];
        reps[2] = reps[1];
        reps[1] = reps[0];
        int len = matchLenDecoder.decode(posState);
        int distSlot = rc.decodeBitTree(distSlots[getDistState(len)]);
        if (distSlot < DIST_MODEL_START) {
            // Small distances are coded directly as the slot value.
            reps[0] = distSlot;
        } else {
            int limit = (distSlot >> 1) - 1;
            reps[0] = (2 | (distSlot & 1)) << limit;
            if (distSlot < DIST_MODEL_END) {
                reps[0] |= rc.decodeReverseBitTree(
                        distSpecial[distSlot - DIST_MODEL_START]);
            } else {
                // Large distances: direct bits plus four aligned bits.
                reps[0] |= rc.decodeDirectBits(limit - ALIGN_BITS)
                           << ALIGN_BITS;
                reps[0] |= rc.decodeReverseBitTree(distAlign);
            }
        }
        return len;
    }
    /**
     * Decodes a repeated match: selects one of the four previously used
     * distances (moving it to reps[0]) and decodes the match length.
     * A "short rep" reuses reps[0] with length 1.
     *
     * @return the match length
     */
    private int decodeRepMatch(int posState) throws IOException {
        if (rc.decodeBit(isRep0, state.get()) == 0) {
            if (rc.decodeBit(isRep0Long[state.get()], posState) == 0) {
                state.updateShortRep();
                return 1;
            }
        } else {
            int tmp;
            if (rc.decodeBit(isRep1, state.get()) == 0) {
                tmp = reps[1];
            } else {
                if (rc.decodeBit(isRep2, state.get()) == 0) {
                    tmp = reps[2];
                } else {
                    tmp = reps[3];
                    reps[3] = reps[2];
                }
                reps[2] = reps[1];
            }
            reps[1] = reps[0];
            reps[0] = tmp;
        }
        state.updateLongRep();
        return repLenDecoder.decode(posState);
    }
    /** Decodes literal bytes using per-context subdecoders. */
    private class LiteralDecoder extends LiteralCoder {
        LiteralSubdecoder[] subdecoders;
        LiteralDecoder(int lc, int lp) {
            super(lc, lp);
            subdecoders = new LiteralSubdecoder[1 << (lc + lp)];
            for (int i = 0; i < subdecoders.length; ++i)
                subdecoders[i] = new LiteralSubdecoder();
        }
        void reset() {
            for (int i = 0; i < subdecoders.length; ++i)
                subdecoders[i].reset();
        }
        /** Decodes one literal using the subdecoder for the current context. */
        void decode() throws IOException {
            int i = getSubcoderIndex(lz.getByte(0), lz.getPos());
            subdecoders[i].decode();
        }
        private class LiteralSubdecoder extends LiteralSubcoder {
            void decode() throws IOException {
                int symbol = 1;
                if (state.isLiteral()) {
                    // Plain literal: decode eight bits.
                    do {
                        symbol = (symbol << 1) | rc.decodeBit(probs, symbol);
                    } while (symbol < 0x100);
                } else {
                    // After a match, use the byte at distance reps[0] as
                    // a predictor: a second probability set is used while
                    // the decoded bits agree with the match byte.
                    int matchByte = lz.getByte(reps[0]);
                    int offset = 0x100;
                    int matchBit;
                    int bit;
                    do {
                        matchByte <<= 1;
                        matchBit = matchByte & offset;
                        bit = rc.decodeBit(probs, offset + matchBit + symbol);
                        symbol = (symbol << 1) | bit;
                        // Drop to the plain probability set once the
                        // decoded bit differs from the match bit.
                        offset &= (0 - bit) ^ ~matchBit;
                    } while (symbol < 0x100);
                }
                lz.putByte((byte)symbol);
                state.updateLiteral();
            }
        }
    }
    /** Decodes match lengths from the low/mid/high probability models. */
    private class LengthDecoder extends LengthCoder {
        int decode(int posState) throws IOException {
            if (rc.decodeBit(choice, 0) == 0)
                return rc.decodeBitTree(low[posState]) + MATCH_LEN_MIN;
            if (rc.decodeBit(choice, 1) == 0)
                return rc.decodeBitTree(mid[posState])
                       + MATCH_LEN_MIN + LOW_SYMBOLS;
            return rc.decodeBitTree(high)
                   + MATCH_LEN_MIN + LOW_SYMBOLS + MID_SYMBOLS;
        }
    }
}

View File

@@ -0,0 +1,711 @@
/*
* LZMAEncoder
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.lzma;
import org.tukaani.xz.lz.LZEncoder;
import org.tukaani.xz.lz.Matches;
import org.tukaani.xz.rangecoder.RangeEncoder;
/**
 * LZMA symbol encoder: converts the symbols chosen by a subclass
 * (<code>getNextSymbol</code>) into range-coded output, and maintains
 * the price tables that the symbol selection uses.
 */
public abstract class LZMAEncoder extends LZMACoder {
    /** Mode ID selecting LZMAEncoderFast. */
    public static final int MODE_FAST = 1;
    /** Mode ID selecting LZMAEncoderNormal. */
    public static final int MODE_NORMAL = 2;
    /**
     * LZMA2 chunk is considered full when its uncompressed size exceeds
     * <code>LZMA2_UNCOMPRESSED_LIMIT</code>.
     * <p>
     * A compressed LZMA2 chunk can hold 2 MiB of uncompressed data.
     * A single LZMA symbol may indicate up to MATCH_LEN_MAX bytes
     * of data, so the LZMA2 chunk is considered full when there is
     * less space than MATCH_LEN_MAX bytes.
     */
    private static final int LZMA2_UNCOMPRESSED_LIMIT
            = (2 << 20) - MATCH_LEN_MAX;
    /**
     * LZMA2 chunk is considered full when its compressed size exceeds
     * <code>LZMA2_COMPRESSED_LIMIT</code>.
     * <p>
     * The maximum compressed size of a LZMA2 chunk is 64 KiB.
     * A single LZMA symbol might use 20 bytes of space even though
     * it usually takes just one byte or so. Two more bytes are needed
     * for LZMA2 uncompressed chunks (see LZMA2OutputStream.writeChunk).
     * Leave a little safety margin and use 26 bytes.
     */
    private static final int LZMA2_COMPRESSED_LIMIT = (64 << 10) - 26;
    /** Distance prices are recalculated after this many new distances. */
    private static final int DIST_PRICE_UPDATE_INTERVAL = FULL_DISTANCES;
    /** Alignment prices are recalculated after this many aligned distances. */
    private static final int ALIGN_PRICE_UPDATE_INTERVAL = ALIGN_SIZE;
    /** Destination of the range-coded output. */
    private final RangeEncoder rc;
    /** LZ window and match finder. */
    final LZEncoder lz;
    final LiteralEncoder literalEncoder;
    final LengthEncoder matchLenEncoder;
    final LengthEncoder repLenEncoder;
    final int niceLen;
    // Countdown counters; prices are refreshed when these reach zero.
    private int distPriceCount = 0;
    private int alignPriceCount = 0;
    private final int distSlotPricesSize;
    private final int[][] distSlotPrices;
    private final int[][] fullDistPrices
            = new int[DIST_STATES][FULL_DISTANCES];
    private final int[] alignPrices = new int[ALIGN_SIZE];
    /** Distance of the symbol chosen by getNextSymbol; see its Javadoc. */
    int back = 0;
    /** How many bytes the match finder is ahead of the encode position. */
    int readAhead = -1;
    /** Uncompressed size of the current LZMA2 chunk. */
    private int uncompressedSize = 0;
    /**
     * Gets approximate memory usage of the encoder (including its
     * LZEncoder) as kibibytes for the given mode and match finder.
     */
    public static int getMemoryUsage(int mode, int dictSize,
                                     int extraSizeBefore, int mf) {
        int m = 80;
        switch (mode) {
            case MODE_FAST:
                m += LZMAEncoderFast.getMemoryUsage(
                        dictSize, extraSizeBefore, mf);
                break;
            case MODE_NORMAL:
                m += LZMAEncoderNormal.getMemoryUsage(
                        dictSize, extraSizeBefore, mf);
                break;
            default:
                throw new IllegalArgumentException();
        }
        return m;
    }
    /**
     * Creates an encoder of the requested mode (MODE_FAST or
     * MODE_NORMAL); throws IllegalArgumentException for other modes.
     */
    public static LZMAEncoder getInstance(
            RangeEncoder rc, int lc, int lp, int pb, int mode,
            int dictSize, int extraSizeBefore,
            int niceLen, int mf, int depthLimit) {
        switch (mode) {
            case MODE_FAST:
                return new LZMAEncoderFast(rc, lc, lp, pb,
                                           dictSize, extraSizeBefore,
                                           niceLen, mf, depthLimit);
            case MODE_NORMAL:
                return new LZMAEncoderNormal(rc, lc, lp, pb,
                                             dictSize, extraSizeBefore,
                                             niceLen, mf, depthLimit);
        }
        throw new IllegalArgumentException();
    }
    /**
     * Gets an integer [0, 63] matching the highest two bits of an integer.
     * This is like bit scan reverse (BSR) on x86 except that this also
     * cares about the second highest bit.
     */
    public static int getDistSlot(int dist) {
        if (dist <= DIST_MODEL_START)
            return dist;
        int n = dist;
        int i = 31;
        if ((n & 0xFFFF0000) == 0) {
            n <<= 16;
            i = 15;
        }
        if ((n & 0xFF000000) == 0) {
            n <<= 8;
            i -= 8;
        }
        if ((n & 0xF0000000) == 0) {
            n <<= 4;
            i -= 4;
        }
        if ((n & 0xC0000000) == 0) {
            n <<= 2;
            i -= 2;
        }
        if ((n & 0x80000000) == 0)
            --i;
        return (i << 1) + ((dist >>> (i - 1)) & 1);
    }
    /**
     * Gets the next LZMA symbol.
     * <p>
     * There are three types of symbols: literal (a single byte),
     * repeated match, and normal match. The symbol is indicated
     * by the return value and by the variable <code>back</code>.
     * <p>
     * Literal: <code>back == -1</code> and return value is <code>1</code>.
     * The literal itself needs to be read from <code>lz</code> separately.
     * <p>
     * Repeated match: <code>back</code> is in the range [0, 3] and
     * the return value is the length of the repeated match.
     * <p>
     * Normal match: <code>back - REPS</code> (<code>back - 4</code>)
     * is the distance of the match and the return value is the length
     * of the match.
     */
    abstract int getNextSymbol();
    LZMAEncoder(RangeEncoder rc, LZEncoder lz,
                int lc, int lp, int pb, int dictSize, int niceLen) {
        super(pb);
        this.rc = rc;
        this.lz = lz;
        this.niceLen = niceLen;
        literalEncoder = new LiteralEncoder(lc, lp);
        matchLenEncoder = new LengthEncoder(pb, niceLen);
        repLenEncoder = new LengthEncoder(pb, niceLen);
        distSlotPricesSize = getDistSlot(dictSize - 1) + 1;
        distSlotPrices = new int[DIST_STATES][distSlotPricesSize];
        reset();
    }
    public LZEncoder getLZEncoder() {
        return lz;
    }
    /** Resets the encoder state and all probability models and counters. */
    public void reset() {
        super.reset();
        literalEncoder.reset();
        matchLenEncoder.reset();
        repLenEncoder.reset();
        distPriceCount = 0;
        alignPriceCount = 0;
        // Account for bytes already read ahead of the encode position.
        uncompressedSize += readAhead + 1;
        readAhead = -1;
    }
    public int getUncompressedSize() {
        return uncompressedSize;
    }
    public void resetUncompressedSize() {
        uncompressedSize = 0;
    }
    /**
     * Compresses for LZMA2.
     *
     * @return true if the LZMA2 chunk became full, false otherwise
     */
    public boolean encodeForLZMA2() {
        if (!lz.isStarted() && !encodeInit())
            return false;
        while (uncompressedSize <= LZMA2_UNCOMPRESSED_LIMIT
                && rc.getPendingSize() <= LZMA2_COMPRESSED_LIMIT)
            if (!encodeSymbol())
                return false;
        return true;
    }
    /**
     * Encodes the very first byte of the stream, which must be a
     * literal. Returns false if no input is available yet.
     */
    private boolean encodeInit() {
        assert readAhead == -1;
        if (!lz.hasEnoughData(0))
            return false;
        // The first symbol must be a literal unless using
        // a preset dictionary. This code isn't run if using
        // a preset dictionary.
        skip(1);
        rc.encodeBit(isMatch[state.get()], 0, 0);
        literalEncoder.encodeInit();
        --readAhead;
        assert readAhead == -1;
        ++uncompressedSize;
        assert uncompressedSize == 1;
        return true;
    }
    /**
     * Encodes one symbol chosen by getNextSymbol. Returns false if
     * there isn't enough input available to decide on a symbol.
     */
    private boolean encodeSymbol() {
        if (!lz.hasEnoughData(readAhead + 1))
            return false;
        int len = getNextSymbol();
        assert readAhead >= 0;
        int posState = (lz.getPos() - readAhead) & posMask;
        if (back == -1) {
            // Literal i.e. eight-bit byte
            assert len == 1;
            rc.encodeBit(isMatch[state.get()], posState, 0);
            literalEncoder.encode();
        } else {
            // Some type of match
            rc.encodeBit(isMatch[state.get()], posState, 1);
            if (back < REPS) {
                // Repeated match i.e. the same distance
                // has been used earlier.
                assert lz.getMatchLen(-readAhead, reps[back], len) == len;
                rc.encodeBit(isRep, state.get(), 1);
                encodeRepMatch(back, len, posState);
            } else {
                // Normal match
                assert lz.getMatchLen(-readAhead, back - REPS, len) == len;
                rc.encodeBit(isRep, state.get(), 0);
                encodeMatch(back - REPS, len, posState);
            }
        }
        readAhead -= len;
        uncompressedSize += len;
        return true;
    }
    /** Encodes a normal match: length, distance slot, and extra bits. */
    private void encodeMatch(int dist, int len, int posState) {
        state.updateMatch();
        matchLenEncoder.encode(len, posState);
        int distSlot = getDistSlot(dist);
        rc.encodeBitTree(distSlots[getDistState(len)], distSlot);
        if (distSlot >= DIST_MODEL_START) {
            int footerBits = (distSlot >>> 1) - 1;
            int base = (2 | (distSlot & 1)) << footerBits;
            int distReduced = dist - base;
            if (distSlot < DIST_MODEL_END) {
                rc.encodeReverseBitTree(
                        distSpecial[distSlot - DIST_MODEL_START],
                        distReduced);
            } else {
                rc.encodeDirectBits(distReduced >>> ALIGN_BITS,
                                    footerBits - ALIGN_BITS);
                rc.encodeReverseBitTree(distAlign, distReduced & ALIGN_MASK);
                // Count down toward the next align-price refresh.
                --alignPriceCount;
            }
        }
        // New distance goes to reps[0]; older distances shift down.
        reps[3] = reps[2];
        reps[2] = reps[1];
        reps[1] = reps[0];
        reps[0] = dist;
        --distPriceCount;
    }
    /**
     * Encodes a repeated match: selects rep index <code>rep</code>
     * (moving its distance to reps[0]) and encodes the length.
     * rep == 0 with len == 1 is the special "short rep".
     */
    private void encodeRepMatch(int rep, int len, int posState) {
        if (rep == 0) {
            rc.encodeBit(isRep0, state.get(), 0);
            rc.encodeBit(isRep0Long[state.get()], posState, len == 1 ? 0 : 1);
        } else {
            int dist = reps[rep];
            rc.encodeBit(isRep0, state.get(), 1);
            if (rep == 1) {
                rc.encodeBit(isRep1, state.get(), 0);
            } else {
                rc.encodeBit(isRep1, state.get(), 1);
                rc.encodeBit(isRep2, state.get(), rep - 2);
                if (rep == 3)
                    reps[3] = reps[2];
                reps[2] = reps[1];
            }
            reps[1] = reps[0];
            reps[0] = dist;
        }
        if (len == 1) {
            state.updateShortRep();
        } else {
            repLenEncoder.encode(len, posState);
            state.updateLongRep();
        }
    }
    /**
     * Advances the match finder by one byte and returns its matches,
     * verifying them when assertions are enabled.
     */
    Matches getMatches() {
        ++readAhead;
        Matches matches = lz.getMatches();
        assert lz.verifyMatches(matches);
        return matches;
    }
    /** Skips <code>len</code> bytes in the match finder. */
    void skip(int len) {
        readAhead += len;
        lz.skip(len);
    }
    // The get*Price methods below estimate the range-coder cost of a
    // symbol without encoding it; subclasses use them to pick symbols.
    int getAnyMatchPrice(State state, int posState) {
        return RangeEncoder.getBitPrice(isMatch[state.get()][posState], 1);
    }
    int getNormalMatchPrice(int anyMatchPrice, State state) {
        return anyMatchPrice
               + RangeEncoder.getBitPrice(isRep[state.get()], 0);
    }
    int getAnyRepPrice(int anyMatchPrice, State state) {
        return anyMatchPrice
               + RangeEncoder.getBitPrice(isRep[state.get()], 1);
    }
    int getShortRepPrice(int anyRepPrice, State state, int posState) {
        return anyRepPrice
               + RangeEncoder.getBitPrice(isRep0[state.get()], 0)
               + RangeEncoder.getBitPrice(isRep0Long[state.get()][posState],
                                          0);
    }
    int getLongRepPrice(int anyRepPrice, int rep, State state, int posState) {
        int price = anyRepPrice;
        if (rep == 0) {
            price += RangeEncoder.getBitPrice(isRep0[state.get()], 0)
                     + RangeEncoder.getBitPrice(
                             isRep0Long[state.get()][posState], 1);
        } else {
            price += RangeEncoder.getBitPrice(isRep0[state.get()], 1);
            if (rep == 1)
                price += RangeEncoder.getBitPrice(isRep1[state.get()], 0);
            else
                price += RangeEncoder.getBitPrice(isRep1[state.get()], 1)
                         + RangeEncoder.getBitPrice(isRep2[state.get()],
                                                    rep - 2);
        }
        return price;
    }
    int getLongRepAndLenPrice(int rep, int len, State state, int posState) {
        int anyMatchPrice = getAnyMatchPrice(state, posState);
        int anyRepPrice = getAnyRepPrice(anyMatchPrice, state);
        int longRepPrice = getLongRepPrice(anyRepPrice, rep, state, posState);
        return longRepPrice + repLenEncoder.getPrice(len, posState);
    }
    int getMatchAndLenPrice(int normalMatchPrice,
                            int dist, int len, int posState) {
        int price = normalMatchPrice
                    + matchLenEncoder.getPrice(len, posState);
        int distState = getDistState(len);
        if (dist < FULL_DISTANCES) {
            price += fullDistPrices[distState][dist];
        } else {
            // Note that distSlotPrices includes also
            // the price of direct bits.
            int distSlot = getDistSlot(dist);
            price += distSlotPrices[distState][distSlot]
                     + alignPrices[dist & ALIGN_MASK];
        }
        return price;
    }
    /**
     * Recalculates distSlotPrices and fullDistPrices from the current
     * probability models and resets the distance price counter.
     */
    private void updateDistPrices() {
        distPriceCount = DIST_PRICE_UPDATE_INTERVAL;
        for (int distState = 0; distState < DIST_STATES; ++distState) {
            for (int distSlot = 0; distSlot < distSlotPricesSize; ++distSlot)
                distSlotPrices[distState][distSlot]
                        = RangeEncoder.getBitTreePrice(
                                distSlots[distState], distSlot);
            for (int distSlot = DIST_MODEL_END; distSlot < distSlotPricesSize;
                    ++distSlot) {
                int count = (distSlot >>> 1) - 1 - ALIGN_BITS;
                distSlotPrices[distState][distSlot]
                        += RangeEncoder.getDirectBitsPrice(count);
            }
            for (int dist = 0; dist < DIST_MODEL_START; ++dist)
                fullDistPrices[distState][dist]
                        = distSlotPrices[distState][dist];
        }
        int dist = DIST_MODEL_START;
        for (int distSlot = DIST_MODEL_START; distSlot < DIST_MODEL_END;
                ++distSlot) {
            int footerBits = (distSlot >>> 1) - 1;
            int base = (2 | (distSlot & 1)) << footerBits;
            int limit = distSpecial[distSlot - DIST_MODEL_START].length;
            for (int i = 0; i < limit; ++i) {
                int distReduced = dist - base;
                int price = RangeEncoder.getReverseBitTreePrice(
                        distSpecial[distSlot - DIST_MODEL_START],
                        distReduced);
                for (int distState = 0; distState < DIST_STATES; ++distState)
                    fullDistPrices[distState][dist]
                            = distSlotPrices[distState][distSlot] + price;
                ++dist;
            }
        }
        assert dist == FULL_DISTANCES;
    }
    /**
     * Recalculates alignPrices from the current probability model and
     * resets the alignment price counter.
     */
    private void updateAlignPrices() {
        alignPriceCount = ALIGN_PRICE_UPDATE_INTERVAL;
        for (int i = 0; i < ALIGN_SIZE; ++i)
            alignPrices[i] = RangeEncoder.getReverseBitTreePrice(distAlign,
                                                                 i);
    }
    /**
     * Updates the lookup tables used for calculating match distance
     * and length prices. The updating is skipped for performance reasons
     * if the tables haven't changed much since the previous update.
     */
    void updatePrices() {
        if (distPriceCount <= 0)
            updateDistPrices();
        if (alignPriceCount <= 0)
            updateAlignPrices();
        matchLenEncoder.updatePrices();
        repLenEncoder.updatePrices();
    }
    /** Encodes literal bytes using per-context subencoders. */
    class LiteralEncoder extends LiteralCoder {
        LiteralSubencoder[] subencoders;
        LiteralEncoder(int lc, int lp) {
            super(lc, lp);
            subencoders = new LiteralSubencoder[1 << (lc + lp)];
            for (int i = 0; i < subencoders.length; ++i)
                subencoders[i] = new LiteralSubencoder();
        }
        void reset() {
            for (int i = 0; i < subencoders.length; ++i)
                subencoders[i].reset();
        }
        void encodeInit() {
            // When encoding the first byte of the stream, there is
            // no previous byte in the dictionary so the encode function
            // wouldn't work.
            assert readAhead >= 0;
            subencoders[0].encode();
        }
        /** Encodes the literal at the current encode position. */
        void encode() {
            assert readAhead >= 0;
            int i = getSubcoderIndex(lz.getByte(1 + readAhead),
                                     lz.getPos() - readAhead);
            subencoders[i].encode();
        }
        /** Estimates the price of encoding curByte in the given context. */
        int getPrice(int curByte, int matchByte,
                     int prevByte, int pos, State state) {
            int price = RangeEncoder.getBitPrice(
                    isMatch[state.get()][pos & posMask], 0);
            int i = getSubcoderIndex(prevByte, pos);
            price += state.isLiteral()
                     ? subencoders[i].getNormalPrice(curByte)
                     : subencoders[i].getMatchedPrice(curByte, matchByte);
            return price;
        }
        private class LiteralSubencoder extends LiteralSubcoder {
            void encode() {
                int symbol = lz.getByte(readAhead) | 0x100;
                if (state.isLiteral()) {
                    // Plain literal: encode eight bits.
                    int subencoderIndex;
                    int bit;
                    do {
                        subencoderIndex = symbol >>> 8;
                        bit = (symbol >>> 7) & 1;
                        rc.encodeBit(probs, subencoderIndex, bit);
                        symbol <<= 1;
                    } while (symbol < 0x10000);
                } else {
                    // After a match, use the byte at distance reps[0] as
                    // a predictor: a second probability set is used while
                    // the encoded bits agree with the match byte.
                    int matchByte = lz.getByte(reps[0] + 1 + readAhead);
                    int offset = 0x100;
                    int subencoderIndex;
                    int matchBit;
                    int bit;
                    do {
                        matchByte <<= 1;
                        matchBit = matchByte & offset;
                        subencoderIndex = offset + matchBit + (symbol >>> 8);
                        bit = (symbol >>> 7) & 1;
                        rc.encodeBit(probs, subencoderIndex, bit);
                        symbol <<= 1;
                        offset &= ~(matchByte ^ symbol);
                    } while (symbol < 0x10000);
                }
                state.updateLiteral();
            }
            int getNormalPrice(int symbol) {
                int price = 0;
                int subencoderIndex;
                int bit;
                symbol |= 0x100;
                do {
                    subencoderIndex = symbol >>> 8;
                    bit = (symbol >>> 7) & 1;
                    price += RangeEncoder.getBitPrice(probs[subencoderIndex],
                                                      bit);
                    symbol <<= 1;
                } while (symbol < (0x100 << 8));
                return price;
            }
            int getMatchedPrice(int symbol, int matchByte) {
                int price = 0;
                int offset = 0x100;
                int subencoderIndex;
                int matchBit;
                int bit;
                symbol |= 0x100;
                do {
                    matchByte <<= 1;
                    matchBit = matchByte & offset;
                    subencoderIndex = offset + matchBit + (symbol >>> 8);
                    bit = (symbol >>> 7) & 1;
                    price += RangeEncoder.getBitPrice(probs[subencoderIndex],
                                                      bit);
                    symbol <<= 1;
                    offset &= ~(matchByte ^ symbol);
                } while (symbol < (0x100 << 8));
                return price;
            }
        }
    }
    /** Encodes match lengths and caches their prices per posState. */
    class LengthEncoder extends LengthCoder {
        /**
         * The prices are updated after at least
         * <code>PRICE_UPDATE_INTERVAL</code> many lengths
         * have been encoded with the same posState.
         */
        private static final int PRICE_UPDATE_INTERVAL = 32; // FIXME?
        private final int[] counters;
        private final int[][] prices;
        LengthEncoder(int pb, int niceLen) {
            int posStates = 1 << pb;
            counters = new int[posStates];
            // Always allocate at least LOW_SYMBOLS + MID_SYMBOLS because
            // it makes updatePrices slightly simpler. The prices aren't
            // usually needed anyway if niceLen < 18.
            int lenSymbols = Math.max(niceLen - MATCH_LEN_MIN + 1,
                                      LOW_SYMBOLS + MID_SYMBOLS);
            prices = new int[posStates][lenSymbols];
        }
        void reset() {
            super.reset();
            // Reset counters to zero to force price update before
            // the prices are needed.
            for (int i = 0; i < counters.length; ++i)
                counters[i] = 0;
        }
        /** Encodes len using the low/mid/high range selected by choice bits. */
        void encode(int len, int posState) {
            len -= MATCH_LEN_MIN;
            if (len < LOW_SYMBOLS) {
                rc.encodeBit(choice, 0, 0);
                rc.encodeBitTree(low[posState], len);
            } else {
                rc.encodeBit(choice, 0, 1);
                len -= LOW_SYMBOLS;
                if (len < MID_SYMBOLS) {
                    rc.encodeBit(choice, 1, 0);
                    rc.encodeBitTree(mid[posState], len);
                } else {
                    rc.encodeBit(choice, 1, 1);
                    rc.encodeBitTree(high, len - MID_SYMBOLS);
                }
            }
            --counters[posState];
        }
        int getPrice(int len, int posState) {
            return prices[posState][len - MATCH_LEN_MIN];
        }
        /** Refreshes cached prices for any posState whose counter expired. */
        void updatePrices() {
            for (int posState = 0; posState < counters.length; ++posState) {
                if (counters[posState] <= 0) {
                    counters[posState] = PRICE_UPDATE_INTERVAL;
                    updatePrices(posState);
                }
            }
        }
        private void updatePrices(int posState) {
            int choice0Price = RangeEncoder.getBitPrice(choice[0], 0);
            int i = 0;
            for (; i < LOW_SYMBOLS; ++i)
                prices[posState][i] = choice0Price
                        + RangeEncoder.getBitTreePrice(low[posState], i);
            choice0Price = RangeEncoder.getBitPrice(choice[0], 1);
            int choice1Price = RangeEncoder.getBitPrice(choice[1], 0);
            for (; i < LOW_SYMBOLS + MID_SYMBOLS; ++i)
                prices[posState][i] = choice0Price + choice1Price
                        + RangeEncoder.getBitTreePrice(mid[posState],
                                                       i - LOW_SYMBOLS);
            choice1Price = RangeEncoder.getBitPrice(choice[1], 1);
            for (; i < prices[posState].length; ++i)
                prices[posState][i] = choice0Price + choice1Price
                        + RangeEncoder.getBitTreePrice(high, i - LOW_SYMBOLS
                                                             - MID_SYMBOLS);
        }
    }
}

View File

@@ -0,0 +1,151 @@
/*
* LZMAEncoderFast
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.lzma;
import org.tukaani.xz.lz.LZEncoder;
import org.tukaani.xz.lz.Matches;
import org.tukaani.xz.rangecoder.RangeEncoder;
/**
 * LZMA encoder with fast (greedy) match selection: at each position it
 * chooses between the longest match from the match finder and the best
 * repeated match using a few quick heuristics, without the price-based
 * optimization done by LZMAEncoderNormal.
 */
final class LZMAEncoderFast extends LZMAEncoder {
    // Extra dictionary space needed before and after the current
    // position. These are constants, so declare them final.
    private static final int EXTRA_SIZE_BEFORE = 1;
    private static final int EXTRA_SIZE_AFTER = MATCH_LEN_MAX - 1;

    // Matches returned by the match finder for the current byte.
    private Matches matches = null;

    /**
     * Returns the memory usage estimate (as reported by
     * LZEncoder.getMemoryUsage) for the given settings.
     */
    static int getMemoryUsage(int dictSize, int extraSizeBefore, int mf) {
        return LZEncoder.getMemoryUsage(
                dictSize, Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE),
                EXTRA_SIZE_AFTER, MATCH_LEN_MAX, mf);
    }

    LZMAEncoderFast(RangeEncoder rc, int lc, int lp, int pb,
                    int dictSize, int extraSizeBefore,
                    int niceLen, int mf, int depthLimit) {
        super(rc, LZEncoder.getInstance(dictSize,
                                        Math.max(extraSizeBefore,
                                                 EXTRA_SIZE_BEFORE),
                                        EXTRA_SIZE_AFTER,
                                        niceLen, MATCH_LEN_MAX,
                                        mf, depthLimit),
              lc, lp, pb, dictSize, niceLen);
    }

    // Heuristic: a one-byte-shorter match is worth taking when its
    // distance is over 128 times smaller.
    private boolean changePair(int smallDist, int bigDist) {
        return smallDist < (bigDist >>> 7);
    }

    /**
     * Decides what to encode at the current position: returns 1 to
     * encode a literal, or a match length (with <code>back</code> set
     * to the distance index) to encode a match or a repeated match.
     */
    int getNextSymbol() {
        // Get the matches for the next byte unless readAhead indicates
        // that we already got the new matches during the previous call
        // to this function.
        if (readAhead == -1)
            matches = getMatches();

        back = -1;

        // Get the number of bytes available in the dictionary, but
        // not more than the maximum match length. If there aren't
        // enough bytes remaining to encode a match at all, return
        // immediately to encode this byte as a literal.
        int avail = Math.min(lz.getAvail(), MATCH_LEN_MAX);
        if (avail < MATCH_LEN_MIN)
            return 1;

        // Look for a match from the previous four match distances.
        int bestRepLen = 0;
        int bestRepIndex = 0;
        for (int rep = 0; rep < REPS; ++rep) {
            int len = lz.getMatchLen(reps[rep], avail);
            if (len < MATCH_LEN_MIN)
                continue;

            // If it is long enough, return it.
            if (len >= niceLen) {
                back = rep;
                skip(len - 1);
                return len;
            }

            // Remember the index and length of the best repeated match.
            if (len > bestRepLen) {
                bestRepIndex = rep;
                bestRepLen = len;
            }
        }

        int mainLen = 0;
        int mainDist = 0;

        if (matches.count > 0) {
            mainLen = matches.len[matches.count - 1];
            mainDist = matches.dist[matches.count - 1];

            // Return if the longest match is at least niceLen bytes long.
            if (mainLen >= niceLen) {
                back = mainDist + REPS;
                skip(mainLen - 1);
                return mainLen;
            }

            // Prefer a one-byte-shorter match when its distance is
            // much smaller (see changePair).
            while (matches.count > 1
                    && mainLen == matches.len[matches.count - 2] + 1) {
                if (!changePair(matches.dist[matches.count - 2], mainDist))
                    break;

                --matches.count;
                mainLen = matches.len[matches.count - 1];
                mainDist = matches.dist[matches.count - 1];
            }

            // Heuristic: a minimum-length match with a big distance is
            // likely more expensive than encoding a literal instead.
            if (mainLen == MATCH_LEN_MIN && mainDist >= 0x80)
                mainLen = 1;
        }

        // Prefer the repeated match when it is not much shorter than
        // the normal match; rep distances are cheaper to encode.
        if (bestRepLen >= MATCH_LEN_MIN) {
            if (bestRepLen + 1 >= mainLen
                    || (bestRepLen + 2 >= mainLen && mainDist >= (1 << 9))
                    || (bestRepLen + 3 >= mainLen && mainDist >= (1 << 15))) {
                back = bestRepIndex;
                skip(bestRepLen - 1);
                return bestRepLen;
            }
        }

        if (mainLen < MATCH_LEN_MIN || avail <= MATCH_LEN_MIN)
            return 1;

        // Get the next match. Test if it is better than the current match.
        // If so, encode the current byte as a literal.
        matches = getMatches();

        if (matches.count > 0) {
            int newLen = matches.len[matches.count - 1];
            int newDist = matches.dist[matches.count - 1];

            if ((newLen >= mainLen && newDist < mainDist)
                    || (newLen == mainLen + 1
                        && !changePair(mainDist, newDist))
                    || newLen > mainLen + 1
                    || (newLen + 1 >= mainLen
                        && mainLen >= MATCH_LEN_MIN + 1
                        && changePair(newDist, mainDist)))
                return 1;
        }

        // Also prefer a literal if some rep distance matches almost as
        // long as the whole main match.
        int limit = Math.max(mainLen - 1, MATCH_LEN_MIN);
        for (int rep = 0; rep < REPS; ++rep)
            if (lz.getMatchLen(reps[rep], limit) == limit)
                return 1;

        back = mainDist + REPS;
        skip(mainLen - 2);
        return mainLen;
    }
}

View File

@@ -0,0 +1,566 @@
/*
* LZMAEncoderNormal
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.lzma;
import org.tukaani.xz.lz.LZEncoder;
import org.tukaani.xz.lz.Matches;
import org.tukaani.xz.rangecoder.RangeEncoder;
/**
 * LZMA encoder with "normal" (price-optimizing) match selection: for
 * each position it evaluates the prices of many literal/match/rep
 * combinations over a window of up to OPTS bytes (the opts array) and
 * then emits the cheapest symbol sequence found.
 */
final class LZMAEncoderNormal extends LZMAEncoder {
    private static final int OPTS = 4096;

    // Extra dictionary space needed before and after the current
    // position. These are constants, so declare them final.
    private static final int EXTRA_SIZE_BEFORE = OPTS;
    private static final int EXTRA_SIZE_AFTER = OPTS;

    // Graph of price nodes: opts[i] describes the cheapest known way
    // to arrive i bytes ahead of the current position.
    private final Optimum[] opts = new Optimum[OPTS];
    private int optCur = 0;
    private int optEnd = 0;

    private Matches matches;

    // These are fields solely to avoid allocating the objects again and
    // again on each function call.
    private final int[] repLens = new int[REPS];
    private final State nextState = new State();

    /**
     * Returns the memory usage estimate (LZEncoder.getMemoryUsage plus
     * the opts array overhead) for the given settings.
     */
    static int getMemoryUsage(int dictSize, int extraSizeBefore, int mf) {
        return LZEncoder.getMemoryUsage(dictSize,
                       Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE),
                       EXTRA_SIZE_AFTER, MATCH_LEN_MAX, mf)
               + OPTS * 64 / 1024;
    }

    LZMAEncoderNormal(RangeEncoder rc, int lc, int lp, int pb,
                      int dictSize, int extraSizeBefore,
                      int niceLen, int mf, int depthLimit) {
        super(rc, LZEncoder.getInstance(dictSize,
                                        Math.max(extraSizeBefore,
                                                 EXTRA_SIZE_BEFORE),
                                        EXTRA_SIZE_AFTER,
                                        niceLen, MATCH_LEN_MAX,
                                        mf, depthLimit),
              lc, lp, pb, dictSize, niceLen);

        for (int i = 0; i < OPTS; ++i)
            opts[i] = new Optimum();
    }

    public void reset() {
        optCur = 0;
        optEnd = 0;
        super.reset();
    }

    /**
     * Converts the opts array from backward indexes to forward indexes.
     * Then it will be simple to get the next symbol from the array
     * in later calls to <code>getNextSymbol()</code>.
     */
    private int convertOpts() {
        optEnd = optCur;

        int optPrev = opts[optCur].optPrev;

        do {
            Optimum opt = opts[optCur];

            if (opt.prev1IsLiteral) {
                opts[optPrev].optPrev = optCur;
                opts[optPrev].backPrev = -1;
                optCur = optPrev--;

                if (opt.hasPrev2) {
                    opts[optPrev].optPrev = optPrev + 1;
                    opts[optPrev].backPrev = opt.backPrev2;
                    optCur = optPrev;
                    optPrev = opt.optPrev2;
                }
            }

            int temp = opts[optPrev].optPrev;
            opts[optPrev].optPrev = optCur;
            optCur = optPrev;
            optPrev = temp;
        } while (optCur > 0);

        optCur = opts[0].optPrev;
        back = opts[optCur].backPrev;
        return optCur;
    }

    /**
     * Decides what to encode at the current position: returns 1 to
     * encode a literal, or a match length (with <code>back</code> set
     * to the distance index) for a match or repeated match. Pending
     * symbols from an earlier optimization pass are returned first.
     */
    int getNextSymbol() {
        // If there are pending symbols from an earlier call to this
        // function, return those symbols first.
        if (optCur < optEnd) {
            int len = opts[optCur].optPrev - optCur;
            optCur = opts[optCur].optPrev;
            back = opts[optCur].backPrev;
            return len;
        }

        assert optCur == optEnd;
        optCur = 0;
        optEnd = 0;
        back = -1;

        if (readAhead == -1)
            matches = getMatches();

        // Get the number of bytes available in the dictionary, but
        // not more than the maximum match length. If there aren't
        // enough bytes remaining to encode a match at all, return
        // immediately to encode this byte as a literal.
        int avail = Math.min(lz.getAvail(), MATCH_LEN_MAX);
        if (avail < MATCH_LEN_MIN)
            return 1;

        // Get the lengths of repeated matches.
        int repBest = 0;
        for (int rep = 0; rep < REPS; ++rep) {
            repLens[rep] = lz.getMatchLen(reps[rep], avail);

            if (repLens[rep] < MATCH_LEN_MIN) {
                repLens[rep] = 0;
                continue;
            }

            if (repLens[rep] > repLens[repBest])
                repBest = rep;
        }

        // Return if the best repeated match is at least niceLen bytes long.
        if (repLens[repBest] >= niceLen) {
            back = repBest;
            skip(repLens[repBest] - 1);
            return repLens[repBest];
        }

        // Initialize mainLen and mainDist to the longest match found
        // by the match finder.
        int mainLen = 0;
        int mainDist = 0;
        if (matches.count > 0) {
            mainLen = matches.len[matches.count - 1];
            mainDist = matches.dist[matches.count - 1];

            // Return if it is at least niceLen bytes long.
            if (mainLen >= niceLen) {
                back = mainDist + REPS;
                skip(mainLen - 1);
                return mainLen;
            }
        }

        int curByte = lz.getByte(0);
        int matchByte = lz.getByte(reps[0] + 1);

        // If the match finder found no matches and this byte cannot be
        // encoded as a repeated match (short or long), we must return
        // to have the byte encoded as a literal.
        if (mainLen < MATCH_LEN_MIN && curByte != matchByte
                && repLens[repBest] < MATCH_LEN_MIN)
            return 1;

        int pos = lz.getPos();
        int posState = pos & posMask;

        // Calculate the price of encoding the current byte as a literal.
        {
            int prevByte = lz.getByte(1);
            int literalPrice = literalEncoder.getPrice(curByte, matchByte,
                                                       prevByte, pos, state);
            opts[1].set1(literalPrice, 0, -1);
        }

        int anyMatchPrice = getAnyMatchPrice(state, posState);
        int anyRepPrice = getAnyRepPrice(anyMatchPrice, state);

        // If it is possible to encode this byte as a short rep, see if
        // it is cheaper than encoding it as a literal.
        if (matchByte == curByte) {
            int shortRepPrice = getShortRepPrice(anyRepPrice,
                                                 state, posState);
            if (shortRepPrice < opts[1].price)
                opts[1].set1(shortRepPrice, 0, 0);
        }

        // Return if there is neither normal nor long repeated match. Use
        // a short match instead of a literal if it is possible and cheaper.
        optEnd = Math.max(mainLen, repLens[repBest]);
        if (optEnd < MATCH_LEN_MIN) {
            assert optEnd == 0 : optEnd;
            back = opts[1].backPrev;
            return 1;
        }

        // Update the lookup tables for distances and lengths before using
        // those price calculation functions. (The price functions above
        // don't need these tables.)
        updatePrices();

        // Initialize the state and reps of this position in opts[].
        // updateOptStateAndReps() will need these to get the new
        // state and reps for the next byte.
        opts[0].state.set(state);
        System.arraycopy(reps, 0, opts[0].reps, 0, REPS);

        // Initialize the prices for later opts that will be used below.
        for (int i = optEnd; i >= MATCH_LEN_MIN; --i)
            opts[i].reset();

        // Calculate the prices of repeated matches of all lengths.
        for (int rep = 0; rep < REPS; ++rep) {
            int repLen = repLens[rep];
            if (repLen < MATCH_LEN_MIN)
                continue;

            int longRepPrice = getLongRepPrice(anyRepPrice, rep,
                                               state, posState);
            do {
                int price = longRepPrice + repLenEncoder.getPrice(repLen,
                                                                  posState);
                if (price < opts[repLen].price)
                    opts[repLen].set1(price, 0, rep);
            } while (--repLen >= MATCH_LEN_MIN);
        }

        // Calculate the prices of normal matches that are longer than rep0.
        {
            int len = Math.max(repLens[0] + 1, MATCH_LEN_MIN);
            if (len <= mainLen) {
                int normalMatchPrice = getNormalMatchPrice(anyMatchPrice,
                                                           state);

                // Set i to the index of the shortest match that is
                // at least len bytes long.
                int i = 0;
                while (len > matches.len[i])
                    ++i;

                while (true) {
                    int dist = matches.dist[i];
                    int price = getMatchAndLenPrice(normalMatchPrice,
                                                    dist, len, posState);
                    if (price < opts[len].price)
                        opts[len].set1(price, 0, dist + REPS);

                    if (len == matches.len[i])
                        if (++i == matches.count)
                            break;

                    ++len;
                }
            }
        }

        avail = Math.min(lz.getAvail(), OPTS - 1);

        // Get matches for later bytes and optimize the use of LZMA symbols
        // by calculating the prices and picking the cheapest symbol
        // combinations.
        while (++optCur < optEnd) {
            matches = getMatches();
            if (matches.count > 0
                    && matches.len[matches.count - 1] >= niceLen)
                break;

            --avail;
            ++pos;
            posState = pos & posMask;

            updateOptStateAndReps();

            anyMatchPrice = opts[optCur].price
                            + getAnyMatchPrice(opts[optCur].state, posState);
            anyRepPrice = getAnyRepPrice(anyMatchPrice, opts[optCur].state);

            calc1BytePrices(pos, posState, avail, anyRepPrice);

            if (avail >= MATCH_LEN_MIN) {
                int startLen = calcLongRepPrices(pos, posState,
                                                 avail, anyRepPrice);
                if (matches.count > 0)
                    calcNormalMatchPrices(pos, posState, avail,
                                          anyMatchPrice, startLen);
            }
        }

        return convertOpts();
    }

    /**
     * Updates the state and reps for the current byte in the opts array.
     */
    private void updateOptStateAndReps() {
        int optPrev = opts[optCur].optPrev;
        assert optPrev < optCur;

        if (opts[optCur].prev1IsLiteral) {
            --optPrev;

            if (opts[optCur].hasPrev2) {
                opts[optCur].state.set(opts[opts[optCur].optPrev2].state);
                if (opts[optCur].backPrev2 < REPS)
                    opts[optCur].state.updateLongRep();
                else
                    opts[optCur].state.updateMatch();
            } else {
                opts[optCur].state.set(opts[optPrev].state);
            }

            opts[optCur].state.updateLiteral();
        } else {
            opts[optCur].state.set(opts[optPrev].state);
        }

        if (optPrev == optCur - 1) {
            // Must be either a short rep or a literal.
            assert opts[optCur].backPrev == 0 || opts[optCur].backPrev == -1;

            if (opts[optCur].backPrev == 0)
                opts[optCur].state.updateShortRep();
            else
                opts[optCur].state.updateLiteral();

            System.arraycopy(opts[optPrev].reps, 0,
                             opts[optCur].reps, 0, REPS);
        } else {
            int back;
            if (opts[optCur].prev1IsLiteral && opts[optCur].hasPrev2) {
                optPrev = opts[optCur].optPrev2;
                back = opts[optCur].backPrev2;
                opts[optCur].state.updateLongRep();
            } else {
                back = opts[optCur].backPrev;
                if (back < REPS)
                    opts[optCur].state.updateLongRep();
                else
                    opts[optCur].state.updateMatch();
            }

            if (back < REPS) {
                // A repeated match: move the used rep to the front of
                // the rep list.
                opts[optCur].reps[0] = opts[optPrev].reps[back];

                int rep;
                for (rep = 1; rep <= back; ++rep)
                    opts[optCur].reps[rep] = opts[optPrev].reps[rep - 1];

                for (; rep < REPS; ++rep)
                    opts[optCur].reps[rep] = opts[optPrev].reps[rep];
            } else {
                // A normal match: its distance becomes the new rep0.
                opts[optCur].reps[0] = back - REPS;
                System.arraycopy(opts[optPrev].reps, 0,
                                 opts[optCur].reps, 1, REPS - 1);
            }
        }
    }

    /**
     * Calculates prices of a literal, a short rep, and literal + rep0.
     */
    private void calc1BytePrices(int pos, int posState,
                                 int avail, int anyRepPrice) {
        // This will be set to true if using a literal or a short rep.
        boolean nextIsByte = false;

        int curByte = lz.getByte(0);
        int matchByte = lz.getByte(opts[optCur].reps[0] + 1);

        // Try a literal.
        int literalPrice = opts[optCur].price
                + literalEncoder.getPrice(curByte, matchByte, lz.getByte(1),
                                          pos, opts[optCur].state);
        if (literalPrice < opts[optCur + 1].price) {
            opts[optCur + 1].set1(literalPrice, optCur, -1);
            nextIsByte = true;
        }

        // Try a short rep.
        if (matchByte == curByte && (opts[optCur + 1].optPrev == optCur
                || opts[optCur + 1].backPrev != 0)) {
            int shortRepPrice = getShortRepPrice(anyRepPrice,
                                                 opts[optCur].state,
                                                 posState);
            if (shortRepPrice <= opts[optCur + 1].price) {
                opts[optCur + 1].set1(shortRepPrice, optCur, 0);
                nextIsByte = true;
            }
        }

        // If neither a literal nor a short rep was the cheapest choice,
        // try literal + long rep0.
        if (!nextIsByte && matchByte != curByte && avail > MATCH_LEN_MIN) {
            int lenLimit = Math.min(niceLen, avail - 1);
            int len = lz.getMatchLen(1, opts[optCur].reps[0], lenLimit);

            if (len >= MATCH_LEN_MIN) {
                nextState.set(opts[optCur].state);
                nextState.updateLiteral();
                int nextPosState = (pos + 1) & posMask;
                int price = literalPrice
                            + getLongRepAndLenPrice(0, len,
                                                    nextState, nextPosState);

                int i = optCur + 1 + len;
                while (optEnd < i)
                    opts[++optEnd].reset();

                if (price < opts[i].price)
                    opts[i].set2(price, optCur, 0);
            }
        }
    }

    /**
     * Calculates prices of long rep and long rep + literal + rep0.
     */
    private int calcLongRepPrices(int pos, int posState,
                                  int avail, int anyRepPrice) {
        int startLen = MATCH_LEN_MIN;
        int lenLimit = Math.min(avail, niceLen);

        for (int rep = 0; rep < REPS; ++rep) {
            int len = lz.getMatchLen(opts[optCur].reps[rep], lenLimit);
            if (len < MATCH_LEN_MIN)
                continue;

            while (optEnd < optCur + len)
                opts[++optEnd].reset();

            int longRepPrice = getLongRepPrice(anyRepPrice, rep,
                                               opts[optCur].state, posState);

            for (int i = len; i >= MATCH_LEN_MIN; --i) {
                int price = longRepPrice
                            + repLenEncoder.getPrice(i, posState);
                if (price < opts[optCur + i].price)
                    opts[optCur + i].set1(price, optCur, rep);
            }

            if (rep == 0)
                startLen = len + 1;

            int len2Limit = Math.min(niceLen, avail - len - 1);
            int len2 = lz.getMatchLen(len + 1, opts[optCur].reps[rep],
                                      len2Limit);

            if (len2 >= MATCH_LEN_MIN) {
                // Rep
                int price = longRepPrice
                            + repLenEncoder.getPrice(len, posState);
                nextState.set(opts[optCur].state);
                nextState.updateLongRep();

                // Literal
                int curByte = lz.getByte(len, 0);
                int matchByte = lz.getByte(0); // lz.getByte(len, len)
                int prevByte = lz.getByte(len, 1);
                price += literalEncoder.getPrice(curByte, matchByte, prevByte,
                                                 pos + len, nextState);
                nextState.updateLiteral();

                // Rep0
                int nextPosState = (pos + len + 1) & posMask;
                price += getLongRepAndLenPrice(0, len2,
                                               nextState, nextPosState);

                int i = optCur + len + 1 + len2;
                while (optEnd < i)
                    opts[++optEnd].reset();

                if (price < opts[i].price)
                    opts[i].set3(price, optCur, rep, len, 0);
            }
        }

        return startLen;
    }

    /**
     * Calculates prices of a normal match and normal match + literal + rep0.
     */
    private void calcNormalMatchPrices(int pos, int posState, int avail,
                                       int anyMatchPrice, int startLen) {
        // If the longest match is so long that it would not fit into
        // the opts array, shorten the matches.
        if (matches.len[matches.count - 1] > avail) {
            matches.count = 0;
            while (matches.len[matches.count] < avail)
                ++matches.count;

            matches.len[matches.count++] = avail;
        }

        if (matches.len[matches.count - 1] < startLen)
            return;

        while (optEnd < optCur + matches.len[matches.count - 1])
            opts[++optEnd].reset();

        int normalMatchPrice = getNormalMatchPrice(anyMatchPrice,
                                                   opts[optCur].state);

        int match = 0;
        while (startLen > matches.len[match])
            ++match;

        for (int len = startLen; ; ++len) {
            int dist = matches.dist[match];

            // Calculate the price of a match of len bytes from the nearest
            // possible distance.
            int matchAndLenPrice = getMatchAndLenPrice(normalMatchPrice,
                                                       dist, len, posState);
            if (matchAndLenPrice < opts[optCur + len].price)
                opts[optCur + len].set1(matchAndLenPrice,
                                        optCur, dist + REPS);

            if (len != matches.len[match])
                continue;

            // Try match + literal + rep0. First get the length of the rep0.
            int len2Limit = Math.min(niceLen, avail - len - 1);
            int len2 = lz.getMatchLen(len + 1, dist, len2Limit);

            if (len2 >= MATCH_LEN_MIN) {
                nextState.set(opts[optCur].state);
                nextState.updateMatch();

                // Literal
                int curByte = lz.getByte(len, 0);
                int matchByte = lz.getByte(0); // lz.getByte(len, len)
                int prevByte = lz.getByte(len, 1);
                int price = matchAndLenPrice
                            + literalEncoder.getPrice(curByte, matchByte,
                                                      prevByte, pos + len,
                                                      nextState);
                nextState.updateLiteral();

                // Rep0
                int nextPosState = (pos + len + 1) & posMask;
                price += getLongRepAndLenPrice(0, len2,
                                               nextState, nextPosState);

                int i = optCur + len + 1 + len2;
                while (optEnd < i)
                    opts[++optEnd].reset();

                if (price < opts[i].price)
                    opts[i].set3(price, optCur, dist + REPS, len, 0);
            }

            if (++match == matches.count)
                break;
        }
    }
}

View File

@@ -0,0 +1,73 @@
/*
* Optimum
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.lzma;
/**
 * One node in the LZMA symbol-optimization graph: records the cheapest
 * known way to arrive at a byte position, the coder state and rep
 * distances at that point, and backward links describing which symbol
 * combination produced that price.
 */
final class Optimum {
    // Effectively infinite price: any real price compares smaller.
    private static final int INFINITY_PRICE = 1 << 30;

    final State state = new State();
    final int[] reps = new int[LZMACoder.REPS];

    /**
     * Cumulative price of arriving to this byte.
     */
    int price;

    int optPrev;
    int backPrev;
    boolean prev1IsLiteral;

    boolean hasPrev2;
    int optPrev2;
    int backPrev2;

    /**
     * Marks the price as unknown so any computed price will beat it.
     */
    void reset() {
        price = INFINITY_PRICE;
    }

    /**
     * Sets to indicate one LZMA symbol (literal, rep, or match).
     */
    void set1(int newPrice, int optCur, int back) {
        prev1IsLiteral = false;
        backPrev = back;
        optPrev = optCur;
        price = newPrice;
    }

    /**
     * Sets to indicate two LZMA symbols of which the first one is a literal.
     */
    void set2(int newPrice, int optCur, int back) {
        set1(newPrice, optCur + 1, back);
        prev1IsLiteral = true;
        hasPrev2 = false;
    }

    /**
     * Sets to indicate three LZMA symbols of which the second one
     * is a literal.
     */
    void set3(int newPrice, int optCur, int back2, int len2, int back) {
        set1(newPrice, optCur + len2 + 1, back);
        prev1IsLiteral = true;
        hasPrev2 = true;
        optPrev2 = optCur;
        backPrev2 = back2;
    }
}

View File

@@ -0,0 +1,75 @@
/*
* State
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.lzma;
final class State {
static final int STATES = 12;
private static final int LIT_STATES = 7;
private static final int LIT_LIT = 0;
private static final int MATCH_LIT_LIT = 1;
private static final int REP_LIT_LIT = 2;
private static final int SHORTREP_LIT_LIT = 3;
private static final int MATCH_LIT = 4;
private static final int REP_LIT = 5;
private static final int SHORTREP_LIT = 6;
private static final int LIT_MATCH = 7;
private static final int LIT_LONGREP = 8;
private static final int LIT_SHORTREP = 9;
private static final int NONLIT_MATCH = 10;
private static final int NONLIT_REP = 11;
private int state;
State() {}
State(State other) {
state = other.state;
}
void reset() {
state = LIT_LIT;
}
int get() {
return state;
}
void set(State other) {
state = other.state;
}
void updateLiteral() {
if (state <= SHORTREP_LIT_LIT)
state = LIT_LIT;
else if (state <= LIT_SHORTREP)
state -= 3;
else
state -= 6;
}
void updateMatch() {
state = state < LIT_STATES ? LIT_MATCH : NONLIT_MATCH;
}
void updateLongRep() {
state = state < LIT_STATES ? LIT_LONGREP : NONLIT_REP;
}
void updateShortRep() {
state = state < LIT_STATES ? LIT_SHORTREP : NONLIT_REP;
}
boolean isLiteral() {
return state < LIT_STATES;
}
}

View File

@@ -0,0 +1,36 @@
/**
* XZ data compression support.
*
* <h4>Introduction</h4>
* <p>
* This aims to be a complete implementation of XZ data compression
* in pure Java. Features:
* <ul>
* <li>Full support for the .xz file format specification version 1.0.4</li>
* <li>Single-threaded streamed compression and decompression</li>
* <li>Single-threaded decompression with limited random access support</li>
* <li>Raw streams (no .xz headers) for advanced users, including LZMA2
* with preset dictionary</li>
* </ul>
* <p>
* Threading is planned but it is unknown when it will be implemented.
* <p>
* For the latest source code, see the
* <a href="http://tukaani.org/xz/java.html">home page of XZ for Java</a>.
*
* <h4>Getting started</h4>
* <p>
* Start by reading the documentation of {@link org.tukaani.xz.XZOutputStream}
* and {@link org.tukaani.xz.XZInputStream}.
* If you use XZ inside another file format or protocol,
* see also {@link org.tukaani.xz.SingleXZInputStream}.
*
* <h4>Licensing</h4>
* <p>
* XZ for Java has been put into the public domain, thus you can do
* whatever you want with it. All the files in the package have been
* written by Lasse Collin and/or Igor Pavlov.
* <p>
* This software is provided "as is", without any warranty.
*/
package org.tukaani.xz;

View File

@@ -0,0 +1,26 @@
/*
* RangeCoder
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.rangecoder;
import java.util.Arrays;
/**
 * Base class for the range coder: holds the probability-model and
 * normalization constants shared by the encoder and the decoders.
 */
public abstract class RangeCoder {
    static final int SHIFT_BITS = 8;
    static final int TOP_MASK = 0xFF000000;
    static final int BIT_MODEL_TOTAL_BITS = 11;
    static final int BIT_MODEL_TOTAL = 1 << BIT_MODEL_TOTAL_BITS;
    static final short PROB_INIT = (short)(BIT_MODEL_TOTAL / 2);
    static final int MOVE_BITS = 5;

    /**
     * Initializes every probability in the array to PROB_INIT,
     * i.e. an even estimate for both bit values.
     */
    public static final void initProbs(short[] probs) {
        for (int i = 0; i < probs.length; ++i)
            probs[i] = PROB_INIT;
    }
}

View File

@@ -0,0 +1,83 @@
/*
* RangeDecoder
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.rangecoder;
import java.io.DataInputStream;
import java.io.IOException;
public abstract class RangeDecoder extends RangeCoder {
    // Current width of the range and the code value inside it. Both
    // are manipulated as unsigned 32-bit integers.
    int range = 0;
    int code = 0;

    /**
     * Shifts one more input byte into the code when the range has
     * become too small (subclasses define where the input comes from).
     */
    public abstract void normalize() throws IOException;

    /**
     * Decodes one bit using, and adaptively updating, the probability
     * at probs[index].
     */
    public int decodeBit(short[] probs, int index) throws IOException {
        normalize();

        int prob = probs[index];
        int bound = (range >>> BIT_MODEL_TOTAL_BITS) * prob;
        int bit;

        // Compare code and bound as if they were unsigned 32-bit integers.
        if ((code ^ 0x80000000) < (bound ^ 0x80000000)) {
            range = bound;
            // Bit was 0: move the probability toward 0.
            probs[index] = (short)(
                    prob + ((BIT_MODEL_TOTAL - prob) >>> MOVE_BITS));
            bit = 0;
        } else {
            range -= bound;
            code -= bound;
            // Bit was 1: move the probability toward 1.
            probs[index] = (short)(prob - (prob >>> MOVE_BITS));
            bit = 1;
        }

        return bit;
    }

    /**
     * Decodes a symbol with a bit tree, most significant bit first.
     * Returns a value in [0, probs.length).
     */
    public int decodeBitTree(short[] probs) throws IOException {
        int symbol = 1;

        do {
            symbol = (symbol << 1) | decodeBit(probs, symbol);
        } while (symbol < probs.length);

        return symbol - probs.length;
    }

    /**
     * Decodes a symbol with a bit tree, least significant bit first
     * (the reverse bit order of decodeBitTree).
     */
    public int decodeReverseBitTree(short[] probs) throws IOException {
        int symbol = 1;
        int i = 0;
        int result = 0;

        do {
            int bit = decodeBit(probs, symbol);
            symbol = (symbol << 1) | bit;
            result |= bit << i++;
        } while (symbol < probs.length);

        return result;
    }

    /**
     * Decodes count bits with a fixed even probability, without any
     * probability model update.
     */
    public int decodeDirectBits(int count) throws IOException {
        int result = 0;

        do {
            normalize();

            range >>>= 1;
            // t is the sign bit of (code - range): 1 when the decoded
            // bit is 0, and 0 when the decoded bit is 1.
            int t = (code - range) >>> 31;
            code -= range & (t - 1);
            result = (result << 1) | (1 - t);
        } while (--count != 0);

        return result;
    }
}

View File

@@ -0,0 +1,64 @@
/*
* RangeDecoderFromBuffer
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.rangecoder;
import java.io.DataInputStream;
import java.io.IOException;
import org.tukaani.xz.CorruptedInputException;
public final class RangeDecoderFromBuffer extends RangeDecoder {
    // Number of input bytes consumed before the buffered data: one
    // 0x00 marker byte plus the four initial code bytes.
    private static final int INIT_SIZE = 5;

    private final byte[] buf;
    private int pos = 0;
    private int end = 0;

    /**
     * @param inputSizeMax maximum chunk size; the internal buffer
     *                     holds inputSizeMax - INIT_SIZE bytes
     */
    public RangeDecoderFromBuffer(int inputSizeMax) {
        buf = new byte[inputSizeMax - INIT_SIZE];
    }

    /**
     * Reads one compressed chunk of len bytes into the internal buffer
     * and initializes the range decoder state from its leading bytes.
     *
     * @throws CorruptedInputException if the chunk is shorter than
     *         INIT_SIZE or its first byte is not 0x00
     */
    public void prepareInputBuffer(DataInputStream in, int len)
            throws IOException {
        if (len < INIT_SIZE)
            throw new CorruptedInputException();

        if (in.readUnsignedByte() != 0x00)
            throw new CorruptedInputException();

        code = in.readInt();
        range = 0xFFFFFFFF;

        pos = 0;
        end = len - INIT_SIZE;
        in.readFully(buf, 0, end);
    }

    /**
     * Returns true if decoding hasn't consumed past the buffer's end.
     */
    public boolean isInBufferOK() {
        return pos <= end;
    }

    /**
     * Returns true if the whole chunk was consumed and the decoder
     * ended with the expected zero code value.
     */
    public boolean isFinished() {
        return pos == end && code == 0;
    }

    public void normalize() throws IOException {
        if ((range & TOP_MASK) == 0) {
            try {
                // If the input is corrupt, this might throw
                // ArrayIndexOutOfBoundsException.
                code = (code << SHIFT_BITS) | (buf[pos++] & 0xFF);
                range <<= SHIFT_BITS;
            } catch (ArrayIndexOutOfBoundsException e) {
                throw new CorruptedInputException();
            }
        }
    }
}

View File

@@ -0,0 +1,41 @@
/*
* RangeDecoderFromStream
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.rangecoder;
import java.io.InputStream;
import java.io.DataInputStream;
import java.io.IOException;
import org.tukaani.xz.CorruptedInputException;
public final class RangeDecoderFromStream extends RangeDecoder {
    private final DataInputStream inData;

    /**
     * Initializes the decoder state from the stream: the first byte
     * must be 0x00 and the following four bytes form the initial code.
     *
     * @throws CorruptedInputException if the first byte is not 0x00
     */
    public RangeDecoderFromStream(InputStream in) throws IOException {
        inData = new DataInputStream(in);

        if (inData.readUnsignedByte() != 0x00)
            throw new CorruptedInputException();

        code = inData.readInt();
        range = 0xFFFFFFFF;
    }

    /**
     * Returns true if the decoder has reached the expected zero final
     * code value.
     */
    public boolean isFinished() {
        return code == 0;
    }

    /**
     * Shifts one more byte from the stream into the code when the
     * range has shrunk below the top mask.
     */
    public void normalize() throws IOException {
        if ((range & TOP_MASK) == 0) {
            code = (code << SHIFT_BITS) | inData.readUnsignedByte();
            range <<= SHIFT_BITS;
        }
    }
}

View File

@@ -0,0 +1,203 @@
/*
* RangeEncoder
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.rangecoder;
import java.io.OutputStream;
import java.io.IOException;
public final class RangeEncoder extends RangeCoder {
private static final int MOVE_REDUCING_BITS = 4;
private static final int BIT_PRICE_SHIFT_BITS = 4;
private static final int[] prices
= new int[BIT_MODEL_TOTAL >>> MOVE_REDUCING_BITS];
private long low;
private int range;
// NOTE: int is OK for LZMA2 because a compressed chunk
// is not more than 64 KiB, but with LZMA1 there is no chunking
// so in theory cacheSize can grow very big. To be very safe,
// use long instead of int if you adapt this code for LZMA1.
private int cacheSize;
private byte cache;
private final byte[] buf;
private int bufPos;
static {
for (int i = (1 << MOVE_REDUCING_BITS) / 2; i < BIT_MODEL_TOTAL;
i += (1 << MOVE_REDUCING_BITS)) {
int w = i;
int bitCount = 0;
for (int j = 0; j < BIT_PRICE_SHIFT_BITS; ++j) {
w *= w;
bitCount <<= 1;
while ((w & 0xFFFF0000) != 0) {
w >>>= 1;
++bitCount;
}
}
prices[i >> MOVE_REDUCING_BITS]
= (BIT_MODEL_TOTAL_BITS << BIT_PRICE_SHIFT_BITS)
- 15 - bitCount;
}
}
public RangeEncoder(int bufSize) {
buf = new byte[bufSize];
reset();
}
public void reset() {
low = 0;
range = 0xFFFFFFFF;
cache = 0x00;
cacheSize = 1;
bufPos = 0;
}
public int getPendingSize() {
return bufPos + cacheSize + 5 - 1;
}
public int finish() {
for (int i = 0; i < 5; ++i)
shiftLow();
return bufPos;
}
public void write(OutputStream out) throws IOException {
out.write(buf, 0, bufPos);
}
private void shiftLow() {
int lowHi = (int)(low >>> 32);
if (lowHi != 0 || low < 0xFF000000L) {
int temp = cache;
do {
buf[bufPos++] = (byte)(temp + lowHi);
temp = 0xFF;
} while (--cacheSize != 0);
cache = (byte)(low >>> 24);
}
++cacheSize;
low = (low & 0x00FFFFFF) << 8;
}
public void encodeBit(short[] probs, int index, int bit) {
int prob = probs[index];
int bound = (range >>> BIT_MODEL_TOTAL_BITS) * prob;
// NOTE: Any non-zero value for bit is taken as 1.
if (bit == 0) {
range = bound;
probs[index] = (short)(
prob + ((BIT_MODEL_TOTAL - prob) >>> MOVE_BITS));
} else {
low += bound & 0xFFFFFFFFL;
range -= bound;
probs[index] = (short)(prob - (prob >>> MOVE_BITS));
}
if ((range & TOP_MASK) == 0) {
range <<= SHIFT_BITS;
shiftLow();
}
}
public static int getBitPrice(int prob, int bit) {
// NOTE: Unlike in encodeBit(), here bit must be 0 or 1.
assert bit == 0 || bit == 1;
return prices[(prob ^ ((-bit) & (BIT_MODEL_TOTAL - 1)))
>>> MOVE_REDUCING_BITS];
}
public void encodeBitTree(short[] probs, int symbol) {
int index = 1;
int mask = probs.length;
do {
mask >>>= 1;
int bit = symbol & mask;
encodeBit(probs, index, bit);
index <<= 1;
if (bit != 0)
index |= 1;
} while (mask != 1);
}
public static int getBitTreePrice(short[] probs, int symbol) {
int price = 0;
symbol |= probs.length;
do {
int bit = symbol & 1;
symbol >>>= 1;
price += getBitPrice(probs[symbol], bit);
} while (symbol != 1);
return price;
}
public void encodeReverseBitTree(short[] probs, int symbol) {
int index = 1;
symbol |= probs.length;
do {
int bit = symbol & 1;
symbol >>>= 1;
encodeBit(probs, index, bit);
index = (index << 1) | bit;
} while (symbol != 1);
}
public static int getReverseBitTreePrice(short[] probs, int symbol) {
int price = 0;
int index = 1;
symbol |= probs.length;
do {
int bit = symbol & 1;
symbol >>>= 1;
price += getBitPrice(probs[index], bit);
index = (index << 1) | bit;
} while (symbol != 1);
return price;
}
/**
 * Encodes {@code count} bits of {@code value} (most significant of the
 * selected bits first) with fixed 50/50 probabilities, i.e. without an
 * adaptive model.
 *
 * @param value source bits; only the low {@code count} bits are used
 * @param count number of bits to encode; must be at least 1
 */
public void encodeDirectBits(int value, int count) {
    do {
        // Each bit gets exactly half of the current range.
        range >>>= 1;
        // Mask is 0xFFFFFFFF when the current bit is 1, otherwise 0,
        // so range is added to low only for 1 bits.
        low += range & (0 - ((value >>> --count) & 1));

        // Renormalize when the range becomes too small.
        if ((range & TOP_MASK) == 0) {
            range <<= SHIFT_BITS;
            shiftLow();
        }
    } while (count != 0);
}
/**
 * Returns the price of coding {@code count} model-free bits: a fixed
 * cost per bit, scaled into price units.
 */
public static int getDirectBitsPrice(int count) {
    // Multiplying by a power of two is identical to the left shift.
    return count * (1 << BIT_PRICE_SHIFT_BITS);
}
}

View File

@@ -0,0 +1,50 @@
/*
* BCJ filter for little endian ARM instructions
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.simple;
/**
 * BCJ filter for little endian ARM instructions.
 *
 * Converts the 24-bit relative branch target of instructions whose top
 * byte is 0xEB between relative and absolute form, so that identical
 * branch destinations produce identical bytes and compress better.
 */
public final class ARM implements SimpleFilter {
    // true when converting relative -> absolute (compression side).
    private final boolean encoding;
    // Stream position of buf[off]; biased by 8 because the ARM PC is
    // presumably 8 bytes ahead of the current instruction — TODO confirm.
    private int position;

    public ARM(boolean isEncoder, int startPos) {
        this.encoding = isEncoder;
        this.position = startPos + 8;
    }

    /**
     * Filters len bytes in place starting at buf[off]; returns the
     * number of bytes processed (a multiple of 4).
     */
    public int code(byte[] buf, int off, int len) {
        int last = off + len - 4;  // last index where a full word fits
        int i = off;

        while (i <= last) {
            // Only words whose most significant byte is 0xEB are touched.
            if ((buf[i + 3] & 0xFF) == 0xEB) {
                int target = ((buf[i + 2] & 0xFF) << 16)
                             | ((buf[i + 1] & 0xFF) << 8)
                             | (buf[i] & 0xFF);
                target <<= 2;  // word offset -> byte offset

                int delta = position + i - off;
                int fixed = encoding ? target + delta : target - delta;
                fixed >>>= 2;  // back to a word offset

                buf[i + 2] = (byte)(fixed >>> 16);
                buf[i + 1] = (byte)(fixed >>> 8);
                buf[i] = (byte)fixed;
            }
            i += 4;
        }

        int done = i - off;
        position += done;
        return done;
    }
}

View File

@@ -0,0 +1,53 @@
/*
* BCJ filter for little endian ARM-Thumb instructions
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.simple;
/**
 * BCJ filter for little endian ARM-Thumb instructions.
 *
 * Converts the 22-bit target of two-halfword branch instruction pairs
 * (first halfword's high bits 0xF0, second's 0xF8) between relative and
 * absolute form to improve compressibility.
 */
public final class ARMThumb implements SimpleFilter {
    // true when converting relative -> absolute (compression side).
    private final boolean encoding;
    // Stream position of buf[off]; biased by 4, presumably the Thumb
    // PC offset — TODO confirm.
    private int position;

    public ARMThumb(boolean isEncoder, int startPos) {
        this.encoding = isEncoder;
        this.position = startPos + 4;
    }

    /**
     * Filters len bytes in place starting at buf[off]; returns the
     * number of bytes processed (a multiple of 2).
     */
    public int code(byte[] buf, int off, int len) {
        int last = off + len - 4;  // both halfwords must fit
        int i = off;

        while (i <= last) {
            if ((buf[i + 1] & 0xF8) == 0xF0 && (buf[i + 3] & 0xF8) == 0xF8) {
                // Reassemble the branch target scattered over the pair.
                int target = ((buf[i + 1] & 0x07) << 19)
                             | ((buf[i] & 0xFF) << 11)
                             | ((buf[i + 3] & 0x07) << 8)
                             | (buf[i + 2] & 0xFF);
                target <<= 1;  // halfword offset -> byte offset

                int delta = position + i - off;
                int fixed = encoding ? target + delta : target - delta;
                fixed >>>= 1;  // back to a halfword offset

                buf[i + 1] = (byte)(0xF0 | ((fixed >>> 19) & 0x07));
                buf[i] = (byte)(fixed >>> 11);
                buf[i + 3] = (byte)(0xF8 | ((fixed >>> 8) & 0x07));
                buf[i + 2] = (byte)fixed;

                // Skip the second halfword of the converted pair.
                i += 2;
            }
            i += 2;
        }

        int done = i - off;
        position += done;
        return done;
    }
}

View File

@@ -0,0 +1,81 @@
/*
* BCJ filter for Itanium (IA-64) instructions
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.simple;
/**
 * BCJ filter for Itanium (IA-64) instructions.
 *
 * Rewrites the 21-bit branch targets inside 16-byte instruction bundles
 * between relative and absolute form so that identical branch
 * destinations produce identical bytes, which helps LZ compression.
 */
public final class IA64 implements SimpleFilter {
    // For each of the 32 possible 5-bit bundle template codes, a 3-bit
    // mask selecting which of the bundle's three 41-bit slots contain
    // branch instructions that need conversion.
    private static final int[] BRANCH_TABLE = {
            0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0,
            4, 4, 6, 6, 0, 0, 7, 7,
            4, 4, 0, 0, 4, 4, 0, 0 };

    // true when converting relative -> absolute (compression side).
    private final boolean isEncoder;
    // Stream position corresponding to buf[off] in the next code() call.
    private int pos;

    /**
     * @param isEncoder true for the encoding (compression) direction
     * @param startPos  stream position of the first byte to be filtered
     */
    public IA64(boolean isEncoder, int startPos) {
        this.isEncoder = isEncoder;
        pos = startPos;
    }

    /**
     * Filters len bytes in place starting at buf[off]; returns the
     * number of bytes processed (a multiple of the 16-byte bundle size).
     */
    public int code(byte[] buf, int off, int len) {
        int end = off + len - 16;
        int i;

        for (i = off; i <= end; i += 16) {
            // The low 5 bits of a bundle select its template.
            int instrTemplate = buf[i] & 0x1F;
            int mask = BRANCH_TABLE[instrTemplate];

            // Slots start at bit 5 and are 41 bits wide.
            for (int slot = 0, bitPos = 5; slot < 3; ++slot, bitPos += 41) {
                if (((mask >>> slot) & 1) == 0)
                    continue;

                int bytePos = bitPos >>> 3;
                int bitRes = bitPos & 7;

                // Load six little endian bytes covering the 41-bit slot.
                long instr = 0;
                for (int j = 0; j < 6; ++j)
                    instr |= (buf[i + bytePos + j] & 0xFFL) << (8 * j);

                // Align the slot's bits to bit 0.
                long instrNorm = instr >>> bitRes;

                // Skip slots that do not match the branch instruction
                // pattern being filtered (field at bit 37 must be 5 and
                // the 3-bit field at bit 9 must be 0).
                if (((instrNorm >>> 37) & 0x0F) != 0x05
                        || ((instrNorm >>> 9) & 0x07) != 0x00)
                    continue;

                // Extract the 21-bit target: 20 low bits plus the bit
                // stored at position 36.
                int src = (int)((instrNorm >>> 13) & 0x0FFFFF);
                src |= ((int)(instrNorm >>> 36) & 1) << 20;

                // Targets address 16-byte bundles.
                src <<= 4;

                int dest;
                if (isEncoder)
                    dest = src + (pos + i - off);
                else
                    dest = src - (pos + i - off);

                dest >>>= 4;

                // Write the converted target back into the instruction.
                instrNorm &= ~(0x8FFFFFL << 13);
                instrNorm |= (dest & 0x0FFFFFL) << 13;
                instrNorm |= (dest & 0x100000L) << (36 - 20);

                // Merge the modified slot with the bits below bitRes
                // that belong to the neighboring slot, then store.
                instr &= (1 << bitRes) - 1;
                instr |= instrNorm << bitRes;

                for (int j = 0; j < 6; ++j)
                    buf[i + bytePos + j] = (byte)(instr >>> (8 * j));
            }
        }

        i -= off;
        pos += i;
        return i;
    }
}

View File

@@ -0,0 +1,50 @@
/*
* BCJ filter for big endian PowerPC instructions
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz.simple;
/**
 * BCJ filter for big endian PowerPC instructions.
 *
 * Converts the 24-bit offsets of branch instructions whose top six bits
 * are 0b010010 and whose low two bits are 01 between relative and
 * absolute form to improve compressibility.
 */
public final class PowerPC implements SimpleFilter {
    // true when converting relative -> absolute (compression side).
    private final boolean encoding;
    // Stream position corresponding to buf[off] in the next code() call.
    private int position;

    public PowerPC(boolean isEncoder, int startPos) {
        this.encoding = isEncoder;
        this.position = startPos;
    }

    /**
     * Filters len bytes in place starting at buf[off]; returns the
     * number of bytes processed (a multiple of 4).
     */
    public int code(byte[] buf, int off, int len) {
        int last = off + len - 4;  // last index where a full word fits
        int i = off;

        while (i <= last) {
            // Opcode byte 0x48 (low two bits are part of the offset) and
            // low two bits of the word equal to 01.
            if ((buf[i] & 0xFC) == 0x48 && (buf[i + 3] & 0x03) == 0x01) {
                int target = ((buf[i] & 0x03) << 24)
                             | ((buf[i + 1] & 0xFF) << 16)
                             | ((buf[i + 2] & 0xFF) << 8)
                             | (buf[i + 3] & 0xFC);

                int delta = position + i - off;
                int fixed = encoding ? target + delta : target - delta;

                buf[i] = (byte)(0x48 | ((fixed >>> 24) & 0x03));
                buf[i + 1] = (byte)(fixed >>> 16);
                buf[i + 2] = (byte)(fixed >>> 8);
                // Keep the original low two flag bits of the last byte.
                buf[i + 3] = (byte)((buf[i + 3] & 0x03) | fixed);
            }
            i += 4;
        }

        int done = i - off;
        position += done;
        return done;
    }
}

Some files were not shown because too many files have changed in this diff Show More