Can now download .pack.xz files for the Forge library.
This commit is contained in:
569
HMCLAPI/src/main/java/org/tukaani/xz/LZMAInputStream.java
Normal file
569
HMCLAPI/src/main/java/org/tukaani/xz/LZMAInputStream.java
Normal file
@@ -0,0 +1,569 @@
|
||||
/*
|
||||
* LZMAInputStream
|
||||
*
|
||||
* Authors: Lasse Collin <lasse.collin@tukaani.org>
|
||||
* Igor Pavlov <http://7-zip.org/>
|
||||
*
|
||||
* This file has been put into the public domain.
|
||||
* You can do whatever you want with this file.
|
||||
*/
|
||||
|
||||
package org.tukaani.xz;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.IOException;
|
||||
import org.tukaani.xz.lz.LZDecoder;
|
||||
import org.tukaani.xz.rangecoder.RangeDecoderFromStream;
|
||||
import org.tukaani.xz.lzma.LZMADecoder;
|
||||
|
||||
/**
|
||||
* Decompresses legacy .lzma files and raw LZMA streams (no .lzma header).
|
||||
* <p>
|
||||
* <b>IMPORTANT:</b> In contrast to other classes in this package, this class
|
||||
* reads data from its input stream one byte at a time. If the input stream
|
||||
* is for example {@link java.io.FileInputStream}, wrapping it into
|
||||
* {@link java.io.BufferedInputStream} tends to improve performance a lot.
|
||||
* This is not automatically done by this class because there may be use
|
||||
* cases where it is desired that this class won't read any bytes past
|
||||
* the end of the LZMA stream.
|
||||
* <p>
|
||||
* Even when using <code>BufferedInputStream</code>, the performance tends
|
||||
* to be worse (maybe 10-20 % slower) than with {@link LZMA2InputStream}
|
||||
* or {@link XZInputStream} (when the .xz file contains LZMA2-compressed data).
|
||||
*
|
||||
* @since 1.4
|
||||
*/
|
||||
public class LZMAInputStream extends InputStream {
|
||||
/**
|
||||
* Largest dictionary size supported by this implementation.
|
||||
* <p>
|
||||
* LZMA allows dictionaries up to one byte less than 4 GiB. This
|
||||
* implementation supports only 16 bytes less than 2 GiB. This
|
||||
* limitation is due to Java using signed 32-bit integers for array
|
||||
* indexing. The limitation shouldn't matter much in practice since so
|
||||
* huge dictionaries are not normally used.
|
||||
*/
|
||||
public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15;
|
||||
|
||||
private InputStream in;
|
||||
private LZDecoder lz;
|
||||
private RangeDecoderFromStream rc;
|
||||
private LZMADecoder lzma;
|
||||
|
||||
private boolean endReached = false;
|
||||
|
||||
private final byte[] tempBuf = new byte[1];
|
||||
|
||||
/**
|
||||
* Number of uncompressed bytes left to be decompressed, or -1 if
|
||||
* the end marker is used.
|
||||
*/
|
||||
private long remainingSize;
|
||||
|
||||
private IOException exception = null;
|
||||
|
||||
/**
|
||||
* Gets approximate decompressor memory requirements as kibibytes for
|
||||
* the given dictionary size and LZMA properties byte (lc, lp, and pb).
|
||||
*
|
||||
* @param dictSize LZMA dictionary size as bytes, should be
|
||||
* in the range [<code>0</code>,
|
||||
* <code>DICT_SIZE_MAX</code>]
|
||||
*
|
||||
* @param propsByte LZMA properties byte that encodes the values
|
||||
* of lc, lp, and pb
|
||||
*
|
||||
* @return approximate memory requirements as kibibytes (KiB)
|
||||
*
|
||||
* @throws UnsupportedOptionsException
|
||||
* if <code>dictSize</code> is outside
|
||||
* the range [<code>0</code>,
|
||||
* <code>DICT_SIZE_MAX</code>]
|
||||
*
|
||||
* @throws CorruptedInputException
|
||||
* if <code>propsByte</code> is invalid
|
||||
*/
|
||||
public static int getMemoryUsage(int dictSize, byte propsByte)
|
||||
throws UnsupportedOptionsException, CorruptedInputException {
|
||||
if (dictSize < 0 || dictSize > DICT_SIZE_MAX)
|
||||
throw new UnsupportedOptionsException(
|
||||
"LZMA dictionary is too big for this implementation");
|
||||
|
||||
int props = propsByte & 0xFF;
|
||||
if (props > (4 * 5 + 4) * 9 + 8)
|
||||
throw new CorruptedInputException("Invalid LZMA properties byte");
|
||||
|
||||
props %= 9 * 5;
|
||||
int lp = props / 9;
|
||||
int lc = props - lp * 9;
|
||||
|
||||
return getMemoryUsage(dictSize, lc, lp);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets approximate decompressor memory requirements as kibibytes for
|
||||
* the given dictionary size, lc, and lp. Note that pb isn't needed.
|
||||
*
|
||||
* @param dictSize LZMA dictionary size as bytes, must be
|
||||
* in the range [<code>0</code>,
|
||||
* <code>DICT_SIZE_MAX</code>]
|
||||
*
|
||||
* @param lc number of literal context bits, must be
|
||||
* in the range [0, 8]
|
||||
*
|
||||
* @param lp number of literal position bits, must be
|
||||
* in the range [0, 4]
|
||||
*
|
||||
* @return approximate memory requirements as kibibytes (KiB)
|
||||
*/
|
||||
public static int getMemoryUsage(int dictSize, int lc, int lp) {
|
||||
if (lc < 0 || lc > 8 || lp < 0 || lp > 4)
|
||||
throw new IllegalArgumentException("Invalid lc or lp");
|
||||
|
||||
// Probability variables have the type "short". There are
|
||||
// 0x300 (768) probability variables in each literal subcoder.
|
||||
// The number of literal subcoders is 2^(lc + lp).
|
||||
//
|
||||
// Roughly 10 KiB for the base state + LZ decoder's dictionary buffer
|
||||
// + sizeof(short) * number probability variables per literal subcoder
|
||||
// * number of literal subcoders
|
||||
return 10 + getDictSize(dictSize) / 1024
|
||||
+ ((2 * 0x300) << (lc + lp)) / 1024;
|
||||
}
|
||||
|
||||
private static int getDictSize(int dictSize) {
|
||||
if (dictSize < 0 || dictSize > DICT_SIZE_MAX)
|
||||
throw new IllegalArgumentException(
|
||||
"LZMA dictionary is too big for this implementation");
|
||||
|
||||
// For performance reasons, use a 4 KiB dictionary if something
|
||||
// smaller was requested. It's a rare situation and the performance
|
||||
// difference isn't huge, and it starts to matter mostly when the
|
||||
// dictionary is just a few bytes. But we need to handle the special
|
||||
// case of dictSize == 0 anyway, which is an allowed value but in
|
||||
// practice means one-byte dictionary.
|
||||
//
|
||||
// Note that using a dictionary bigger than specified in the headers
|
||||
// can hide errors if there is a reference to data beyond the original
|
||||
// dictionary size but is still within 4 KiB.
|
||||
if (dictSize < 4096)
|
||||
dictSize = 4096;
|
||||
|
||||
// Round dictionary size upward to a multiple of 16. This way LZMA
|
||||
// can use LZDecoder.getPos() for calculating LZMA's posMask.
|
||||
return (dictSize + 15) & ~15;
|
||||
}
|
||||
|
||||
/**
 * Creates a decompressor for the .lzma file format that does not
 * enforce any memory usage limit.
 *
 * @param       in          input stream to read the .lzma data from;
 *                          wrapping it in
 *                          <code>BufferedInputStream</code> may be
 *                          worthwhile, see the note in the class
 *                          description
 *
 * @throws      CorruptedInputException
 *                          file is corrupt or perhaps not in
 *                          the .lzma format at all
 *
 * @throws      UnsupportedOptionsException
 *                          dictionary size or uncompressed size is too
 *                          big for this implementation
 *
 * @throws      EOFException
 *                          file is truncated or perhaps not in
 *                          the .lzma format at all
 *
 * @throws      IOException may be thrown by <code>in</code>
 */
public LZMAInputStream(InputStream in) throws IOException {
    // A limit of -1 means "no memory usage limit".
    this(in, -1);
}
|
||||
|
||||
/**
|
||||
* Creates a new .lzma file format decompressor with an optional
|
||||
* memory usage limit.
|
||||
*
|
||||
* @param in input stream from which .lzma data is read;
|
||||
* it might be a good idea to wrap it in
|
||||
* <code>BufferedInputStream</code>, see the
|
||||
* note at the top of this page
|
||||
*
|
||||
* @param memoryLimit memory usage limit in kibibytes (KiB)
|
||||
* or <code>-1</code> to impose no
|
||||
* memory usage limit
|
||||
*
|
||||
* @throws CorruptedInputException
|
||||
* file is corrupt or perhaps not in
|
||||
* the .lzma format at all
|
||||
*
|
||||
* @throws UnsupportedOptionsException
|
||||
* dictionary size or uncompressed size is too
|
||||
* big for this implementation
|
||||
*
|
||||
* @throws MemoryLimitException
|
||||
* memory usage limit was exceeded
|
||||
*
|
||||
* @throws EOFException
|
||||
* file is truncated or perhaps not in
|
||||
* the .lzma format at all
|
||||
*
|
||||
* @throws IOException may be thrown by <code>in</code>
|
||||
*/
|
||||
public LZMAInputStream(InputStream in, int memoryLimit)
|
||||
throws IOException {
|
||||
DataInputStream inData = new DataInputStream(in);
|
||||
|
||||
// Properties byte (lc, lp, and pb)
|
||||
byte propsByte = inData.readByte();
|
||||
|
||||
// Dictionary size is an unsigned 32-bit little endian integer.
|
||||
int dictSize = 0;
|
||||
for (int i = 0; i < 4; ++i)
|
||||
dictSize |= inData.readUnsignedByte() << (8 * i);
|
||||
|
||||
// Uncompressed size is an unsigned 64-bit little endian integer.
|
||||
// The maximum 64-bit value is a special case (becomes -1 here)
|
||||
// which indicates that the end marker is used instead of knowing
|
||||
// the uncompressed size beforehand.
|
||||
long uncompSize = 0;
|
||||
for (int i = 0; i < 8; ++i)
|
||||
uncompSize |= (long)inData.readUnsignedByte() << (8 * i);
|
||||
|
||||
// Check the memory usage limit.
|
||||
int memoryNeeded = getMemoryUsage(dictSize, propsByte);
|
||||
if (memoryLimit != -1 && memoryNeeded > memoryLimit)
|
||||
throw new MemoryLimitException(memoryNeeded, memoryLimit);
|
||||
|
||||
initialize(in, uncompSize, propsByte, dictSize, null);
|
||||
}
|
||||
|
||||
/**
 * Creates an input stream that decompresses raw LZMA data (no .lzma
 * header) read from <code>in</code>.
 * <p>
 * The caller has to know whether the "end of payload marker (EOPM)",
 * also called "end of stream marker (EOS marker)" or simply
 * "end marker", is present. When the end marker isn't used, the caller
 * must know the exact uncompressed size of the stream.
 * <p>
 * The caller must also supply the LZMA properties byte, which encodes
 * the number of literal context bits (lc), literal position bits (lp),
 * and position bits (pb).
 * <p>
 * The dictionary size that was used when compressing is needed too.
 * A dictionary size that is too small prevents decompressing the
 * stream. One that is too big wastes memory, but decompression will
 * still work.
 * <p>
 * A dictionary bigger than the uncompressed size of the data never
 * needs to be specified, even if a bigger dictionary was used when
 * compressing. Knowing the uncompressed size may therefore allow
 * saving some memory.
 *
 * @param       in          input stream from which compressed
 *                          data is read
 *
 * @param       uncompSize  uncompressed size of the LZMA stream or -1
 *                          if the end marker is used in the LZMA stream
 *
 * @param       propsByte   LZMA properties byte that has the encoded
 *                          values for literal context bits (lc), literal
 *                          position bits (lp), and position bits (pb)
 *
 * @param       dictSize    dictionary size as bytes, must be in the range
 *                          [<code>0</code>, <code>DICT_SIZE_MAX</code>]
 *
 * @throws      CorruptedInputException
 *                          if <code>propsByte</code> is invalid or
 *                          the first input byte is not 0x00
 *
 * @throws      UnsupportedOptionsException
 *                          dictionary size or uncompressed size is too
 *                          big for this implementation
 *
 * @throws      IOException may be thrown by <code>in</code>
 */
public LZMAInputStream(InputStream in, long uncompSize, byte propsByte,
                       int dictSize) throws IOException {
    // Same as the preset dictionary variant with no preset dictionary.
    initialize(in, uncompSize, propsByte, dictSize, null);
}
|
||||
|
||||
/**
 * Creates an input stream that decompresses raw LZMA data (no .lzma
 * header) read from <code>in</code>, optionally using a preset
 * dictionary.
 *
 * @param       in          input stream from which LZMA-compressed
 *                          data is read
 *
 * @param       uncompSize  uncompressed size of the LZMA stream or -1
 *                          if the end marker is used in the LZMA stream
 *
 * @param       propsByte   LZMA properties byte that has the encoded
 *                          values for literal context bits (lc), literal
 *                          position bits (lp), and position bits (pb)
 *
 * @param       dictSize    dictionary size as bytes, must be in the range
 *                          [<code>0</code>, <code>DICT_SIZE_MAX</code>]
 *
 * @param       presetDict  preset dictionary, or <code>null</code>
 *                          when no preset dictionary is used
 *
 * @throws      CorruptedInputException
 *                          if <code>propsByte</code> is invalid or
 *                          the first input byte is not 0x00
 *
 * @throws      UnsupportedOptionsException
 *                          dictionary size or uncompressed size is too
 *                          big for this implementation
 *
 * @throws      EOFException file is truncated or corrupt
 *
 * @throws      IOException may be thrown by <code>in</code>
 */
public LZMAInputStream(InputStream in, long uncompSize, byte propsByte,
                       int dictSize, byte[] presetDict)
        throws IOException {
    // Validation and decoding of propsByte happen in initialize.
    initialize(in, uncompSize, propsByte, dictSize, presetDict);
}
|
||||
|
||||
/**
 * Creates an input stream that decompresses raw LZMA data (no .lzma
 * header) read from <code>in</code>, optionally using a preset
 * dictionary. The lc, lp, and pb values are given separately instead
 * of being packed into a properties byte.
 *
 * @param       in          input stream from which LZMA-compressed
 *                          data is read
 *
 * @param       uncompSize  uncompressed size of the LZMA stream or -1
 *                          if the end marker is used in the LZMA stream
 *
 * @param       lc          number of literal context bits, must be
 *                          in the range [0, 8]
 *
 * @param       lp          number of literal position bits, must be
 *                          in the range [0, 4]
 *
 * @param       pb          number of position bits, must be
 *                          in the range [0, 4]
 *
 * @param       dictSize    dictionary size as bytes, must be in the range
 *                          [<code>0</code>, <code>DICT_SIZE_MAX</code>]
 *
 * @param       presetDict  preset dictionary, or <code>null</code>
 *                          when no preset dictionary is used
 *
 * @throws      IllegalArgumentException
 *                          if <code>uncompSize</code>, <code>lc</code>,
 *                          <code>lp</code>, <code>pb</code>, or
 *                          <code>dictSize</code> is out of range
 *
 * @throws      CorruptedInputException
 *                          if the first input byte is not 0x00
 *
 * @throws      EOFException file is truncated or corrupt
 *
 * @throws      IOException may be thrown by <code>in</code>
 */
public LZMAInputStream(InputStream in, long uncompSize,
                       int lc, int lp, int pb,
                       int dictSize, byte[] presetDict)
        throws IOException {
    // All argument validation is done by initialize.
    initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict);
}
|
||||
|
||||
private void initialize(InputStream in, long uncompSize, byte propsByte,
|
||||
int dictSize, byte[] presetDict)
|
||||
throws IOException {
|
||||
// Validate the uncompressed size since the other "initialize" throws
|
||||
// IllegalArgumentException if uncompSize < -1.
|
||||
if (uncompSize < -1)
|
||||
throw new UnsupportedOptionsException(
|
||||
"Uncompressed size is too big");
|
||||
|
||||
// Decode the properties byte. In contrast to LZMA2, there is no
|
||||
// limit of lc + lp <= 4.
|
||||
int props = propsByte & 0xFF;
|
||||
if (props > (4 * 5 + 4) * 9 + 8)
|
||||
throw new CorruptedInputException("Invalid LZMA properties byte");
|
||||
|
||||
int pb = props / (9 * 5);
|
||||
props -= pb * 9 * 5;
|
||||
int lp = props / 9;
|
||||
int lc = props - lp * 9;
|
||||
|
||||
// Validate the dictionary size since the other "initialize" throws
|
||||
// IllegalArgumentException if dictSize is not supported.
|
||||
if (dictSize < 0 || dictSize > DICT_SIZE_MAX)
|
||||
throw new UnsupportedOptionsException(
|
||||
"LZMA dictionary is too big for this implementation");
|
||||
|
||||
initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict);
|
||||
}
|
||||
|
||||
private void initialize(InputStream in, long uncompSize,
|
||||
int lc, int lp, int pb,
|
||||
int dictSize, byte[] presetDict)
|
||||
throws IOException {
|
||||
// getDictSize validates dictSize and gives a message in
|
||||
// the exception too, so skip validating dictSize here.
|
||||
if (uncompSize < -1 || lc < 0 || lc > 8 || lp < 0 || lp > 4
|
||||
|| pb < 0 || pb > 4)
|
||||
throw new IllegalArgumentException();
|
||||
|
||||
this.in = in;
|
||||
|
||||
// If uncompressed size is known, use it to avoid wasting memory for
|
||||
// a uselessly large dictionary buffer.
|
||||
dictSize = getDictSize(dictSize);
|
||||
if (uncompSize >= 0 && dictSize > uncompSize)
|
||||
dictSize = getDictSize((int)uncompSize);
|
||||
|
||||
lz = new LZDecoder(getDictSize(dictSize), presetDict);
|
||||
rc = new RangeDecoderFromStream(in);
|
||||
lzma = new LZMADecoder(lz, rc, lc, lp, pb);
|
||||
remainingSize = uncompSize;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decompresses the next byte from this input stream.
|
||||
* <p>
|
||||
* Reading lots of data with <code>read()</code> from this input stream
|
||||
* may be inefficient. Wrap it in <code>java.io.BufferedInputStream</code>
|
||||
* if you need to read lots of data one byte at a time.
|
||||
*
|
||||
* @return the next decompressed byte, or <code>-1</code>
|
||||
* to indicate the end of the compressed stream
|
||||
*
|
||||
* @throws CorruptedInputException
|
||||
*
|
||||
* @throws XZIOException if the stream has been closed
|
||||
*
|
||||
* @throws EOFException
|
||||
* compressed input is truncated or corrupt
|
||||
*
|
||||
* @throws IOException may be thrown by <code>in</code>
|
||||
*/
|
||||
public int read() throws IOException {
|
||||
return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decompresses into an array of bytes.
|
||||
* <p>
|
||||
* If <code>len</code> is zero, no bytes are read and <code>0</code>
|
||||
* is returned. Otherwise this will block until <code>len</code>
|
||||
* bytes have been decompressed, the end of the LZMA stream is reached,
|
||||
* or an exception is thrown.
|
||||
*
|
||||
* @param buf target buffer for uncompressed data
|
||||
* @param off start offset in <code>buf</code>
|
||||
* @param len maximum number of uncompressed bytes to read
|
||||
*
|
||||
* @return number of bytes read, or <code>-1</code> to indicate
|
||||
* the end of the compressed stream
|
||||
*
|
||||
* @throws CorruptedInputException
|
||||
*
|
||||
* @throws XZIOException if the stream has been closed
|
||||
*
|
||||
* @throws EOFException compressed input is truncated or corrupt
|
||||
*
|
||||
* @throws IOException may be thrown by <code>in</code>
|
||||
*/
|
||||
public int read(byte[] buf, int off, int len) throws IOException {
|
||||
if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
|
||||
throw new IndexOutOfBoundsException();
|
||||
|
||||
if (len == 0)
|
||||
return 0;
|
||||
|
||||
if (in == null)
|
||||
throw new XZIOException("Stream closed");
|
||||
|
||||
if (exception != null)
|
||||
throw exception;
|
||||
|
||||
if (endReached)
|
||||
return -1;
|
||||
|
||||
try {
|
||||
int size = 0;
|
||||
|
||||
while (len > 0) {
|
||||
// If uncompressed size is known and thus no end marker will
|
||||
// be present, set the limit so that the uncompressed size
|
||||
// won't be exceeded.
|
||||
int copySizeMax = len;
|
||||
if (remainingSize >= 0 && remainingSize < len)
|
||||
copySizeMax = (int)remainingSize;
|
||||
|
||||
lz.setLimit(copySizeMax);
|
||||
|
||||
// Decode into the dictionary buffer.
|
||||
try {
|
||||
lzma.decode();
|
||||
} catch (CorruptedInputException e) {
|
||||
// The end marker is encoded with a LZMA symbol that
|
||||
// indicates maximum match distance. This is larger
|
||||
// than any supported dictionary and thus causes
|
||||
// CorruptedInputException from LZDecoder.repeat.
|
||||
if (remainingSize != -1 || !lzma.endMarkerDetected())
|
||||
throw e;
|
||||
|
||||
endReached = true;
|
||||
|
||||
// The exception makes lzma.decode() miss the last range
|
||||
// decoder normalization, so do it here. This might
|
||||
// cause an IOException if it needs to read a byte
|
||||
// from the input stream.
|
||||
rc.normalize();
|
||||
}
|
||||
|
||||
// Copy from the dictionary to buf.
|
||||
int copiedSize = lz.flush(buf, off);
|
||||
off += copiedSize;
|
||||
len -= copiedSize;
|
||||
size += copiedSize;
|
||||
|
||||
if (remainingSize >= 0) {
|
||||
// Update the number of bytes left to be decompressed.
|
||||
remainingSize -= copiedSize;
|
||||
assert remainingSize >= 0;
|
||||
|
||||
if (remainingSize == 0)
|
||||
endReached = true;
|
||||
}
|
||||
|
||||
if (endReached) {
|
||||
// Checking these helps a lot when catching corrupt
|
||||
// or truncated .lzma files. LZMA Utils doesn't do
|
||||
// the first check and thus it accepts many invalid
|
||||
// files that this implementation and XZ Utils don't.
|
||||
if (!rc.isFinished() || lz.hasPending())
|
||||
throw new CorruptedInputException();
|
||||
|
||||
return size == 0 ? -1 : size;
|
||||
}
|
||||
}
|
||||
|
||||
return size;
|
||||
|
||||
} catch (IOException e) {
|
||||
exception = e;
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the stream and calls <code>in.close()</code>.
|
||||
* If the stream was already closed, this does nothing.
|
||||
*
|
||||
* @throws IOException if thrown by <code>in.close()</code>
|
||||
*/
|
||||
public void close() throws IOException {
|
||||
if (in != null) {
|
||||
try {
|
||||
in.close();
|
||||
} finally {
|
||||
in = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user