001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lz4;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.util.Arrays;
024import java.util.zip.CheckedInputStream;
025
026import org.apache.commons.compress.compressors.CompressorInputStream;
027import org.apache.commons.compress.utils.BoundedInputStream;
028import org.apache.commons.compress.utils.ByteUtils;
029import org.apache.commons.compress.utils.IOUtils;
030import org.apache.commons.compress.utils.InputStreamStatistics;
031import org.apache.commons.io.input.CountingInputStream;
032
033/**
034 * CompressorInputStream for the LZ4 frame format.
035 *
036 * <p>
037 * Based on the "spec" in the version "1.5.1 (31/03/2015)"
038 * </p>
039 *
040 * @see <a href="https://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a>
041 * @since 1.14
042 * @NotThreadSafe
043 */
044public class FramedLZ4CompressorInputStream extends CompressorInputStream implements InputStreamStatistics {
045
046    /** Used by FramedLZ4CompressorOutputStream as well. */
047    static final byte[] LZ4_SIGNATURE = { 4, 0x22, 0x4d, 0x18 };
048    private static final byte[] SKIPPABLE_FRAME_TRAILER = { 0x2a, 0x4d, 0x18 };
049    private static final byte SKIPPABLE_FRAME_PREFIX_BYTE_MASK = 0x50;
050
051    static final int VERSION_MASK = 0xC0;
052    static final int SUPPORTED_VERSION = 0x40;
053    static final int BLOCK_INDEPENDENCE_MASK = 0x20;
054    static final int BLOCK_CHECKSUM_MASK = 0x10;
055    static final int CONTENT_SIZE_MASK = 0x08;
056    static final int CONTENT_CHECKSUM_MASK = 0x04;
057    static final int BLOCK_MAX_SIZE_MASK = 0x70;
058    static final int UNCOMPRESSED_FLAG_MASK = 0x80000000;
059
060    private static boolean isSkippableFrameSignature(final byte[] b) {
061        if ((b[0] & SKIPPABLE_FRAME_PREFIX_BYTE_MASK) != SKIPPABLE_FRAME_PREFIX_BYTE_MASK) {
062            return false;
063        }
064        for (int i = 1; i < 4; i++) {
065            if (b[i] != SKIPPABLE_FRAME_TRAILER[i - 1]) {
066                return false;
067            }
068        }
069        return true;
070    }
071
072    /**
073     * Checks if the signature matches what is expected for a .lz4 file.
074     * <p>
075     * .lz4 files start with a four byte signature.
076     * </p>
077     *
078     * @param signature the bytes to check
079     * @param length    the number of bytes to check
080     * @return true if this is a .sz stream, false otherwise
081     */
082    public static boolean matches(final byte[] signature, final int length) {
083
084        if (length < LZ4_SIGNATURE.length) {
085            return false;
086        }
087
088        byte[] shortenedSig = signature;
089        if (signature.length > LZ4_SIGNATURE.length) {
090            shortenedSig = Arrays.copyOf(signature, LZ4_SIGNATURE.length);
091        }
092
093        return Arrays.equals(shortenedSig, LZ4_SIGNATURE);
094    }
095
096    /** Used in no-arg read method. */
097    private final byte[] oneByte = new byte[1];
098    private final ByteUtils.ByteSupplier supplier = this::readOneByte;
099
100    private final CountingInputStream inputStream;
101    private final boolean decompressConcatenated;
102    private boolean expectBlockChecksum;
103    private boolean expectBlockDependency;
104
105    private boolean expectContentSize;
106    private boolean expectContentChecksum;
107
108    private InputStream currentBlock;
109
110    private boolean endReached, inUncompressed;
111
112    /** Used for frame header checksum and content checksum, if present. */
113    private final org.apache.commons.codec.digest.XXHash32 contentHash = new org.apache.commons.codec.digest.XXHash32();
114
115    /** Used for block checksum, if present. */
116    private final org.apache.commons.codec.digest.XXHash32 blockHash = new org.apache.commons.codec.digest.XXHash32();
117
118    /** Only created if the frame doesn't set the block independence flag. */
119    private byte[] blockDependencyBuffer;
120
121    /**
122     * Creates a new input stream that decompresses streams compressed using the LZ4 frame format and stops after decompressing the first frame.
123     *
124     * @param in the InputStream from which to read the compressed data
125     * @throws IOException if reading fails
126     */
127    public FramedLZ4CompressorInputStream(final InputStream in) throws IOException {
128        this(in, false);
129    }
130
131    /**
132     * Creates a new input stream that decompresses streams compressed using the LZ4 frame format.
133     *
134     * @param in                     the InputStream from which to read the compressed data
135     * @param decompressConcatenated if true, decompress until the end of the input; if false, stop after the first LZ4 frame and leave the input position to
136     *                               point to the next byte after the frame stream
137     * @throws IOException if reading fails
138     */
139    public FramedLZ4CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException {
140        this.inputStream = new CountingInputStream(in);
141        this.decompressConcatenated = decompressConcatenated;
142        init(true);
143    }
144
145    private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) {
146        len = Math.min(len, blockDependencyBuffer.length);
147        if (len > 0) {
148            final int keep = blockDependencyBuffer.length - len;
149            if (keep > 0) {
150                // move last keep bytes towards the start of the buffer
151                System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep);
152            }
153            // append new data
154            System.arraycopy(b, off, blockDependencyBuffer, keep, len);
155        }
156    }
157
158    /** {@inheritDoc} */
159    @Override
160    public void close() throws IOException {
161        try {
162            if (currentBlock != null) {
163                currentBlock.close();
164                currentBlock = null;
165            }
166        } finally {
167            inputStream.close();
168        }
169    }
170
171    /**
172     * @since 1.17
173     */
174    @Override
175    public long getCompressedCount() {
176        return inputStream.getByteCount();
177    }
178
179    private void init(final boolean firstFrame) throws IOException {
180        if (readSignature(firstFrame)) {
181            readFrameDescriptor();
182            nextBlock();
183        }
184    }
185
186    private void maybeFinishCurrentBlock() throws IOException {
187        if (currentBlock != null) {
188            currentBlock.close();
189            currentBlock = null;
190            if (expectBlockChecksum) {
191                verifyChecksum(blockHash, "block");
192                blockHash.reset();
193            }
194        }
195    }
196
197    private void nextBlock() throws IOException {
198        maybeFinishCurrentBlock();
199        final long len = ByteUtils.fromLittleEndian(supplier, 4);
200        final boolean uncompressed = (len & UNCOMPRESSED_FLAG_MASK) != 0;
201        final int realLen = (int) (len & ~UNCOMPRESSED_FLAG_MASK);
202        if (realLen == 0) {
203            verifyContentChecksum();
204            if (!decompressConcatenated) {
205                endReached = true;
206            } else {
207                init(false);
208            }
209            return;
210        }
211        InputStream capped = new BoundedInputStream(inputStream, realLen);
212        if (expectBlockChecksum) {
213            capped = new CheckedInputStream(capped, blockHash);
214        }
215        if (uncompressed) {
216            inUncompressed = true;
217            currentBlock = capped;
218        } else {
219            inUncompressed = false;
220            final BlockLZ4CompressorInputStream s = new BlockLZ4CompressorInputStream(capped);
221            if (expectBlockDependency) {
222                s.prefill(blockDependencyBuffer);
223            }
224            currentBlock = s;
225        }
226    }
227
228    /** {@inheritDoc} */
229    @Override
230    public int read() throws IOException {
231        return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF;
232    }
233
234    /** {@inheritDoc} */
235    @Override
236    public int read(final byte[] b, final int off, final int len) throws IOException {
237        if (len == 0) {
238            return 0;
239        }
240        if (endReached) {
241            return -1;
242        }
243        int r = readOnce(b, off, len);
244        if (r == -1) {
245            nextBlock();
246            if (!endReached) {
247                r = readOnce(b, off, len);
248            }
249        }
250        if (r != -1) {
251            if (expectBlockDependency) {
252                appendToBlockDependencyBuffer(b, off, r);
253            }
254            if (expectContentChecksum) {
255                contentHash.update(b, off, r);
256            }
257        }
258        return r;
259    }
260
261    private void readFrameDescriptor() throws IOException {
262        final int flags = readOneByte();
263        if (flags == -1) {
264            throw new IOException("Premature end of stream while reading frame flags");
265        }
266        contentHash.update(flags);
267        if ((flags & VERSION_MASK) != SUPPORTED_VERSION) {
268            throw new IOException("Unsupported version " + (flags >> 6));
269        }
270        expectBlockDependency = (flags & BLOCK_INDEPENDENCE_MASK) == 0;
271        if (expectBlockDependency) {
272            if (blockDependencyBuffer == null) {
273                blockDependencyBuffer = new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE];
274            }
275        } else {
276            blockDependencyBuffer = null;
277        }
278        expectBlockChecksum = (flags & BLOCK_CHECKSUM_MASK) != 0;
279        expectContentSize = (flags & CONTENT_SIZE_MASK) != 0;
280        expectContentChecksum = (flags & CONTENT_CHECKSUM_MASK) != 0;
281        final int bdByte = readOneByte();
282        if (bdByte == -1) { // max size is irrelevant for this implementation
283            throw new IOException("Premature end of stream while reading frame BD byte");
284        }
285        contentHash.update(bdByte);
286        if (expectContentSize) { // for now, we don't care, contains the uncompressed size
287            final byte[] contentSize = new byte[8];
288            final int skipped = IOUtils.readFully(inputStream, contentSize);
289            count(skipped);
290            if (8 != skipped) {
291                throw new IOException("Premature end of stream while reading content size");
292            }
293            contentHash.update(contentSize, 0, contentSize.length);
294        }
295        final int headerHash = readOneByte();
296        if (headerHash == -1) { // partial hash of header.
297            throw new IOException("Premature end of stream while reading frame header checksum");
298        }
299        final int expectedHash = (int) (contentHash.getValue() >> 8 & 0xff);
300        contentHash.reset();
301        if (headerHash != expectedHash) {
302            throw new IOException("Frame header checksum mismatch");
303        }
304    }
305
306    private int readOnce(final byte[] b, final int off, final int len) throws IOException {
307        if (inUncompressed) {
308            final int cnt = currentBlock.read(b, off, len);
309            count(cnt);
310            return cnt;
311        }
312        final BlockLZ4CompressorInputStream l = (BlockLZ4CompressorInputStream) currentBlock;
313        final long before = l.getBytesRead();
314        final int cnt = currentBlock.read(b, off, len);
315        count(l.getBytesRead() - before);
316        return cnt;
317    }
318
319    private int readOneByte() throws IOException {
320        final int b = inputStream.read();
321        if (b != -1) {
322            count(1);
323            return b & 0xFF;
324        }
325        return -1;
326    }
327
328    private boolean readSignature(final boolean firstFrame) throws IOException {
329        final String garbageMessage = firstFrame ? "Not a LZ4 frame stream" : "LZ4 frame stream followed by garbage";
330        final byte[] b = new byte[4];
331        int read = IOUtils.readFully(inputStream, b);
332        count(read);
333        if (0 == read && !firstFrame) {
334            // good LZ4 frame and nothing after it
335            endReached = true;
336            return false;
337        }
338        if (4 != read) {
339            throw new IOException(garbageMessage);
340        }
341
342        read = skipSkippableFrame(b);
343        if (0 == read && !firstFrame) {
344            // good LZ4 frame with only some skippable frames after it
345            endReached = true;
346            return false;
347        }
348        if (4 != read || !matches(b, 4)) {
349            throw new IOException(garbageMessage);
350        }
351        return true;
352    }
353
354    /**
355     * Skips over the contents of a skippable frame as well as skippable frames following it.
356     * <p>
357     * It then tries to read four more bytes which are supposed to hold an LZ4 signature and returns the number of bytes read while storing the bytes in the
358     * given array.
359     * </p>
360     */
361    private int skipSkippableFrame(final byte[] b) throws IOException {
362        int read = 4;
363        while (read == 4 && isSkippableFrameSignature(b)) {
364            final long len = ByteUtils.fromLittleEndian(supplier, 4);
365            if (len < 0) {
366                throw new IOException("Found illegal skippable frame with negative size");
367            }
368            final long skipped = org.apache.commons.io.IOUtils.skip(inputStream, len);
369            count(skipped);
370            if (len != skipped) {
371                throw new IOException("Premature end of stream while skipping frame");
372            }
373            read = IOUtils.readFully(inputStream, b);
374            count(read);
375        }
376        return read;
377    }
378
379    private void verifyChecksum(final org.apache.commons.codec.digest.XXHash32 hash, final String kind) throws IOException {
380        final byte[] checksum = new byte[4];
381        final int read = IOUtils.readFully(inputStream, checksum);
382        count(read);
383        if (4 != read) {
384            throw new IOException("Premature end of stream while reading " + kind + " checksum");
385        }
386        final long expectedHash = hash.getValue();
387        if (expectedHash != ByteUtils.fromLittleEndian(checksum)) {
388            throw new IOException(kind + " checksum mismatch.");
389        }
390    }
391
392    private void verifyContentChecksum() throws IOException {
393        if (expectContentChecksum) {
394            verifyChecksum(contentHash, "content");
395        }
396        contentHash.reset();
397    }
398}