/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.compress.archivers.zip;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.nio.channels.SeekableByteChannel;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.zip.Inflater;
import java.util.zip.ZipException;

import org.apache.commons.compress.archivers.EntryStreamOffsets;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
import org.apache.commons.compress.utils.BoundedArchiveInputStream;
import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
import org.apache.commons.compress.utils.CharsetNames;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.compress.utils.InputStreamStatistics;
import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;
import org.apache.commons.io.Charsets;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.build.AbstractOrigin.ByteArrayOrigin;
import org.apache.commons.io.build.AbstractStreamBuilder;
import org.apache.commons.io.input.CountingInputStream;

/**
 * Replacement for {@link java.util.zip.ZipFile}.
 * <p>
 * This class adds support for file name encodings other than UTF-8 (which is required to work on ZIP files created by native ZIP tools) and is able to skip a
 * preamble like the one found in self-extracting archives. Furthermore it returns instances of
 * {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} instead of {@link java.util.zip.ZipEntry}.
 * </p>
 * <p>
 * It doesn't extend {@link java.util.zip.ZipFile} as it would have to reimplement all methods anyway. Like {@link java.util.zip.ZipFile}, it uses
 * SeekableByteChannel under the covers and supports compressed and uncompressed entries. As of Apache Commons Compress 1.3 it also transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4 GB or with more than 65,536 entries.
078 * </p> 079 * <p> 080 * The method signatures mimic the ones of {@link java.util.zip.ZipFile}, with a couple of exceptions: 081 * </p> 082 * <ul> 083 * <li>There is no getName method.</li> 084 * <li>entries has been renamed to getEntries.</li> 085 * <li>getEntries and getEntry return {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} instances.</li> 086 * <li>close is allowed to throw IOException.</li> 087 * </ul> 088 */ 089public class ZipFile implements Closeable { 090 091 /** 092 * Lock-free implementation of BoundedInputStream. The implementation uses positioned reads on the underlying archive file channel and therefore performs 093 * significantly faster in concurrent environment. 094 */ 095 private static class BoundedFileChannelInputStream extends BoundedArchiveInputStream { 096 private final FileChannel archive; 097 098 BoundedFileChannelInputStream(final long start, final long remaining, final FileChannel archive) { 099 super(start, remaining); 100 this.archive = archive; 101 } 102 103 @Override 104 protected int read(final long pos, final ByteBuffer buf) throws IOException { 105 final int read = archive.read(buf, pos); 106 buf.flip(); 107 return read; 108 } 109 } 110 111 /** 112 * Builds new {@link ZipFile} instances. 113 * <p> 114 * The channel will be opened for reading, assuming the specified encoding for file names. 115 * </p> 116 * <p> 117 * See {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} to read from an in-memory archive. 118 * </p> 119 * <p> 120 * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time 121 * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory. 
122 * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is 123 * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 124 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. 125 * </p> 126 * 127 * @since 1.26.0 128 */ 129 public static class Builder extends AbstractStreamBuilder<ZipFile, Builder> { 130 131 static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8; 132 133 private SeekableByteChannel seekableByteChannel; 134 private boolean useUnicodeExtraFields = true; 135 private boolean ignoreLocalFileHeader; 136 private long maxNumberOfDisks = 1; 137 138 public Builder() { 139 setCharset(DEFAULT_CHARSET); 140 setCharsetDefault(DEFAULT_CHARSET); 141 } 142 143 @SuppressWarnings("resource") // caller closes 144 @Override 145 public ZipFile get() throws IOException { 146 final SeekableByteChannel actualChannel; 147 final String actualDescription; 148 if (seekableByteChannel != null) { 149 actualChannel = seekableByteChannel; 150 actualDescription = actualChannel.getClass().getSimpleName(); 151 } else if (checkOrigin() instanceof ByteArrayOrigin) { 152 actualChannel = new SeekableInMemoryByteChannel(checkOrigin().getByteArray()); 153 actualDescription = actualChannel.getClass().getSimpleName(); 154 } else { 155 OpenOption[] openOptions = getOpenOptions(); 156 if (openOptions.length == 0) { 157 openOptions = new OpenOption[] { StandardOpenOption.READ }; 158 } 159 final Path path = getPath(); 160 actualChannel = openZipChannel(path, maxNumberOfDisks, openOptions); 161 actualDescription = path.toString(); 162 } 163 final boolean closeOnError = seekableByteChannel != null; 164 return new ZipFile(actualChannel, actualDescription, getCharset(), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader); 165 } 166 167 /** 168 * Sets whether to ignore information stored inside 
the local file header. 169 * 170 * @param ignoreLocalFileHeader whether to ignore information stored inside. 171 * @return this. 172 */ 173 public Builder setIgnoreLocalFileHeader(final boolean ignoreLocalFileHeader) { 174 this.ignoreLocalFileHeader = ignoreLocalFileHeader; 175 return this; 176 } 177 178 /** 179 * Sets max number of multi archive disks, default is 1 (no multi archive). 180 * 181 * @param maxNumberOfDisks max number of multi archive disks. 182 * 183 * @return this. 184 */ 185 public Builder setMaxNumberOfDisks(final long maxNumberOfDisks) { 186 this.maxNumberOfDisks = maxNumberOfDisks; 187 return this; 188 } 189 190 /** 191 * The actual channel, overrides any other input aspects like a File, Path, and so on. 192 * 193 * @param seekableByteChannel The actual channel. 194 * @return this. 195 */ 196 public Builder setSeekableByteChannel(final SeekableByteChannel seekableByteChannel) { 197 this.seekableByteChannel = seekableByteChannel; 198 return this; 199 } 200 201 /** 202 * Sets whether to use InfoZIP Unicode Extra Fields (if present) to set the file names. 203 * 204 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names. 205 * @return this. 206 */ 207 public Builder setUseUnicodeExtraFields(final boolean useUnicodeExtraFields) { 208 this.useUnicodeExtraFields = useUnicodeExtraFields; 209 return this; 210 } 211 212 } 213 214 /** 215 * Extends ZipArchiveEntry to store the offset within the archive. 
216 */ 217 private static final class Entry extends ZipArchiveEntry { 218 219 @Override 220 public boolean equals(final Object other) { 221 if (super.equals(other)) { 222 // super.equals would return false if other were not an Entry 223 final Entry otherEntry = (Entry) other; 224 return getLocalHeaderOffset() == otherEntry.getLocalHeaderOffset() && super.getDataOffset() == otherEntry.getDataOffset() 225 && super.getDiskNumberStart() == otherEntry.getDiskNumberStart(); 226 } 227 return false; 228 } 229 230 @Override 231 public int hashCode() { 232 return 3 * super.hashCode() + (int) getLocalHeaderOffset() + (int) (getLocalHeaderOffset() >> 32); 233 } 234 } 235 236 private static final class NameAndComment { 237 private final byte[] name; 238 private final byte[] comment; 239 240 private NameAndComment(final byte[] name, final byte[] comment) { 241 this.name = name; 242 this.comment = comment; 243 } 244 } 245 246 private static final class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics { 247 StoredStatisticsStream(final InputStream in) { 248 super(in); 249 } 250 251 @Override 252 public long getCompressedCount() { 253 return super.getByteCount(); 254 } 255 256 @Override 257 public long getUncompressedCount() { 258 return getCompressedCount(); 259 } 260 } 261 262 private static final EnumSet<StandardOpenOption> READ = EnumSet.of(StandardOpenOption.READ); 263 264 private static final int HASH_SIZE = 509; 265 static final int NIBLET_MASK = 0x0f; 266 static final int BYTE_SHIFT = 8; 267 private static final int POS_0 = 0; 268 private static final int POS_1 = 1; 269 private static final int POS_2 = 2; 270 private static final int POS_3 = 3; 271 private static final byte[] ONE_ZERO_BYTE = new byte[1]; 272 273 /** 274 * Length of a "central directory" entry structure without file name, extra fields or comment. 
275 */ 276 private static final int CFH_LEN = 277 // @formatter:off 278 /* version made by */ ZipConstants.SHORT 279 /* version needed to extract */ + ZipConstants.SHORT 280 /* general purpose bit flag */ + ZipConstants.SHORT 281 /* compression method */ + ZipConstants.SHORT 282 /* last mod file time */ + ZipConstants.SHORT 283 /* last mod file date */ + ZipConstants.SHORT 284 /* crc-32 */ + ZipConstants.WORD 285 /* compressed size */ + ZipConstants.WORD 286 /* uncompressed size */ + ZipConstants.WORD 287 /* file name length */ + ZipConstants. SHORT 288 /* extra field length */ + ZipConstants.SHORT 289 /* file comment length */ + ZipConstants.SHORT 290 /* disk number start */ + ZipConstants.SHORT 291 /* internal file attributes */ + ZipConstants.SHORT 292 /* external file attributes */ + ZipConstants.WORD 293 /* relative offset of local header */ + ZipConstants.WORD; 294 // @formatter:on 295 296 private static final long CFH_SIG = ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); 297 298 /** 299 * Length of the "End of central directory record" - which is supposed to be the last structure of the archive - without file comment. 300 */ 301 static final int MIN_EOCD_SIZE = 302 // @formatter:off 303 /* end of central dir signature */ ZipConstants.WORD 304 /* number of this disk */ + ZipConstants.SHORT 305 /* number of the disk with the */ 306 /* start of the central directory */ + ZipConstants.SHORT 307 /* total number of entries in */ 308 /* the central dir on this disk */ + ZipConstants.SHORT 309 /* total number of entries in */ 310 /* the central dir */ + ZipConstants.SHORT 311 /* size of the central directory */ + ZipConstants.WORD 312 /* offset of start of central */ 313 /* directory with respect to */ 314 /* the starting disk number */ + ZipConstants.WORD 315 /* ZIP file comment length */ + ZipConstants.SHORT; 316 // @formatter:on 317 318 /** 319 * Maximum length of the "End of central directory record" with a file comment. 
320 */ 321 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE 322 // @formatter:off 323 /* maximum length of ZIP file comment */ + ZipConstants.ZIP64_MAGIC_SHORT; 324 // @formatter:on 325 326 /** 327 * Offset of the field that holds the location of the length of the central directory inside the "End of central directory record" relative to the start of 328 * the "End of central directory record". 329 */ 330 private static final int CFD_LENGTH_OFFSET = 331 // @formatter:off 332 /* end of central dir signature */ ZipConstants.WORD 333 /* number of this disk */ + ZipConstants.SHORT 334 /* number of the disk with the */ 335 /* start of the central directory */ + ZipConstants.SHORT 336 /* total number of entries in */ 337 /* the central dir on this disk */ + ZipConstants.SHORT 338 /* total number of entries in */ 339 /* the central dir */ + ZipConstants.SHORT; 340 // @formatter:on 341 342 /** 343 * Offset of the field that holds the disk number of the first central directory entry inside the "End of central directory record" relative to the start of 344 * the "End of central directory record". 345 */ 346 private static final int CFD_DISK_OFFSET = 347 // @formatter:off 348 /* end of central dir signature */ ZipConstants.WORD 349 /* number of this disk */ + ZipConstants.SHORT; 350 // @formatter:on 351 352 /** 353 * Offset of the field that holds the location of the first central directory entry inside the "End of central directory record" relative to the "number of 354 * the disk with the start of the central directory". 
355 */ 356 private static final int CFD_LOCATOR_RELATIVE_OFFSET = 357 // @formatter:off 358 /* total number of entries in */ 359 /* the central dir on this disk */ + ZipConstants.SHORT 360 /* total number of entries in */ 361 /* the central dir */ + ZipConstants.SHORT 362 /* size of the central directory */ + ZipConstants.WORD; 363 // @formatter:on 364 365 /** 366 * Length of the "Zip64 end of central directory locator" - which should be right in front of the "end of central directory record" if one is present at 367 * all. 368 */ 369 private static final int ZIP64_EOCDL_LENGTH = 370 // @formatter:off 371 /* zip64 end of central dir locator sig */ ZipConstants.WORD 372 /* number of the disk with the start */ 373 /* start of the zip64 end of */ 374 /* central directory */ + ZipConstants.WORD 375 /* relative offset of the zip64 */ 376 /* end of central directory record */ + ZipConstants.DWORD 377 /* total number of disks */ + ZipConstants.WORD; 378 // @formatter:on 379 380 /** 381 * Offset of the field that holds the location of the "Zip64 end of central directory record" inside the "Zip64 end of central directory locator" relative 382 * to the start of the "Zip64 end of central directory locator". 383 */ 384 private static final int ZIP64_EOCDL_LOCATOR_OFFSET = 385 // @formatter:off 386 /* zip64 end of central dir locator sig */ ZipConstants.WORD 387 /* number of the disk with the start */ 388 /* start of the zip64 end of */ 389 /* central directory */ + ZipConstants.WORD; 390 // @formatter:on 391 392 /** 393 * Offset of the field that holds the location of the first central directory entry inside the "Zip64 end of central directory record" relative to the start 394 * of the "Zip64 end of central directory record". 
395 */ 396 private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = 397 // @formatter:off 398 /* zip64 end of central dir */ 399 /* signature */ ZipConstants.WORD 400 /* size of zip64 end of central */ 401 /* directory record */ + ZipConstants.DWORD 402 /* version made by */ + ZipConstants.SHORT 403 /* version needed to extract */ + ZipConstants.SHORT 404 /* number of this disk */ + ZipConstants.WORD 405 /* number of the disk with the */ 406 /* start of the central directory */ + ZipConstants.WORD 407 /* total number of entries in the */ 408 /* central directory on this disk */ + ZipConstants.DWORD 409 /* total number of entries in the */ 410 /* central directory */ + ZipConstants.DWORD 411 /* size of the central directory */ + ZipConstants.DWORD; 412 // @formatter:on 413 414 /** 415 * Offset of the field that holds the disk number of the first central directory entry inside the "Zip64 end of central directory record" relative to the 416 * start of the "Zip64 end of central directory record". 417 */ 418 private static final int ZIP64_EOCD_CFD_DISK_OFFSET = 419 // @formatter:off 420 /* zip64 end of central dir */ 421 /* signature */ ZipConstants.WORD 422 /* size of zip64 end of central */ 423 /* directory record */ + ZipConstants.DWORD 424 /* version made by */ + ZipConstants.SHORT 425 /* version needed to extract */ + ZipConstants.SHORT 426 /* number of this disk */ + ZipConstants.WORD; 427 // @formatter:on 428 429 /** 430 * Offset of the field that holds the location of the first central directory entry inside the "Zip64 end of central directory record" relative to the 431 * "number of the disk with the start of the central directory". 
432 */ 433 private static final int ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET = 434 // @formatter:off 435 /* total number of entries in the */ 436 /* central directory on this disk */ ZipConstants.DWORD 437 /* total number of entries in the */ 438 /* central directory */ + ZipConstants.DWORD 439 /* size of the central directory */ + ZipConstants.DWORD; 440 // @formatter:on 441 442 /** 443 * Number of bytes in local file header up to the "length of file name" entry. 444 */ 445 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 446 // @formatter:off 447 /* local file header signature */ ZipConstants.WORD 448 /* version needed to extract */ + ZipConstants.SHORT 449 /* general purpose bit flag */ + ZipConstants.SHORT 450 /* compression method */ + ZipConstants.SHORT 451 /* last mod file time */ + ZipConstants.SHORT 452 /* last mod file date */ + ZipConstants.SHORT 453 /* crc-32 */ + ZipConstants.WORD 454 /* compressed size */ + ZipConstants.WORD 455 /* uncompressed size */ + (long) ZipConstants.WORD; 456 // @formatter:on 457 458 /** 459 * Compares two ZipArchiveEntries based on their offset within the archive. 460 * <p> 461 * Won't return any meaningful results if one of the entries isn't part of the archive at all. 462 * </p> 463 * 464 * @since 1.1 465 */ 466 private static final Comparator<ZipArchiveEntry> offsetComparator = Comparator.comparingLong(ZipArchiveEntry::getDiskNumberStart) 467 .thenComparingLong(ZipArchiveEntry::getLocalHeaderOffset); 468 469 /** 470 * Creates a new Builder. 471 * 472 * @return a new Builder. 473 * @since 1.26.0 474 */ 475 public static Builder builder() { 476 return new Builder(); 477 } 478 479 /** 480 * Closes a ZIP file quietly; throwing no IOException, does nothing on null input. 481 * 482 * @param zipFile file to close, can be null 483 */ 484 public static void closeQuietly(final ZipFile zipFile) { 485 org.apache.commons.io.IOUtils.closeQuietly(zipFile); 486 } 487 488 /** 489 * Creates a new SeekableByteChannel for reading. 
490 * 491 * @param path the path to the file to open or create 492 * @return a new seekable byte channel 493 * @throws IOException if an I/O error occurs 494 */ 495 private static SeekableByteChannel newReadByteChannel(final Path path) throws IOException { 496 return Files.newByteChannel(path, READ); 497 } 498 499 private static SeekableByteChannel openZipChannel(final Path path, final long maxNumberOfDisks, final OpenOption[] openOptions) throws IOException { 500 final FileChannel channel = FileChannel.open(path, StandardOpenOption.READ); 501 final List<FileChannel> channels = new ArrayList<>(); 502 try { 503 final boolean is64 = positionAtEndOfCentralDirectoryRecord(channel); 504 long numberOfDisks; 505 if (is64) { 506 channel.position(channel.position() + ZipConstants.WORD + ZipConstants.WORD + ZipConstants.DWORD); 507 final ByteBuffer buf = ByteBuffer.allocate(ZipConstants.WORD); 508 buf.order(ByteOrder.LITTLE_ENDIAN); 509 IOUtils.readFully(channel, buf); 510 buf.flip(); 511 numberOfDisks = buf.getInt() & 0xffffffffL; 512 } else { 513 channel.position(channel.position() + ZipConstants.WORD); 514 final ByteBuffer buf = ByteBuffer.allocate(ZipConstants.SHORT); 515 buf.order(ByteOrder.LITTLE_ENDIAN); 516 IOUtils.readFully(channel, buf); 517 buf.flip(); 518 numberOfDisks = (buf.getShort() & 0xffff) + 1; 519 } 520 if (numberOfDisks > Math.min(maxNumberOfDisks, Integer.MAX_VALUE)) { 521 throw new IOException("Too many disks for zip archive, max=" + Math.min(maxNumberOfDisks, Integer.MAX_VALUE) + " actual=" + numberOfDisks); 522 } 523 524 if (numberOfDisks <= 1) { 525 return channel; 526 } 527 channel.close(); 528 529 final Path parent = path.getParent(); 530 final String basename = FilenameUtils.removeExtension(path.getFileName().toString()); 531 532 return ZipSplitReadOnlySeekableByteChannel.forPaths(IntStream.range(0, (int) numberOfDisks).mapToObj(i -> { 533 if (i == numberOfDisks - 1) { 534 return path; 535 } 536 final Path lowercase = 
parent.resolve(String.format("%s.z%02d", basename, i + 1)); 537 if (Files.exists(lowercase)) { 538 return lowercase; 539 } 540 final Path uppercase = parent.resolve(String.format("%s.Z%02d", basename, i + 1)); 541 if (Files.exists(uppercase)) { 542 return uppercase; 543 } 544 return lowercase; 545 }).collect(Collectors.toList()), openOptions); 546 } catch (final Throwable ex) { 547 org.apache.commons.io.IOUtils.closeQuietly(channel); 548 channels.forEach(org.apache.commons.io.IOUtils::closeQuietly); 549 throw ex; 550 } 551 } 552 553 /** 554 * Searches for the and positions the stream at the start of the "End of central dir record". 555 * 556 * @return true if it's Zip64 end of central directory or false if it's Zip32 557 */ 558 private static boolean positionAtEndOfCentralDirectoryRecord(final SeekableByteChannel channel) throws IOException { 559 final boolean found = tryToLocateSignature(channel, MIN_EOCD_SIZE, MAX_EOCD_SIZE, ZipArchiveOutputStream.EOCD_SIG); 560 if (!found) { 561 throw new ZipException("Archive is not a ZIP archive"); 562 } 563 boolean found64 = false; 564 final long position = channel.position(); 565 if (position > ZIP64_EOCDL_LENGTH) { 566 final ByteBuffer wordBuf = ByteBuffer.allocate(4); 567 channel.position(channel.position() - ZIP64_EOCDL_LENGTH); 568 wordBuf.rewind(); 569 IOUtils.readFully(channel, wordBuf); 570 wordBuf.flip(); 571 found64 = wordBuf.equals(ByteBuffer.wrap(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG)); 572 if (!found64) { 573 channel.position(position); 574 } else { 575 channel.position(channel.position() - ZipConstants.WORD); 576 } 577 } 578 579 return found64; 580 } 581 582 /** 583 * Searches the archive backwards from minDistance to maxDistance for the given signature, positions the RandomaccessFile right at the signature if it has 584 * been found. 
585 */ 586 private static boolean tryToLocateSignature(final SeekableByteChannel channel, final long minDistanceFromEnd, final long maxDistanceFromEnd, 587 final byte[] sig) throws IOException { 588 final ByteBuffer wordBuf = ByteBuffer.allocate(ZipConstants.WORD); 589 boolean found = false; 590 long off = channel.size() - minDistanceFromEnd; 591 final long stopSearching = Math.max(0L, channel.size() - maxDistanceFromEnd); 592 if (off >= 0) { 593 for (; off >= stopSearching; off--) { 594 channel.position(off); 595 try { 596 wordBuf.rewind(); 597 IOUtils.readFully(channel, wordBuf); 598 wordBuf.flip(); 599 } catch (final EOFException ex) { // NOSONAR 600 break; 601 } 602 int curr = wordBuf.get(); 603 if (curr == sig[POS_0]) { 604 curr = wordBuf.get(); 605 if (curr == sig[POS_1]) { 606 curr = wordBuf.get(); 607 if (curr == sig[POS_2]) { 608 curr = wordBuf.get(); 609 if (curr == sig[POS_3]) { 610 found = true; 611 break; 612 } 613 } 614 } 615 } 616 } 617 } 618 if (found) { 619 channel.position(off); 620 } 621 return found; 622 } 623 624 /** 625 * List of entries in the order they appear inside the central directory. 626 */ 627 private final List<ZipArchiveEntry> entries = new LinkedList<>(); 628 629 /** 630 * Maps String to list of ZipArchiveEntrys, name -> actual entries. 631 */ 632 private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = new HashMap<>(HASH_SIZE); 633 634 /** 635 * The encoding to use for file names and the file comment. 636 * <p> 637 * For a list of possible values see <a href="Supported Encodings">https://docs.oracle.com/javase/8/docs/technotes/guides/intl/encoding.doc.html</a>. 638 * Defaults to UTF-8. 639 * </p> 640 */ 641 private final Charset encoding; 642 643 /** 644 * The ZIP encoding to use for file names and the file comment. 645 */ 646 private final ZipEncoding zipEncoding; 647 648 /** 649 * The actual data source. 
650 */ 651 private final SeekableByteChannel archive; 652 653 /** 654 * Whether to look for and use Unicode extra fields. 655 */ 656 private final boolean useUnicodeExtraFields; 657 658 /** 659 * Whether the file is closed. 660 */ 661 private volatile boolean closed = true; 662 663 /** 664 * Whether the ZIP archive is a split ZIP archive 665 */ 666 private final boolean isSplitZipArchive; 667 668 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 669 private final byte[] dwordBuf = new byte[ZipConstants.DWORD]; 670 671 private final byte[] wordBuf = new byte[ZipConstants.WORD]; 672 673 private final byte[] cfhBuf = new byte[CFH_LEN]; 674 675 private final byte[] shortBuf = new byte[ZipConstants.SHORT]; 676 677 private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf); 678 679 private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf); 680 681 private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf); 682 683 private final ByteBuffer shortBbuf = ByteBuffer.wrap(shortBuf); 684 685 private long centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset; 686 687 private long centralDirectoryStartOffset; 688 689 private long firstLocalFileHeaderOffset; 690 691 /** 692 * Opens the given file for reading, assuming "UTF8" for file names. 693 * 694 * @param file the archive. 695 * 696 * @throws IOException if an error occurs while reading the file. 697 * @deprecated Use {@link Builder#get()}. 698 */ 699 @Deprecated 700 public ZipFile(final File file) throws IOException { 701 this(file, CharsetNames.UTF_8); 702 } 703 704 /** 705 * Opens the given file for reading, assuming the specified encoding for file names and scanning for Unicode extra fields. 706 * 707 * @param file the archive. 708 * @param encoding the encoding to use for file names, use null for the platform's default encoding 709 * @throws IOException if an error occurs while reading the file. 710 * @deprecated Use {@link Builder#get()}. 
711 */ 712 @Deprecated 713 public ZipFile(final File file, final String encoding) throws IOException { 714 this(file.toPath(), encoding, true); 715 } 716 717 /** 718 * Opens the given file for reading, assuming the specified encoding for file names. 719 * 720 * @param file the archive. 721 * @param encoding the encoding to use for file names, use null for the platform's default encoding 722 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names. 723 * @throws IOException if an error occurs while reading the file. 724 * @deprecated Use {@link Builder#get()}. 725 */ 726 @Deprecated 727 public ZipFile(final File file, final String encoding, final boolean useUnicodeExtraFields) throws IOException { 728 this(file.toPath(), encoding, useUnicodeExtraFields, false); 729 } 730 731 /** 732 * Opens the given file for reading, assuming the specified encoding for file names. 733 * <p> 734 * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time 735 * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory. 736 * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is 737 * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 738 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. 739 * </p> 740 * 741 * @param file the archive. 742 * @param encoding the encoding to use for file names, use null for the platform's default encoding 743 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names. 
744 * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's Javadoc) 745 * @throws IOException if an error occurs while reading the file. 746 * @since 1.19 747 * @deprecated Use {@link Builder#get()}. 748 */ 749 @Deprecated 750 @SuppressWarnings("resource") // Caller closes 751 public ZipFile(final File file, final String encoding, final boolean useUnicodeExtraFields, final boolean ignoreLocalFileHeader) throws IOException { 752 this(newReadByteChannel(file.toPath()), file.getAbsolutePath(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader); 753 } 754 755 /** 756 * Opens the given path for reading, assuming "UTF-8" for file names. 757 * 758 * @param path path to the archive. 759 * @throws IOException if an error occurs while reading the file. 760 * @since 1.22 761 * @deprecated Use {@link Builder#get()}. 762 */ 763 @Deprecated 764 public ZipFile(final Path path) throws IOException { 765 this(path, CharsetNames.UTF_8); 766 } 767 768 /** 769 * Opens the given path for reading, assuming the specified encoding for file names and scanning for Unicode extra fields. 770 * 771 * @param path path to the archive. 772 * @param encoding the encoding to use for file names, use null for the platform's default encoding 773 * @throws IOException if an error occurs while reading the file. 774 * @since 1.22 775 * @deprecated Use {@link Builder#get()}. 776 */ 777 @Deprecated 778 public ZipFile(final Path path, final String encoding) throws IOException { 779 this(path, encoding, true); 780 } 781 782 /** 783 * Opens the given path for reading, assuming the specified encoding for file names. 784 * 785 * @param path path to the archive. 786 * @param encoding the encoding to use for file names, use null for the platform's default encoding 787 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names. 
788 * @throws IOException if an error occurs while reading the file. 789 * @since 1.22 790 * @deprecated Use {@link Builder#get()}. 791 */ 792 @Deprecated 793 public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields) throws IOException { 794 this(path, encoding, useUnicodeExtraFields, false); 795 } 796 797 /** 798 * Opens the given path for reading, assuming the specified encoding for file names. 799 * <p> 800 * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time 801 * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory. 802 * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is 803 * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 804 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. 805 * </p> 806 * 807 * @param path path to the archive. 808 * @param encoding the encoding to use for file names, use null for the platform's default encoding 809 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names. 810 * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's Javadoc) 811 * @throws IOException if an error occurs while reading the file. 812 * @since 1.22 813 * @deprecated Use {@link Builder#get()}. 
*/
@SuppressWarnings("resource") // Caller closes
@Deprecated
public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields, final boolean ignoreLocalFileHeader) throws IOException {
    // closeOnError = true: the channel is opened by this constructor, so it is released again if reading the archive fails.
    this(newReadByteChannel(path), path.toAbsolutePath().toString(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader);
}

/**
 * Opens the given channel for reading, assuming "UTF-8" for file names.
 * <p>
 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
 * </p>
 *
 * @param channel the archive.
 *
 * @throws IOException if an error occurs while reading the file.
 * @since 1.13
 * @deprecated Use {@link Builder#get()}.
 */
@Deprecated
public ZipFile(final SeekableByteChannel channel) throws IOException {
    // A fixed placeholder description is used in error messages; Unicode extra fields are enabled.
    this(channel, "a SeekableByteChannel", CharsetNames.UTF_8, true);
}

/**
 * Opens the given channel for reading, assuming the specified encoding for file names.
 * <p>
 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
 * </p>
 *
 * @param channel the archive.
 * @param encoding the encoding to use for file names, use null for the platform's default encoding
 * @throws IOException if an error occurs while reading the file.
 * @since 1.13
 * @deprecated Use {@link Builder#get()}.
*/
@Deprecated
public ZipFile(final SeekableByteChannel channel, final String encoding) throws IOException {
    // A fixed placeholder description is used in error messages; Unicode extra fields are enabled.
    this(channel, "a SeekableByteChannel", encoding, true);
}

/**
 * Reads the archive's central directory and, unless {@code ignoreLocalFileHeader} is set, its local file headers from the given channel.
 *
 * @param channel the archive.
 * @param channelDescription description of the archive, only used in the message of the IOException thrown when reading fails.
 * @param encoding the charset to use for file names; resolved through {@code Charsets.toCharset(encoding, Builder.DEFAULT_CHARSET)}
 *        (presumably falling back to the builder default for null - confirm against Commons IO).
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
 * @param closeOnError whether the channel is closed quietly when construction does not complete successfully.
 * @param ignoreLocalFileHeader whether to skip reading the local file headers.
 * @throws IOException if an error occurs while reading the archive; the original exception is attached as the cause.
 */
private ZipFile(final SeekableByteChannel channel, final String channelDescription, final Charset encoding, final boolean useUnicodeExtraFields,
        final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException {
    this.isSplitZipArchive = channel instanceof ZipSplitReadOnlySeekableByteChannel;
    this.encoding = Charsets.toCharset(encoding, Builder.DEFAULT_CHARSET);
    // NOTE(review): the raw (possibly null) parameter is passed here rather than the resolved this.encoding - confirm both agree for null.
    this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
    this.useUnicodeExtraFields = useUnicodeExtraFields;
    this.archive = channel;
    boolean success = false;
    try {
        final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = populateFromCentralDirectory();
        if (!ignoreLocalFileHeader) {
            resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
        }
        fillNameMap();
        success = true;
    } catch (final IOException e) {
        // Only IOExceptions are wrapped; runtime exceptions propagate unchanged but still pass through the finally block.
        throw new IOException("Error reading Zip content from " + channelDescription, e);
    } finally {
        // A failed construction leaves the instance marked as closed, so finalize() will not try to close it again.
        this.closed = !success;
        if (!success && closeOnError) {
            org.apache.commons.io.IOUtils.closeQuietly(archive);
        }
    }
}

/**
 * Opens the given channel for reading, assuming the specified encoding for file names.
 * <p>
 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
 * </p>
 *
 * @param channel the archive.
 * @param channelDescription description of the archive, used for error messages only.
 * @param encoding the encoding to use for file names, use null for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
 * @throws IOException if an error occurs while reading the file.
* @since 1.13
 * @deprecated Use {@link Builder#get()}.
 */
@Deprecated
public ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields)
        throws IOException {
    // closeOnError = false (the channel was supplied by the caller), ignoreLocalFileHeader = false.
    this(channel, channelDescription, encoding, useUnicodeExtraFields, false, false);
}

/**
 * Opens the given channel for reading, assuming the specified encoding for file names.
 * <p>
 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
 * </p>
 * <p>
 * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time
 * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory.
 * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is
 * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively.
 * </p>
 *
 * @param channel the archive.
 * @param channelDescription description of the archive, used for error messages only.
 * @param encoding the encoding to use for file names, use null for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
 * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's Javadoc)
 * @throws IOException if an error occurs while reading the file.
 * @since 1.19
 * @deprecated Use {@link Builder#get()}.
*/
@Deprecated
public ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields,
        final boolean ignoreLocalFileHeader) throws IOException {
    // closeOnError = false: the channel was supplied by the caller and is not closed when reading fails.
    this(channel, channelDescription, encoding, useUnicodeExtraFields, false, ignoreLocalFileHeader);
}

/**
 * Converts the encoding name to a Charset via {@code Charsets.toCharset(String)} and delegates to the Charset-based private constructor.
 *
 * @param channel the archive.
 * @param channelDescription description of the archive, used for error messages only.
 * @param encoding the name of the encoding to use for file names.
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names.
 * @param closeOnError whether to close the channel if reading the archive fails.
 * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header.
 * @throws IOException if an error occurs while reading the file.
 */
private ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields,
        final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException {
    this(channel, channelDescription, Charsets.toCharset(encoding), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader);
}

/**
 * Opens the given file for reading, assuming "UTF-8".
 *
 * @param name name of the archive.
 * @throws IOException if an error occurs while reading the file.
 * @deprecated Use {@link Builder#get()}.
 */
@Deprecated
public ZipFile(final String name) throws IOException {
    this(new File(name).toPath(), CharsetNames.UTF_8);
}

/**
 * Opens the given file for reading, assuming the specified encoding for file names, scanning unicode extra fields.
 *
 * @param name name of the archive.
 * @param encoding the encoding to use for file names, use null for the platform's default encoding
 * @throws IOException if an error occurs while reading the file.
 * @deprecated Use {@link Builder#get()}.
 */
@Deprecated
public ZipFile(final String name, final String encoding) throws IOException {
    // useUnicodeExtraFields = true
    this(new File(name).toPath(), encoding, true);
}

/**
 * Whether this class is able to read the given entry.
 * <p>
 * May return false if it is set up to use encryption or a compression method that hasn't been implemented yet.
 * </p>
 *
 * @since 1.1
 * @param entry the entry
 * @return whether this class is able to read the given entry.
967 */ 968 public boolean canReadEntryData(final ZipArchiveEntry entry) { 969 return ZipUtil.canHandleEntryData(entry); 970 } 971 972 /** 973 * Closes the archive. 974 * 975 * @throws IOException if an error occurs closing the archive. 976 */ 977 @Override 978 public void close() throws IOException { 979 // this flag is only written here and read in finalize() which 980 // can never be run in parallel. 981 // no synchronization needed. 982 closed = true; 983 archive.close(); 984 } 985 986 /** 987 * Transfer selected entries from this ZIP file to a given #ZipArchiveOutputStream. Compression and all other attributes will be as in this file. 988 * <p> 989 * This method transfers entries based on the central directory of the ZIP file. 990 * </p> 991 * 992 * @param target The zipArchiveOutputStream to write the entries to 993 * @param predicate A predicate that selects which entries to write 994 * @throws IOException on error 995 */ 996 public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) throws IOException { 997 final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder(); 998 while (src.hasMoreElements()) { 999 final ZipArchiveEntry entry = src.nextElement(); 1000 if (predicate.test(entry)) { 1001 target.addRawArchiveEntry(entry, getRawInputStream(entry)); 1002 } 1003 } 1004 } 1005 1006 /** 1007 * Creates new BoundedInputStream, according to implementation of underlying archive channel. 1008 */ 1009 private BoundedArchiveInputStream createBoundedInputStream(final long start, final long remaining) { 1010 if (start < 0 || remaining < 0 || start + remaining < start) { 1011 throw new IllegalArgumentException("Corrupted archive, stream boundaries" + " are out of range"); 1012 } 1013 return archive instanceof FileChannel ? 
new BoundedFileChannelInputStream(start, remaining, (FileChannel) archive) 1014 : new BoundedSeekableByteChannelInputStream(start, remaining, archive); 1015 } 1016 1017 private void fillNameMap() { 1018 entries.forEach(ze -> { 1019 // entries are filled in populateFromCentralDirectory and 1020 // never modified 1021 final String name = ze.getName(); 1022 final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.computeIfAbsent(name, k -> new LinkedList<>()); 1023 entriesOfThatName.addLast(ze); 1024 }); 1025 } 1026 1027 /** 1028 * Ensures that the close method of this ZIP file is called when there are no more references to it. 1029 * 1030 * @see #close() 1031 */ 1032 @Override 1033 protected void finalize() throws Throwable { 1034 try { 1035 if (!closed) { 1036 close(); 1037 } 1038 } finally { 1039 super.finalize(); 1040 } 1041 } 1042 1043 /** 1044 * Gets an InputStream for reading the content before the first local file header. 1045 * 1046 * @return null if there is no content before the first local file header. Otherwise, returns a stream to read the content before the first local file 1047 * header. 1048 * @since 1.23 1049 */ 1050 public InputStream getContentBeforeFirstLocalFileHeader() { 1051 return firstLocalFileHeaderOffset == 0 ? null : createBoundedInputStream(0, firstLocalFileHeaderOffset); 1052 } 1053 1054 private long getDataOffset(final ZipArchiveEntry ze) throws IOException { 1055 final long s = ze.getDataOffset(); 1056 if (s == EntryStreamOffsets.OFFSET_UNKNOWN) { 1057 setDataOffset(ze); 1058 return ze.getDataOffset(); 1059 } 1060 return s; 1061 } 1062 1063 /** 1064 * Gets the encoding to use for file names and the file comment. 1065 * 1066 * @return null if using the platform's default character encoding. 1067 */ 1068 public String getEncoding() { 1069 return encoding.name(); 1070 } 1071 1072 /** 1073 * Gets all entries. 1074 * <p> 1075 * Entries will be returned in the same order they appear within the archive's central directory. 
1076 * </p> 1077 * 1078 * @return all entries as {@link ZipArchiveEntry} instances 1079 */ 1080 public Enumeration<ZipArchiveEntry> getEntries() { 1081 return Collections.enumeration(entries); 1082 } 1083 1084 /** 1085 * Gets all named entries in the same order they appear within the archive's central directory. 1086 * 1087 * @param name name of the entry. 1088 * @return the Iterable<ZipArchiveEntry> corresponding to the given name 1089 * @since 1.6 1090 */ 1091 public Iterable<ZipArchiveEntry> getEntries(final String name) { 1092 return nameMap.getOrDefault(name, ZipArchiveEntry.EMPTY_LINKED_LIST); 1093 } 1094 1095 /** 1096 * Gets all entries in physical order. 1097 * <p> 1098 * Entries will be returned in the same order their contents appear within the archive. 1099 * </p> 1100 * 1101 * @return all entries as {@link ZipArchiveEntry} instances 1102 * 1103 * @since 1.1 1104 */ 1105 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() { 1106 final ZipArchiveEntry[] allEntries = entries.toArray(ZipArchiveEntry.EMPTY_ARRAY); 1107 return Collections.enumeration(Arrays.asList(sortByOffset(allEntries))); 1108 } 1109 1110 /** 1111 * Gets all named entries in the same order their contents appear within the archive. 1112 * 1113 * @param name name of the entry. 1114 * @return the Iterable<ZipArchiveEntry> corresponding to the given name 1115 * @since 1.6 1116 */ 1117 public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) { 1118 final LinkedList<ZipArchiveEntry> linkedList = nameMap.getOrDefault(name, ZipArchiveEntry.EMPTY_LINKED_LIST); 1119 return Arrays.asList(sortByOffset(linkedList.toArray(ZipArchiveEntry.EMPTY_ARRAY))); 1120 } 1121 1122 /** 1123 * Gets a named entry or {@code null} if no entry by that name exists. 1124 * <p> 1125 * If multiple entries with the same name exist the first entry in the archive's central directory by that name is returned. 1126 * </p> 1127 * 1128 * @param name name of the entry. 
1129 * @return the ZipArchiveEntry corresponding to the given name - or {@code null} if not present. 1130 */ 1131 public ZipArchiveEntry getEntry(final String name) { 1132 final LinkedList<ZipArchiveEntry> entries = nameMap.get(name); 1133 return entries != null ? entries.getFirst() : null; 1134 } 1135 1136 /** 1137 * Gets the offset of the first local file header in the file. 1138 * 1139 * @return the length of the content before the first local file header 1140 * @since 1.23 1141 */ 1142 public long getFirstLocalFileHeaderOffset() { 1143 return firstLocalFileHeaderOffset; 1144 } 1145 1146 /** 1147 * Gets an InputStream for reading the contents of the given entry. 1148 * 1149 * @param entry the entry to get the stream for. 1150 * @return a stream to read the entry from. The returned stream implements {@link InputStreamStatistics}. 1151 * @throws IOException if unable to create an input stream from the zipEntry. 1152 */ 1153 public InputStream getInputStream(final ZipArchiveEntry entry) throws IOException { 1154 if (!(entry instanceof Entry)) { 1155 return null; 1156 } 1157 // cast validity is checked just above 1158 ZipUtil.checkRequestedFeatures(entry); 1159 1160 // doesn't get closed if the method is not supported - which 1161 // should never happen because of the checkRequestedFeatures 1162 // call above 1163 final InputStream is = new BufferedInputStream(getRawInputStream(entry)); // NOSONAR 1164 switch (ZipMethod.getMethodByCode(entry.getMethod())) { 1165 case STORED: 1166 return new StoredStatisticsStream(is); 1167 case UNSHRINKING: 1168 return new UnshrinkingInputStream(is); 1169 case IMPLODING: 1170 try { 1171 return new ExplodingInputStream(entry.getGeneralPurposeBit().getSlidingDictionarySize(), 1172 entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is); 1173 } catch (final IllegalArgumentException ex) { 1174 throw new IOException("bad IMPLODE data", ex); 1175 } 1176 case DEFLATED: 1177 final Inflater inflater = new Inflater(true); 1178 // 
Inflater with nowrap=true has this odd contract for a zero padding 1179 // byte following the data stream; this used to be zlib's requirement 1180 // and has been fixed a long time ago, but the contract persists so 1181 // we comply. 1182 // https://docs.oracle.com/javase/8/docs/api/java/util/zip/Inflater.html#Inflater(boolean) 1183 return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)), inflater) { 1184 @Override 1185 public void close() throws IOException { 1186 try { 1187 super.close(); 1188 } finally { 1189 inflater.end(); 1190 } 1191 } 1192 }; 1193 case BZIP2: 1194 return new BZip2CompressorInputStream(is); 1195 case ENHANCED_DEFLATED: 1196 return new Deflate64CompressorInputStream(is); 1197 case AES_ENCRYPTED: 1198 case EXPANDING_LEVEL_1: 1199 case EXPANDING_LEVEL_2: 1200 case EXPANDING_LEVEL_3: 1201 case EXPANDING_LEVEL_4: 1202 case JPEG: 1203 case LZMA: 1204 case PKWARE_IMPLODING: 1205 case PPMD: 1206 case TOKENIZATION: 1207 case UNKNOWN: 1208 case WAVPACK: 1209 case XZ: 1210 default: 1211 throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(entry.getMethod()), entry); 1212 } 1213 } 1214 1215 /** 1216 * Gets the raw stream of the archive entry (compressed form). 1217 * <p> 1218 * This method does not relate to how/if we understand the payload in the stream, since we really only intend to move it on to somewhere else. 1219 * </p> 1220 * <p> 1221 * Since version 1.22, this method will make an attempt to read the entry's data stream offset, even if the {@code ignoreLocalFileHeader} parameter was 1222 * {@code true} in the constructor. An IOException can also be thrown from the body of the method if this lookup fails for some reason. 1223 * </p> 1224 * 1225 * @param entry The entry to get the stream for 1226 * @return The raw input stream containing (possibly) compressed data. 1227 * @since 1.11 1228 * @throws IOException if there is a problem reading data offset (added in version 1.22). 
1229 */ 1230 public InputStream getRawInputStream(final ZipArchiveEntry entry) throws IOException { 1231 if (!(entry instanceof Entry)) { 1232 return null; 1233 } 1234 final long start = getDataOffset(entry); 1235 if (start == EntryStreamOffsets.OFFSET_UNKNOWN) { 1236 return null; 1237 } 1238 return createBoundedInputStream(start, entry.getCompressedSize()); 1239 } 1240 1241 /** 1242 * Gets the entry's content as a String if isUnixSymlink() returns true for it, otherwise returns null. 1243 * <p> 1244 * This method assumes the symbolic link's file name uses the same encoding that as been specified for this ZipFile. 1245 * </p> 1246 * 1247 * @param entry ZipArchiveEntry object that represents the symbolic link 1248 * @return entry's content as a String 1249 * @throws IOException problem with content's input stream 1250 * @since 1.5 1251 */ 1252 public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException { 1253 if (entry != null && entry.isUnixSymlink()) { 1254 try (InputStream in = getInputStream(entry)) { 1255 return zipEncoding.decode(org.apache.commons.io.IOUtils.toByteArray(in)); 1256 } 1257 } 1258 return null; 1259 } 1260 1261 /** 1262 * Reads the central directory of the given archive and populates the internal tables with ZipArchiveEntry instances. 1263 * <p> 1264 * The ZipArchiveEntrys will know all data that can be obtained from the central directory alone, but not the data that requires the local file header or 1265 * additional data to be read. 1266 * </p> 1267 * 1268 * @return a map of zip entries that didn't have the language encoding flag set when read. 
1269 */ 1270 private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory() throws IOException { 1271 final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = new HashMap<>(); 1272 1273 positionAtCentralDirectory(); 1274 centralDirectoryStartOffset = archive.position(); 1275 1276 wordBbuf.rewind(); 1277 IOUtils.readFully(archive, wordBbuf); 1278 long sig = ZipLong.getValue(wordBuf); 1279 1280 if (sig != CFH_SIG && startsWithLocalFileHeader()) { 1281 throw new IOException("Central directory is empty, can't expand" + " corrupt archive."); 1282 } 1283 1284 while (sig == CFH_SIG) { 1285 readCentralDirectoryEntry(noUTF8Flag); 1286 wordBbuf.rewind(); 1287 IOUtils.readFully(archive, wordBbuf); 1288 sig = ZipLong.getValue(wordBuf); 1289 } 1290 return noUTF8Flag; 1291 } 1292 1293 /** 1294 * Searches for either the "Zip64 end of central directory locator" or the "End of central dir record", parses it and positions the 1295 * stream at the first central directory record. 1296 */ 1297 private void positionAtCentralDirectory() throws IOException { 1298 final boolean is64 = positionAtEndOfCentralDirectoryRecord(archive); 1299 if (!is64) { 1300 positionAtCentralDirectory32(); 1301 } else { 1302 positionAtCentralDirectory64(); 1303 } 1304 } 1305 1306 /** 1307 * Parses the "End of central dir record" and positions the stream at the first central directory record. 1308 * 1309 * Expects stream to be positioned at the beginning of the "End of central dir record". 
*/
private void positionAtCentralDirectory32() throws IOException {
    // Remember where the record starts; used below to detect content in front of the first local file header.
    final long endOfCentralDirectoryRecordOffset = archive.position();
    if (isSplitZipArchive) {
        // Split archive: the central directory is addressed by a disk number plus an offset relative to that disk.
        skipBytes(CFD_DISK_OFFSET);
        shortBbuf.rewind();
        IOUtils.readFully(archive, shortBbuf);
        centralDirectoryStartDiskNumber = ZipShort.getValue(shortBuf);

        skipBytes(CFD_LOCATOR_RELATIVE_OFFSET);

        wordBbuf.rewind();
        IOUtils.readFully(archive, wordBbuf);
        centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf);
        ((ZipSplitReadOnlySeekableByteChannel) archive).position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
    } else {
        // Two consecutive 4-byte fields are read: the length of the central directory, then its recorded start offset.
        skipBytes(CFD_LENGTH_OFFSET);
        wordBbuf.rewind();
        IOUtils.readFully(archive, wordBbuf);
        final long centralDirectoryLength = ZipLong.getValue(wordBuf);

        wordBbuf.rewind();
        IOUtils.readFully(archive, wordBbuf);
        centralDirectoryStartDiskNumber = 0;
        centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf);

        // If the record sits further into the channel than length + recorded offset account for, the surplus is taken as
        // content preceding the first local file header; a negative difference is clamped to 0.
        firstLocalFileHeaderOffset = Long.max(endOfCentralDirectoryRecordOffset - centralDirectoryLength - centralDirectoryStartRelativeOffset, 0L);
        // The recorded offset does not include that leading content, so it is added when seeking.
        archive.position(centralDirectoryStartRelativeOffset + firstLocalFileHeaderOffset);
    }
}

/**
 * Parses the "Zip64 end of central directory locator", finds the "Zip64 end of central directory record" using the parsed information,
 * parses that and positions the stream at the first central directory record.
 *
 * Expects stream to be positioned right behind the "Zip64 end of central directory locator"'s signature.
*/
private void positionAtCentralDirectory64() throws IOException {
    // NOTE(review): one word is skipped unconditionally before the locator fields are read - confirm against
    // positionAtEndOfCentralDirectoryRecord which word this is.
    skipBytes(ZipConstants.WORD);
    if (isSplitZipArchive) {
        // Split archive: the Zip64 EOCD record is addressed by disk number (4 bytes) and relative offset (8 bytes).
        wordBbuf.rewind();
        IOUtils.readFully(archive, wordBbuf);
        final long diskNumberOfEOCD = ZipLong.getValue(wordBuf);

        dwordBbuf.rewind();
        IOUtils.readFully(archive, dwordBbuf);
        final long relativeOffsetOfEOCD = ZipEightByteInteger.getLongValue(dwordBuf);
        ((ZipSplitReadOnlySeekableByteChannel) archive).position(diskNumberOfEOCD, relativeOffsetOfEOCD);
    } else {
        skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET - ZipConstants.WORD /* signature has already been read */);
        dwordBbuf.rewind();
        IOUtils.readFully(archive, dwordBbuf);
        // jump to the Zip64 end of central directory record
        archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
    }

    // The position just reached must start with the Zip64 EOCD record signature.
    wordBbuf.rewind();
    IOUtils.readFully(archive, wordBbuf);
    if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
        throw new ZipException("Archive's ZIP64 end of central directory locator is corrupt.");
    }

    if (isSplitZipArchive) {
        skipBytes(ZIP64_EOCD_CFD_DISK_OFFSET - ZipConstants.WORD /* signature has already been read */);
        wordBbuf.rewind();
        IOUtils.readFully(archive, wordBbuf);
        centralDirectoryStartDiskNumber = ZipLong.getValue(wordBuf);

        skipBytes(ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET);

        dwordBbuf.rewind();
        IOUtils.readFully(archive, dwordBbuf);
        centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
        ((ZipSplitReadOnlySeekableByteChannel) archive).position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
    } else {
        skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET - ZipConstants.WORD /* signature has already been read */);
        dwordBbuf.rewind();
        IOUtils.readFully(archive, dwordBbuf);
        centralDirectoryStartDiskNumber = 0;
        centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
        archive.position(centralDirectoryStartRelativeOffset);
    }
}

/**
 * Reads an individual entry of the central directory, creates an ZipArchiveEntry from it and adds it to the global maps.
 * <p>
 * The fixed-size part of the header is read into {@code cfhBuf} in one go and decoded field by field; the variable-length file name, extra data and
 * comment are then read from the channel in that order.
 * </p>
 *
 * @param noUTF8Flag map used to collect entries that don't have their UTF-8 flag set and whose name will be set by data read from the local file header
 *                   later. The current entry may be added to this map.
 * @throws IOException if the header cannot be read completely (EOFException), contains invalid extra data (ZipException) or fails a sanity check.
 */
private void readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag) throws IOException {
    cfhBbuf.rewind();
    IOUtils.readFully(archive, cfhBbuf);
    // off tracks the read position inside cfhBuf
    int off = 0;
    final Entry ze = new Entry();

    final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
    off += ZipConstants.SHORT;
    ze.setVersionMadeBy(versionMadeBy);
    ze.setPlatform(versionMadeBy >> BYTE_SHIFT & NIBLET_MASK);

    ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
    off += ZipConstants.SHORT; // version required

    final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
    final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
    // Name and comment are decoded as UTF-8 when the entry says so, otherwise with this ZipFile's encoding.
    final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.ZIP_ENCODING_UTF_8 : zipEncoding;
    if (hasUTF8Flag) {
        ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
    }
    ze.setGeneralPurposeBit(gpFlag);
    // same two bytes as parsed into gpFlag above, kept in raw form as well
    ze.setRawFlag(ZipShort.getValue(cfhBuf, off));

    off += ZipConstants.SHORT;

    // noinspection MagicConstant
    ze.setMethod(ZipShort.getValue(cfhBuf, off));
    off += ZipConstants.SHORT;

    final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
    ze.setTime(time);
    off += ZipConstants.WORD;

    ze.setCrc(ZipLong.getValue(cfhBuf, off));
    off += ZipConstants.WORD;

    long size = ZipLong.getValue(cfhBuf, off);
    if (size < 0) {
        throw new IOException("broken archive, entry with negative compressed size");
    }
    ze.setCompressedSize(size);
    off += ZipConstants.WORD;

    size = ZipLong.getValue(cfhBuf, off);
    if (size < 0) {
        throw new IOException("broken archive, entry with negative size");
    }
    ze.setSize(size);
    off += ZipConstants.WORD;

    final int fileNameLen = ZipShort.getValue(cfhBuf, off);
    off += ZipConstants.SHORT;
    if (fileNameLen < 0) {
        throw new IOException("broken archive, entry with negative fileNameLen");
    }

    final int extraLen = ZipShort.getValue(cfhBuf, off);
    off += ZipConstants.SHORT;
    if (extraLen < 0) {
        throw new IOException("broken archive, entry with negative extraLen");
    }

    final int commentLen = ZipShort.getValue(cfhBuf, off);
    off += ZipConstants.SHORT;
    if (commentLen < 0) {
        throw new IOException("broken archive, entry with negative commentLen");
    }

    ze.setDiskNumberStart(ZipShort.getValue(cfhBuf, off));
    off += ZipConstants.SHORT;

    ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
    off += ZipConstants.SHORT;

    ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
    off += ZipConstants.WORD;

    // a short read means the channel ended before the announced number of bytes
    final byte[] fileName = IOUtils.readRange(archive, fileNameLen);
    if (fileName.length < fileNameLen) {
        throw new EOFException();
    }
    ze.setName(entryEncoding.decode(fileName), fileName);

    // LFH offset,
    // NOTE(review): firstLocalFileHeaderOffset is added before setSizesAndOffsetFromZip64Extra compares the offset with
    // ZipConstants.ZIP64_MAGIC - confirm this is intended when firstLocalFileHeaderOffset is non-zero.
    ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off) + firstLocalFileHeaderOffset);
    // data offset will be filled later
    entries.add(ze);

    final byte[] cdExtraData = IOUtils.readRange(archive, extraLen);
    if (cdExtraData.length < extraLen) {
        throw new EOFException();
    }
    try {
        ze.setCentralDirectoryExtra(cdExtraData);
    } catch (final RuntimeException e) {
        // report unparseable extra data as a ZipException, keeping the original exception as the cause
        final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
        z.initCause(e);
        throw z;
    }

    setSizesAndOffsetFromZip64Extra(ze);
    sanityCheckLFHOffset(ze);

    final byte[] comment = IOUtils.readRange(archive, commentLen);
    if (comment.length < commentLen) {
        throw new EOFException();
    }
    ze.setComment(entryEncoding.decode(comment));

    if (!hasUTF8Flag && useUnicodeExtraFields) {
        // keep the raw bytes so that name and comment can be revisited once the local file header has been read
        noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
    }

    ze.setStreamContiguous(true);
}

/**
 * Walks through all recorded entries and adds the data available from the local file header.
 * <p>
 * Also records the offsets for the data to read from the entries.
1519 * </p> 1520 */ 1521 private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag) throws IOException { 1522 for (final ZipArchiveEntry zipArchiveEntry : entries) { 1523 // entries are filled in populateFromCentralDirectory and never modified 1524 final Entry ze = (Entry) zipArchiveEntry; 1525 final int[] lens = setDataOffset(ze); 1526 final int fileNameLen = lens[0]; 1527 final int extraFieldLen = lens[1]; 1528 skipBytes(fileNameLen); 1529 final byte[] localExtraData = IOUtils.readRange(archive, extraFieldLen); 1530 if (localExtraData.length < extraFieldLen) { 1531 throw new EOFException(); 1532 } 1533 try { 1534 ze.setExtra(localExtraData); 1535 } catch (final RuntimeException e) { 1536 final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName()); 1537 z.initCause(e); 1538 throw z; 1539 } 1540 1541 if (entriesWithoutUTF8Flag.containsKey(ze)) { 1542 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze); 1543 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, nc.comment); 1544 } 1545 } 1546 } 1547 1548 private void sanityCheckLFHOffset(final ZipArchiveEntry entry) throws IOException { 1549 if (entry.getDiskNumberStart() < 0) { 1550 throw new IOException("broken archive, entry with negative disk number"); 1551 } 1552 if (entry.getLocalHeaderOffset() < 0) { 1553 throw new IOException("broken archive, entry with negative local file header offset"); 1554 } 1555 if (isSplitZipArchive) { 1556 if (entry.getDiskNumberStart() > centralDirectoryStartDiskNumber) { 1557 throw new IOException("local file header for " + entry.getName() + " starts on a later disk than central directory"); 1558 } 1559 if (entry.getDiskNumberStart() == centralDirectoryStartDiskNumber && entry.getLocalHeaderOffset() > centralDirectoryStartRelativeOffset) { 1560 throw new IOException("local file header for " + entry.getName() + " starts after central directory"); 1561 } 1562 } else if (entry.getLocalHeaderOffset() > 
centralDirectoryStartOffset) { 1563 throw new IOException("local file header for " + entry.getName() + " starts after central directory"); 1564 } 1565 } 1566 1567 private int[] setDataOffset(final ZipArchiveEntry entry) throws IOException { 1568 long offset = entry.getLocalHeaderOffset(); 1569 if (isSplitZipArchive) { 1570 ((ZipSplitReadOnlySeekableByteChannel) archive).position(entry.getDiskNumberStart(), offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1571 // the offset should be updated to the global offset 1572 offset = archive.position() - LFH_OFFSET_FOR_FILENAME_LENGTH; 1573 } else { 1574 archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1575 } 1576 wordBbuf.rewind(); 1577 IOUtils.readFully(archive, wordBbuf); 1578 wordBbuf.flip(); 1579 wordBbuf.get(shortBuf); 1580 final int fileNameLen = ZipShort.getValue(shortBuf); 1581 wordBbuf.get(shortBuf); 1582 final int extraFieldLen = ZipShort.getValue(shortBuf); 1583 entry.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH + ZipConstants.SHORT + ZipConstants.SHORT + fileNameLen + extraFieldLen); 1584 if (entry.getDataOffset() + entry.getCompressedSize() > centralDirectoryStartOffset) { 1585 throw new IOException("data for " + entry.getName() + " overlaps with central directory."); 1586 } 1587 return new int[] { fileNameLen, extraFieldLen }; 1588 } 1589 1590 /** 1591 * If the entry holds a Zip64 extended information extra field, read sizes from there if the entry's sizes are set to 0xFFFFFFFFF, do the same for the 1592 * offset of the local file header. 1593 * <p> 1594 * Ensures the Zip64 extra either knows both compressed and uncompressed size or neither of both as the internal logic in ExtraFieldUtils forces the field 1595 * to create local header data even if they are never used - and here a field with only one size would be invalid. 
1596 * </p> 1597 */ 1598 private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry entry) throws IOException { 1599 final ZipExtraField extra = entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 1600 if (extra != null && !(extra instanceof Zip64ExtendedInformationExtraField)) { 1601 throw new ZipException("archive contains unparseable zip64 extra field"); 1602 } 1603 final Zip64ExtendedInformationExtraField z64 = (Zip64ExtendedInformationExtraField) extra; 1604 if (z64 != null) { 1605 final boolean hasUncompressedSize = entry.getSize() == ZipConstants.ZIP64_MAGIC; 1606 final boolean hasCompressedSize = entry.getCompressedSize() == ZipConstants.ZIP64_MAGIC; 1607 final boolean hasRelativeHeaderOffset = entry.getLocalHeaderOffset() == ZipConstants.ZIP64_MAGIC; 1608 final boolean hasDiskStart = entry.getDiskNumberStart() == ZipConstants.ZIP64_MAGIC_SHORT; 1609 z64.reparseCentralDirectoryData(hasUncompressedSize, hasCompressedSize, hasRelativeHeaderOffset, hasDiskStart); 1610 1611 if (hasUncompressedSize) { 1612 final long size = z64.getSize().getLongValue(); 1613 if (size < 0) { 1614 throw new IOException("broken archive, entry with negative size"); 1615 } 1616 entry.setSize(size); 1617 } else if (hasCompressedSize) { 1618 z64.setSize(new ZipEightByteInteger(entry.getSize())); 1619 } 1620 1621 if (hasCompressedSize) { 1622 final long size = z64.getCompressedSize().getLongValue(); 1623 if (size < 0) { 1624 throw new IOException("broken archive, entry with negative compressed size"); 1625 } 1626 entry.setCompressedSize(size); 1627 } else if (hasUncompressedSize) { 1628 z64.setCompressedSize(new ZipEightByteInteger(entry.getCompressedSize())); 1629 } 1630 1631 if (hasRelativeHeaderOffset) { 1632 entry.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue()); 1633 } 1634 1635 if (hasDiskStart) { 1636 entry.setDiskNumberStart(z64.getDiskStartNumber().getValue()); 1637 } 1638 } 1639 } 1640 1641 /** 1642 * Skips the given number of bytes or 
throws an EOFException if skipping failed. 1643 */ 1644 private void skipBytes(final int count) throws IOException { 1645 final long currentPosition = archive.position(); 1646 final long newPosition = currentPosition + count; 1647 if (newPosition > archive.size()) { 1648 throw new EOFException(); 1649 } 1650 archive.position(newPosition); 1651 } 1652 1653 /** 1654 * Sorts entries in place by offset. 1655 * 1656 * @param allEntries entries to sort 1657 * @return the given entries, sorted. 1658 */ 1659 private ZipArchiveEntry[] sortByOffset(final ZipArchiveEntry[] allEntries) { 1660 Arrays.sort(allEntries, offsetComparator); 1661 return allEntries; 1662 } 1663 1664 /** 1665 * Checks whether the archive starts with an LFH. If it doesn't, it may be an empty archive. 1666 */ 1667 private boolean startsWithLocalFileHeader() throws IOException { 1668 archive.position(firstLocalFileHeaderOffset); 1669 wordBbuf.rewind(); 1670 IOUtils.readFully(archive, wordBbuf); 1671 return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG); 1672 } 1673}