List of usage examples for org.apache.commons.compress.archivers.tar TarArchiveInputStream getNextTarEntry
public TarArchiveEntry getNextTarEntry() throws IOException
From source file:net.yacy.document.parser.tarParser.java
@Override public Document[] parse(final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, final int timezoneOffset, InputStream source) throws Parser.Failure, InterruptedException { final String filename = location.getFileName(); final String ext = MultiProtocolURL.getFileExtension(filename); if (ext.equals("gz") || ext.equals("tgz")) { try {//from w w w . j a v a 2 s . c om source = new GZIPInputStream(source); } catch (final IOException e) { throw new Parser.Failure("tar parser: " + e.getMessage(), location); } } TarArchiveEntry entry; final TarArchiveInputStream tis = new TarArchiveInputStream(source); // create maindoc for this bzip container final Document maindoc = new Document(location, mimeType, charset, this, null, null, AbstractParser .singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title null, null, null, null, 0.0d, 0.0d, (Object) null, null, null, null, false, new Date()); // loop through the elements in the tar file and parse every single file inside while (true) { try { File tmp = null; entry = tis.getNextTarEntry(); if (entry == null) break; if (entry.isDirectory() || entry.getSize() <= 0) continue; final String name = entry.getName(); final int idx = name.lastIndexOf('.'); final String mime = TextParser.mimeOf((idx > -1) ? 
name.substring(idx + 1) : ""); try { tmp = FileUtils.createTempFile(this.getClass(), name); FileUtils.copy(tis, tmp, entry.getSize()); final Document[] subDocs = TextParser.parseSource(AnchorURL.newAnchor(location, "#" + name), mime, null, scraper, timezoneOffset, 999, tmp); if (subDocs == null) continue; maindoc.addSubDocuments(subDocs); } catch (final Parser.Failure e) { AbstractParser.log.warn("tar parser entry " + name + ": " + e.getMessage()); } finally { if (tmp != null) FileUtils.deletedelete(tmp); } } catch (final IOException e) { AbstractParser.log.warn("tar parser:" + e.getMessage()); break; } } return new Document[] { maindoc }; }
From source file:net.yacy.utils.tarTools.java
/** * Untar for any tar archive, overwrites existing data. Closes the * InputStream once terminated./*from w w w .ja va 2 s. c om*/ * * @param in * input stream. Must not be null. (use * {@link #getInputStream(String)} for convenience) * @param untarDir * destination path. Must not be null. * @throws IOException * when a read/write error occurred * @throws FileNotFoundException * when the untarDir does not exists * @throws NullPointerException * when a parameter is null */ public static void unTar(final InputStream in, final String untarDir) throws IOException { ConcurrentLog.info("UNTAR", "starting"); if (new File(untarDir).exists()) { final TarArchiveInputStream tin = new TarArchiveInputStream(in); try { TarArchiveEntry tarEntry = tin.getNextTarEntry(); if (tarEntry == null) { throw new IOException("tar archive is empty or corrupted"); } while (tarEntry != null) { final File destPath = new File(untarDir + File.separator + tarEntry.getName()); if (!tarEntry.isDirectory()) { new File(destPath.getParent()).mkdirs(); // create missing subdirectories final FileOutputStream fout = new FileOutputStream(destPath); IOUtils.copyLarge(tin, fout, 0, tarEntry.getSize()); fout.close(); } else { destPath.mkdir(); } tarEntry = tin.getNextTarEntry(); } } finally { try { tin.close(); } catch (IOException ignored) { ConcurrentLog.warn("UNTAR", "InputStream could not be closed"); } } } else { // untarDir doesn't exist ConcurrentLog.warn("UNTAR", "destination " + untarDir + " doesn't exist."); /* Still have to close the input stream */ try { in.close(); } catch (IOException ignored) { ConcurrentLog.warn("UNTAR", "InputStream could not be closed"); } throw new FileNotFoundException("Output untar directory not found : " + untarDir); } ConcurrentLog.info("UNTAR", "finished"); }
From source file:net.zyuiop.remoteworldloader.utils.CompressionUtils.java
public static void uncompressArchive(File archive, File target) throws IOException, CompressorException { CompressorInputStream compressor = new GzipCompressorInputStream(new FileInputStream(archive)); TarArchiveInputStream stream = new TarArchiveInputStream(compressor); TarArchiveEntry entry;//www .ja v a2 s. c o m while ((entry = stream.getNextTarEntry()) != null) { File f = new File(target.getCanonicalPath(), entry.getName()); if (f.exists()) { Bukkit.getLogger().warning("The file " + f.getCanonicalPath() + " already exists, deleting it."); if (!f.delete()) { Bukkit.getLogger().warning("Cannot remove, skipping file."); } } if (entry.isDirectory()) { f.mkdirs(); continue; } f.getParentFile().mkdirs(); f.createNewFile(); try { try (OutputStream fos = new BufferedOutputStream(new FileOutputStream(f))) { final byte[] buf = new byte[8192]; int bytesRead; while (-1 != (bytesRead = stream.read(buf))) fos.write(buf, 0, bytesRead); } Bukkit.getLogger().info("Extracted file " + f.getName() + "..."); } catch (IOException ioe) { f.delete(); throw ioe; } } }
From source file:org.apache.camel.dataformat.tarfile.TarFileDataFormat.java
@Override public Object unmarshal(Exchange exchange, InputStream stream) throws Exception { if (usingIterator) { return new TarIterator(exchange.getIn(), stream); } else {//from w ww . ja v a2 s . com InputStream is = exchange.getIn().getMandatoryBody(InputStream.class); TarArchiveInputStream tis = (TarArchiveInputStream) new ArchiveStreamFactory() .createArchiveInputStream(ArchiveStreamFactory.TAR, new BufferedInputStream(is)); ByteArrayOutputStream baos = new ByteArrayOutputStream(); try { TarArchiveEntry entry = tis.getNextTarEntry(); if (entry != null) { exchange.getOut().setHeader(FILE_NAME, entry.getName()); IOHelper.copy(tis, baos); } entry = tis.getNextTarEntry(); if (entry != null) { throw new IllegalStateException("Tar file has more than 1 entry."); } return baos.toByteArray(); } finally { IOHelper.close(tis, baos); } } }
From source file:org.apache.camel.processor.aggregate.tarfile.AggregationStrategyWithFilenameHeaderTest.java
/**
 * Sends two bodies with distinct file name headers to {@code direct:start} and verifies
 * that the tar file produced in {@code target/out} contains exactly the entries listed
 * in {@code FILE_NAMES}.
 */
@Test
public void testSplitter() throws Exception {
    final MockEndpoint aggregated = getMockEndpoint("mock:aggregateToTarEntry");
    aggregated.expectedMessageCount(1);

    template.setDefaultEndpointUri("direct:start");
    template.sendBodyAndHeader("foo", Exchange.FILE_NAME, FILE_NAMES.get(0));
    template.sendBodyAndHeader("bar", Exchange.FILE_NAME, FILE_NAMES.get(1));
    assertMockEndpointsSatisfied();

    // give the file producer time to finish writing
    Thread.sleep(500);

    final File[] files = new File("target/out").listFiles();
    assertTrue(files != null);
    assertTrue("Should be a file in target/out directory", files.length > 0);

    final File resultFile = files[0];
    final TarArchiveInputStream tis = (TarArchiveInputStream) new ArchiveStreamFactory()
            .createArchiveInputStream(ArchiveStreamFactory.TAR,
                    new BufferedInputStream(new FileInputStream(resultFile)));
    try {
        int fileCount = 0;
        TarArchiveEntry entry = tis.getNextTarEntry();
        while (entry != null) {
            fileCount++;
            assertTrue("Tar entry file name should be on of: " + FILE_NAMES,
                    FILE_NAMES.contains(entry.getName()));
            entry = tis.getNextTarEntry();
        }
        assertEquals("Tar file should contain " + FILE_NAMES.size() + " files",
                FILE_NAMES.size(), fileCount);
    } finally {
        IOHelper.close(tis);
    }
}
From source file:org.apache.camel.processor.aggregate.tarfile.AggregationStrategyWithPreservationTest.java
@Test public void testSplitter() throws Exception { MockEndpoint mock = getMockEndpoint("mock:aggregateToTarEntry"); mock.expectedMessageCount(1);//from w w w. j a v a 2 s . com assertMockEndpointsSatisfied(); Thread.sleep(500); File[] files = new File("target/out").listFiles(); assertTrue("Should be a file in target/out directory", files.length > 0); File resultFile = files[0]; Set<String> expectedTarFiles = new HashSet<String>( Arrays.asList("another/hello.txt", "other/greetings.txt", "chiau.txt", "hi.txt", "hola.txt")); TarArchiveInputStream tin = new TarArchiveInputStream(new FileInputStream(resultFile)); try { int fileCount = 0; for (TarArchiveEntry te = tin.getNextTarEntry(); te != null; te = tin.getNextTarEntry()) { expectedTarFiles.remove(te.getName()); fileCount++; } assertTrue("Tar file should contains " + AggregationStrategyWithPreservationTest.EXPECTED_NO_FILES + " files", fileCount == AggregationStrategyWithPreservationTest.EXPECTED_NO_FILES); assertEquals("Should have found all of the tar files in the file.", 0, expectedTarFiles.size()); } finally { IOHelper.close(tin); } }
From source file:org.apache.camel.processor.aggregate.tarfile.TarAggregationStrategyTest.java
@Test public void testSplitter() throws Exception { MockEndpoint mock = getMockEndpoint("mock:aggregateToTarEntry"); mock.expectedMessageCount(1);//w ww .j a va2s .c o m mock.expectedHeaderReceived("foo", "bar"); assertMockEndpointsSatisfied(); Thread.sleep(500); File[] files = new File("target/out").listFiles(); assertTrue(files != null); assertTrue("Should be a file in target/out directory", files.length > 0); File resultFile = files[0]; TarArchiveInputStream tin = new TarArchiveInputStream(new FileInputStream(resultFile)); try { int fileCount = 0; for (TarArchiveEntry te = tin.getNextTarEntry(); te != null; te = tin.getNextTarEntry()) { fileCount = fileCount + 1; } assertEquals("Tar file should contains " + TarAggregationStrategyTest.EXPECTED_NO_FILES + " files", TarAggregationStrategyTest.EXPECTED_NO_FILES, fileCount); } finally { IOHelper.close(tin); } }
From source file:org.apache.flume.test.util.StagedInstall.java
private void untarTarFile(File tarFile, File destDir) throws Exception { TarArchiveInputStream tarInputStream = null; try {//from ww w .j a va 2 s . c o m tarInputStream = new TarArchiveInputStream(new FileInputStream(tarFile)); TarArchiveEntry entry = null; while ((entry = tarInputStream.getNextTarEntry()) != null) { String name = entry.getName(); LOGGER.debug("Next file: " + name); File destFile = new File(destDir, entry.getName()); if (entry.isDirectory()) { destFile.mkdirs(); continue; } File destParent = destFile.getParentFile(); destParent.mkdirs(); OutputStream entryOutputStream = null; try { entryOutputStream = new FileOutputStream(destFile); byte[] buffer = new byte[2048]; int length = 0; while ((length = tarInputStream.read(buffer, 0, 2048)) != -1) { entryOutputStream.write(buffer, 0, length); } } catch (Exception ex) { LOGGER.error("Exception while expanding tar file", ex); throw ex; } finally { if (entryOutputStream != null) { try { entryOutputStream.close(); } catch (Exception ex) { LOGGER.warn("Failed to close entry output stream", ex); } } } } } catch (Exception ex) { LOGGER.error("Exception caught while untarring tar file: " + tarFile.getAbsolutePath(), ex); throw ex; } finally { if (tarInputStream != null) { try { tarInputStream.close(); } catch (Exception ex) { LOGGER.warn("Unable to close tar input stream: " + tarFile.getCanonicalPath(), ex); } } } }
From source file:org.apache.gobblin.data.management.copy.writer.TarArchiveInputStreamDataWriter.java
/** * Untars the passed in {@link FileAwareInputStream} to the task's staging directory. Uses the name of the root * {@link TarArchiveEntry} in the stream as the directory name for the untarred file. The method also commits the data * by moving the file from staging to output directory. * * @see org.apache.gobblin.data.management.copy.writer.FileAwareInputStreamDataWriter#write(org.apache.gobblin.data.management.copy.FileAwareInputStream) *//*from ww w .j ava 2s. com*/ @Override public void writeImpl(InputStream inputStream, Path writeAt, CopyableFile copyableFile) throws IOException { this.closer.register(inputStream); TarArchiveInputStream tarIn = new TarArchiveInputStream(inputStream); final ReadableByteChannel inputChannel = Channels.newChannel(tarIn); TarArchiveEntry tarEntry; // flush the first entry in the tar, which is just the root directory tarEntry = tarIn.getNextTarEntry(); String tarEntryRootName = StringUtils.remove(tarEntry.getName(), Path.SEPARATOR); log.info("Unarchiving at " + writeAt); try { while ((tarEntry = tarIn.getNextTarEntry()) != null) { // the API tarEntry.getName() is misleading, it is actually the path of the tarEntry in the tar file String newTarEntryPath = tarEntry.getName().replace(tarEntryRootName, writeAt.getName()); Path tarEntryStagingPath = new Path(writeAt.getParent(), newTarEntryPath); if (!FileUtils.isSubPath(writeAt.getParent(), tarEntryStagingPath)) { throw new IOException( String.format("Extracted file: %s is trying to write outside of output directory: %s", tarEntryStagingPath, writeAt.getParent())); } if (tarEntry.isDirectory() && !this.fs.exists(tarEntryStagingPath)) { this.fs.mkdirs(tarEntryStagingPath); } else if (!tarEntry.isDirectory()) { FSDataOutputStream out = this.fs.create(tarEntryStagingPath, true); final WritableByteChannel outputChannel = Channels.newChannel(out); try { StreamCopier copier = new StreamCopier(inputChannel, outputChannel); if (isInstrumentationEnabled()) { 
copier.withCopySpeedMeter(this.copySpeedMeter); } this.bytesWritten.addAndGet(copier.copy()); if (isInstrumentationEnabled()) { log.info("File {}: copied {} bytes, average rate: {} B/s", copyableFile.getOrigin().getPath(), this.copySpeedMeter.getCount(), this.copySpeedMeter.getMeanRate()); } else { log.info("File {} copied.", copyableFile.getOrigin().getPath()); } } finally { out.close(); outputChannel.close(); } } } } finally { tarIn.close(); inputChannel.close(); inputStream.close(); } }
From source file:org.apache.kylin.common.util.TarGZUtil.java
public static void uncompressTarGZ(File tarFile, File dest) throws IOException { dest.mkdir();/* w w w . ja v a 2 s. c om*/ TarArchiveInputStream tarIn = null; tarIn = new TarArchiveInputStream( new GzipCompressorInputStream(new BufferedInputStream(new FileInputStream(tarFile)))); TarArchiveEntry tarEntry = tarIn.getNextTarEntry(); // tarIn is a TarArchiveInputStream while (tarEntry != null) {// create a file with the same name as the tarEntry File destPath = new File(dest, tarEntry.getName()); System.out.println("working: " + destPath.getCanonicalPath()); if (tarEntry.isDirectory()) { destPath.mkdirs(); } else { destPath.createNewFile(); //byte [] btoRead = new byte[(int)tarEntry.getSize()]; byte[] btoRead = new byte[1024]; //FileInputStream fin // = new FileInputStream(destPath.getCanonicalPath()); BufferedOutputStream bout = new BufferedOutputStream(new FileOutputStream(destPath)); int len = 0; while ((len = tarIn.read(btoRead)) != -1) { bout.write(btoRead, 0, len); } bout.close(); btoRead = null; } tarEntry = tarIn.getNextTarEntry(); } tarIn.close(); }