Example usage for org.apache.commons.compress.archivers.tar TarArchiveInputStream getNextTarEntry

List of usage examples for org.apache.commons.compress.archivers.tar TarArchiveInputStream getNextTarEntry

Introduction

On this page you can find example usages of org.apache.commons.compress.archivers.tar TarArchiveInputStream getNextTarEntry.

Prototype

public TarArchiveEntry getNextTarEntry() throws IOException 

Source Link

Document

Get the next entry in this tar archive.

Usage

From source file:net.yacy.document.parser.tarParser.java

/**
 * Parses a tar archive and attaches the documents parsed from its entries as
 * sub-documents of a single container document. The source is wrapped in a
 * {@link GZIPInputStream} first when the file extension is "gz" or "tgz".
 *
 * @param location URL of the archive; its file name yields the extension and the title
 * @param mimeType mime type passed to the container document
 * @param charset charset passed to the container document
 * @param scraper vocabulary scraper forwarded to the parser of each entry
 * @param timezoneOffset time zone offset forwarded to the parser of each entry
 * @param source stream delivering the archive content
 * @return a one-element array holding the container document
 * @throws Parser.Failure when the gzip stream cannot be opened
 * @throws InterruptedException not raised directly here; may propagate from the called helpers
 */
@Override
public Document[] parse(final DigestURL location, final String mimeType, final String charset,
        final VocabularyScraper scraper, final int timezoneOffset, InputStream source)
        throws Parser.Failure, InterruptedException {

    final String filename = location.getFileName();
    final String extension = MultiProtocolURL.getFileExtension(filename);
    final boolean gzipped = extension.equals("gz") || extension.equals("tgz");
    if (gzipped) {
        try {
            source = new GZIPInputStream(source);
        } catch (final IOException gzipError) {
            throw new Parser.Failure("tar parser: " + gzipError.getMessage(), location);
        }
    }
    final TarArchiveInputStream tarStream = new TarArchiveInputStream(source);

    // container document for this tar archive; parsed entries become its sub-documents
    final Document container = new Document(location, mimeType, charset, this, null, null,
            AbstractParser
                    .singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title
            null, null, null, null, 0.0d, 0.0d, (Object) null, null, null, null, false, new Date());

    // walk the archive entry by entry; an I/O error is logged and ends the walk
    for (;;) {
        File tempFile = null;
        try {
            final TarArchiveEntry entry = tarStream.getNextTarEntry();
            if (entry == null) {
                break;
            }
            // directories and empty entries carry nothing to parse
            if (entry.isDirectory()) {
                continue;
            }
            if (entry.getSize() <= 0) {
                continue;
            }

            final String entryName = entry.getName();
            final int dotPos = entryName.lastIndexOf('.');
            final String entryExtension = (dotPos > -1) ? entryName.substring(dotPos + 1) : "";
            final String entryMime = TextParser.mimeOf(entryExtension);
            try {
                // the entry is copied into a temporary file which is then handed to the parser
                tempFile = FileUtils.createTempFile(this.getClass(), entryName);
                FileUtils.copy(tarStream, tempFile, entry.getSize());
                final Document[] parsed = TextParser.parseSource(
                        AnchorURL.newAnchor(location, "#" + entryName), entryMime, null, scraper,
                        timezoneOffset, 999, tempFile);
                if (parsed != null) {
                    container.addSubDocuments(parsed);
                }
            } catch (final Parser.Failure entryFailure) {
                // a failing entry is only logged; the remaining entries are still processed
                AbstractParser.log.warn("tar parser entry " + entryName + ": " + entryFailure.getMessage());
            } finally {
                if (tempFile != null) {
                    FileUtils.deletedelete(tempFile);
                }
            }
        } catch (final IOException ioError) {
            AbstractParser.log.warn("tar parser:" + ioError.getMessage());
            break;
        }
    }
    return new Document[] { container };
}

From source file:net.yacy.utils.tarTools.java

/**
 * Untar for any tar archive, overwrites existing data. Closes the
 * InputStream once terminated./*from  w  w w  .ja va 2  s.  c  om*/
 * 
 * @param in
 *            input stream. Must not be null. (use
 *            {@link #getInputStream(String)} for convenience)
 * @param untarDir
 *            destination path. Must not be null.
 * @throws IOException
 *             when a read/write error occurred
 * @throws FileNotFoundException
 *             when the untarDir does not exists
 * @throws NullPointerException
 *             when a parameter is null
 */
public static void unTar(final InputStream in, final String untarDir) throws IOException {
    ConcurrentLog.info("UNTAR", "starting");
    if (new File(untarDir).exists()) {
        final TarArchiveInputStream tin = new TarArchiveInputStream(in);
        try {
            TarArchiveEntry tarEntry = tin.getNextTarEntry();
            if (tarEntry == null) {
                throw new IOException("tar archive is empty or corrupted");
            }
            while (tarEntry != null) {
                final File destPath = new File(untarDir + File.separator + tarEntry.getName());
                if (!tarEntry.isDirectory()) {
                    new File(destPath.getParent()).mkdirs(); // create missing subdirectories
                    final FileOutputStream fout = new FileOutputStream(destPath);
                    IOUtils.copyLarge(tin, fout, 0, tarEntry.getSize());
                    fout.close();
                } else {
                    destPath.mkdir();
                }
                tarEntry = tin.getNextTarEntry();
            }
        } finally {
            try {
                tin.close();
            } catch (IOException ignored) {
                ConcurrentLog.warn("UNTAR", "InputStream could not be closed");
            }
        }
    } else { // untarDir doesn't exist
        ConcurrentLog.warn("UNTAR", "destination " + untarDir + " doesn't exist.");
        /* Still have to close the input stream */
        try {
            in.close();
        } catch (IOException ignored) {
            ConcurrentLog.warn("UNTAR", "InputStream could not be closed");
        }
        throw new FileNotFoundException("Output untar directory not found : " + untarDir);
    }
    ConcurrentLog.info("UNTAR", "finished");
}

From source file:net.zyuiop.remoteworldloader.utils.CompressionUtils.java

/**
 * Extracts a gzip-compressed tar archive into the target directory.
 * <p>
 * Files that already exist at an entry's location are deleted before the
 * entry is written. All streams opened here are closed when the method
 * returns or throws. An entry whose name resolves outside of the target
 * directory aborts the extraction.
 *
 * @param archive the .tar.gz file to read
 * @param target directory the entries are extracted into
 * @throws IOException when reading or writing fails, or when an entry would
 *             be written outside of {@code target}
 * @throws CompressorException declared by the signature; not raised directly in this method
 */
public static void uncompressArchive(File archive, File target) throws IOException, CompressorException {
    final String targetRoot = target.getCanonicalPath();
    final String targetPrefix = targetRoot.endsWith(File.separator) ? targetRoot
            : targetRoot + File.separator;

    // each layer is its own resource so the file handle is released even if a wrapper fails to open
    try (FileInputStream fileIn = new FileInputStream(archive);
            CompressorInputStream compressor = new GzipCompressorInputStream(fileIn);
            TarArchiveInputStream stream = new TarArchiveInputStream(compressor)) {

        TarArchiveEntry entry;
        while ((entry = stream.getNextTarEntry()) != null) {
            File f = new File(targetRoot, entry.getName());

            // reject entries such as "../../x" that would land outside of the target directory
            final String entryPath = f.getCanonicalPath();
            if (!entryPath.equals(targetRoot) && !entryPath.startsWith(targetPrefix)) {
                throw new IOException(
                        "Archive entry " + entry.getName() + " would be extracted outside of " + targetRoot);
            }

            if (f.exists()) {
                Bukkit.getLogger().warning("The file " + f.getCanonicalPath() + " already exists, deleting it.");
                if (!f.delete()) {
                    // NOTE: despite the message, processing of this entry continues below
                    Bukkit.getLogger().warning("Cannot remove, skipping file.");
                }
            }

            if (entry.isDirectory()) {
                f.mkdirs();
                continue;
            }

            f.getParentFile().mkdirs();
            f.createNewFile();

            try {
                try (OutputStream fos = new BufferedOutputStream(new FileOutputStream(f))) {
                    final byte[] buf = new byte[8192];
                    int bytesRead;
                    // read() returns -1 at the end of the current entry
                    while (-1 != (bytesRead = stream.read(buf)))
                        fos.write(buf, 0, bytesRead);
                }
                Bukkit.getLogger().info("Extracted file " + f.getName() + "...");
            } catch (IOException ioe) {
                // do not leave a partially written file behind
                f.delete();
                throw ioe;
            }
        }
    }
}

From source file:org.apache.camel.dataformat.tarfile.TarFileDataFormat.java

/**
 * Unmarshals a tar payload.
 * <p>
 * With {@code usingIterator} set, a {@link TarIterator} over the given stream is
 * returned. Otherwise the message body is read as a tar archive that may hold at
 * most one entry: the entry name is stored in the FILE_NAME header of the out
 * message and the entry content is returned as a byte array. Note that in this
 * second mode the archive is taken from the exchange body, not from the
 * {@code stream} argument.
 *
 * @param exchange the current exchange
 * @param stream the stream handed to the iterator in iterator mode
 * @return a {@link TarIterator}, or the bytes of the single entry (empty when the archive has none)
 * @throws IllegalStateException when the archive holds more than one entry
 * @throws Exception when the body cannot be obtained or the archive cannot be read
 */
@Override
public Object unmarshal(Exchange exchange, InputStream stream) throws Exception {
    if (usingIterator) {
        return new TarIterator(exchange.getIn(), stream);
    }

    final InputStream body = exchange.getIn().getMandatoryBody(InputStream.class);
    final ArchiveStreamFactory factory = new ArchiveStreamFactory();
    final TarArchiveInputStream tarIn = (TarArchiveInputStream) factory
            .createArchiveInputStream(ArchiveStreamFactory.TAR, new BufferedInputStream(body));
    final ByteArrayOutputStream content = new ByteArrayOutputStream();

    try {
        final TarArchiveEntry first = tarIn.getNextTarEntry();
        if (first != null) {
            exchange.getOut().setHeader(FILE_NAME, first.getName());
            IOHelper.copy(tarIn, content);
        }

        // anything after the first entry is not supported in this mode
        final TarArchiveEntry second = tarIn.getNextTarEntry();
        if (second != null) {
            throw new IllegalStateException("Tar file has more than 1 entry.");
        }

        return content.toByteArray();
    } finally {
        IOHelper.close(tarIn, content);
    }
}

From source file:org.apache.camel.processor.aggregate.tarfile.AggregationStrategyWithFilenameHeaderTest.java

/**
 * Sends two bodies with file name headers through "direct:start" and checks that
 * the first file found in target/out is a tar archive whose entries are named
 * after FILE_NAMES and whose entry count equals the size of FILE_NAMES.
 */
@Test
public void testSplitter() throws Exception {
    MockEndpoint aggregated = getMockEndpoint("mock:aggregateToTarEntry");
    aggregated.expectedMessageCount(1);

    template.setDefaultEndpointUri("direct:start");
    template.sendBodyAndHeader("foo", Exchange.FILE_NAME, FILE_NAMES.get(0));
    template.sendBodyAndHeader("bar", Exchange.FILE_NAME, FILE_NAMES.get(1));
    assertMockEndpointsSatisfied();

    Thread.sleep(500);

    File[] outputFiles = new File("target/out").listFiles();
    assertTrue(outputFiles != null);
    assertTrue("Should be a file in target/out directory", outputFiles.length > 0);

    File tarFile = outputFiles[0];
    final ArchiveStreamFactory factory = new ArchiveStreamFactory();
    final TarArchiveInputStream tarIn = (TarArchiveInputStream) factory.createArchiveInputStream(
            ArchiveStreamFactory.TAR, new BufferedInputStream(new FileInputStream(tarFile)));
    try {
        int entriesSeen = 0;
        TarArchiveEntry current = tarIn.getNextTarEntry();
        while (current != null) {
            entriesSeen++;
            assertTrue("Tar entry file name should be on of: " + FILE_NAMES,
                    FILE_NAMES.contains(current.getName()));
            current = tarIn.getNextTarEntry();
        }
        assertEquals("Tar file should contain " + FILE_NAMES.size() + " files", FILE_NAMES.size(),
                entriesSeen);
    } finally {
        IOHelper.close(tarIn);
    }
}

From source file:org.apache.camel.processor.aggregate.tarfile.AggregationStrategyWithPreservationTest.java

/**
 * Checks that the first file found in target/out is a tar archive containing
 * EXPECTED_NO_FILES entries and that every expected entry path is present.
 */
@Test
public void testSplitter() throws Exception {
    MockEndpoint mock = getMockEndpoint("mock:aggregateToTarEntry");
    mock.expectedMessageCount(1);

    assertMockEndpointsSatisfied();

    Thread.sleep(500);

    File[] files = new File("target/out").listFiles();
    // listFiles() returns null when the directory is missing; fail with an assertion, not an NPE
    assertTrue(files != null);
    assertTrue("Should be a file in target/out directory", files.length > 0);

    File resultFile = files[0];
    Set<String> expectedTarFiles = new HashSet<String>(
            Arrays.asList("another/hello.txt", "other/greetings.txt", "chiau.txt", "hi.txt", "hola.txt"));
    TarArchiveInputStream tin = new TarArchiveInputStream(new FileInputStream(resultFile));
    try {
        int fileCount = 0;
        for (TarArchiveEntry te = tin.getNextTarEntry(); te != null; te = tin.getNextTarEntry()) {
            // every entry found is struck off the expected set; leftovers are missing entries
            expectedTarFiles.remove(te.getName());
            fileCount++;
        }
        // assertEquals reports the actual count on failure, unlike assertTrue(a == b)
        assertEquals("Tar file should contains " + AggregationStrategyWithPreservationTest.EXPECTED_NO_FILES
                + " files", AggregationStrategyWithPreservationTest.EXPECTED_NO_FILES, fileCount);
        assertEquals("Should have found all of the tar files in the file.", 0, expectedTarFiles.size());
    } finally {
        IOHelper.close(tin);
    }
}

From source file:org.apache.camel.processor.aggregate.tarfile.TarAggregationStrategyTest.java

/**
 * Checks that the aggregated message carries the header "foo"="bar" and that the
 * first file found in target/out is a tar archive with EXPECTED_NO_FILES entries.
 */
@Test
public void testSplitter() throws Exception {
    MockEndpoint aggregated = getMockEndpoint("mock:aggregateToTarEntry");
    aggregated.expectedMessageCount(1);
    aggregated.expectedHeaderReceived("foo", "bar");

    assertMockEndpointsSatisfied();

    Thread.sleep(500);

    File[] outputFiles = new File("target/out").listFiles();
    assertTrue(outputFiles != null);
    assertTrue("Should be a file in target/out directory", outputFiles.length > 0);

    File tarFile = outputFiles[0];

    TarArchiveInputStream tarIn = new TarArchiveInputStream(new FileInputStream(tarFile));
    try {
        // only the number of entries matters here, their content is not inspected
        int entryCount = 0;
        while (tarIn.getNextTarEntry() != null) {
            entryCount++;
        }
        assertEquals("Tar file should contains " + TarAggregationStrategyTest.EXPECTED_NO_FILES + " files",
                TarAggregationStrategyTest.EXPECTED_NO_FILES, entryCount);
    } finally {
        IOHelper.close(tarIn);
    }
}

From source file:org.apache.flume.test.util.StagedInstall.java

/**
 * Expands an uncompressed tar file into the given directory.
 * Directory entries are created with their parents; for file entries the parent
 * directories are created before the content is copied. Failures are logged and
 * rethrown; failures while closing a stream are only logged.
 *
 * @param tarFile the tar file to expand
 * @param destDir the directory the entries are created in
 * @throws Exception any failure raised while reading the archive or writing an entry
 */
private void untarTarFile(File tarFile, File destDir) throws Exception {
    TarArchiveInputStream tarIn = null;
    try {
        tarIn = new TarArchiveInputStream(new FileInputStream(tarFile));
        for (TarArchiveEntry entry = tarIn.getNextTarEntry(); entry != null; entry = tarIn
                .getNextTarEntry()) {
            final String entryName = entry.getName();
            LOGGER.debug("Next file: " + entryName);
            final File target = new File(destDir, entryName);
            if (entry.isDirectory()) {
                target.mkdirs();
                continue;
            }
            target.getParentFile().mkdirs();

            OutputStream entryOut = null;
            try {
                entryOut = new FileOutputStream(target);
                final byte[] chunk = new byte[2048];
                // read() signals the end of the current entry with -1
                int count = tarIn.read(chunk, 0, 2048);
                while (count != -1) {
                    entryOut.write(chunk, 0, count);
                    count = tarIn.read(chunk, 0, 2048);
                }
            } catch (Exception copyFailure) {
                LOGGER.error("Exception while expanding tar file", copyFailure);
                throw copyFailure;
            } finally {
                if (entryOut != null) {
                    try {
                        entryOut.close();
                    } catch (Exception closeFailure) {
                        LOGGER.warn("Failed to close entry output stream", closeFailure);
                    }
                }
            }
        }
    } catch (Exception failure) {
        LOGGER.error("Exception caught while untarring tar file: " + tarFile.getAbsolutePath(), failure);
        throw failure;
    } finally {
        if (tarIn != null) {
            try {
                tarIn.close();
            } catch (Exception closeFailure) {
                LOGGER.warn("Unable to close tar input stream: " + tarFile.getCanonicalPath(), closeFailure);
            }
        }
    }

}

From source file:org.apache.gobblin.data.management.copy.writer.TarArchiveInputStreamDataWriter.java

/**
 * Untars the passed in stream below the parent of {@code writeAt}. The first {@link TarArchiveEntry} of the
 * stream is treated as the root directory of the archive: its name is replaced by the name of {@code writeAt}
 * in the path of every following entry. Entries that would end up outside of the parent of {@code writeAt}
 * abort the operation.
 *
 * @param inputStream stream delivering the tar archive; registered with the closer and closed by this method
 * @param writeAt path whose name replaces the archive's root directory name
 * @param copyableFile file being copied; only used for log output here
 * @throws IOException when the archive contains no entry, when an entry would be written outside of the parent
 *           of {@code writeAt}, or when reading or writing fails
 * @see org.apache.gobblin.data.management.copy.writer.FileAwareInputStreamDataWriter#write(org.apache.gobblin.data.management.copy.FileAwareInputStream)
 */
@Override
public void writeImpl(InputStream inputStream, Path writeAt, CopyableFile copyableFile) throws IOException {
    this.closer.register(inputStream);

    TarArchiveInputStream tarIn = new TarArchiveInputStream(inputStream);
    final ReadableByteChannel inputChannel = Channels.newChannel(tarIn);

    try {
        // flush the first entry in the tar, which is just the root directory
        TarArchiveEntry tarEntry = tarIn.getNextTarEntry();
        if (tarEntry == null) {
            // an empty archive has no root entry; report it instead of failing with a NullPointerException
            throw new IOException("Tar archive to be written at " + writeAt + " contains no entries");
        }
        String tarEntryRootName = StringUtils.remove(tarEntry.getName(), Path.SEPARATOR);

        log.info("Unarchiving at " + writeAt);

        while ((tarEntry = tarIn.getNextTarEntry()) != null) {

            // the API tarEntry.getName() is misleading, it is actually the path of the tarEntry in the tar file
            String newTarEntryPath = tarEntry.getName().replace(tarEntryRootName, writeAt.getName());
            Path tarEntryStagingPath = new Path(writeAt.getParent(), newTarEntryPath);
            if (!FileUtils.isSubPath(writeAt.getParent(), tarEntryStagingPath)) {
                throw new IOException(
                        String.format("Extracted file: %s is trying to write outside of output directory: %s",
                                tarEntryStagingPath, writeAt.getParent()));
            }

            if (tarEntry.isDirectory() && !this.fs.exists(tarEntryStagingPath)) {
                this.fs.mkdirs(tarEntryStagingPath);
            } else if (!tarEntry.isDirectory()) {
                FSDataOutputStream out = this.fs.create(tarEntryStagingPath, true);
                final WritableByteChannel outputChannel = Channels.newChannel(out);
                try {
                    // the copier reads from the channel over tarIn, which is positioned at the current entry
                    StreamCopier copier = new StreamCopier(inputChannel, outputChannel);
                    if (isInstrumentationEnabled()) {
                        copier.withCopySpeedMeter(this.copySpeedMeter);
                    }
                    this.bytesWritten.addAndGet(copier.copy());
                    if (isInstrumentationEnabled()) {
                        log.info("File {}: copied {} bytes, average rate: {} B/s",
                                copyableFile.getOrigin().getPath(), this.copySpeedMeter.getCount(),
                                this.copySpeedMeter.getMeanRate());
                    } else {
                        log.info("File {} copied.", copyableFile.getOrigin().getPath());
                    }
                } finally {
                    out.close();
                    outputChannel.close();
                }
            }
        }
    } finally {
        tarIn.close();
        inputChannel.close();
        inputStream.close();
    }
}

From source file:org.apache.kylin.common.util.TarGZUtil.java

/**
 * Extracts a gzip-compressed tar file into the given directory.
 * <p>
 * The destination directory is created if missing (one level only). Parent
 * directories of file entries are created as needed, all streams are closed
 * even when extraction fails, and an entry whose name resolves outside of
 * {@code dest} aborts the extraction.
 *
 * @param tarFile the .tar.gz file to read
 * @param dest the directory to extract into
 * @throws IOException when reading or writing fails, or when an entry would
 *             be written outside of {@code dest}
 */
public static void uncompressTarGZ(File tarFile, File dest) throws IOException {
    dest.mkdir();
    final String destRoot = dest.getCanonicalPath();
    final String destPrefix = destRoot.endsWith(File.separator) ? destRoot : destRoot + File.separator;

    // the file stream is a separate resource so it is released even if a wrapper fails to open
    try (FileInputStream fileIn = new FileInputStream(tarFile);
            TarArchiveInputStream tarIn = new TarArchiveInputStream(
                    new GzipCompressorInputStream(new BufferedInputStream(fileIn)))) {

        TarArchiveEntry tarEntry = tarIn.getNextTarEntry();
        while (tarEntry != null) {
            // create a file with the same name as the tarEntry
            File destPath = new File(dest, tarEntry.getName());
            final String canonicalDest = destPath.getCanonicalPath();
            System.out.println("working: " + canonicalDest);

            // reject entries such as "../../x" that would land outside of dest
            if (!canonicalDest.equals(destRoot) && !canonicalDest.startsWith(destPrefix)) {
                throw new IOException(
                        "Tar entry " + tarEntry.getName() + " would be extracted outside of " + destRoot);
            }

            if (tarEntry.isDirectory()) {
                destPath.mkdirs();
            } else {
                // archives do not always list a directory before the files inside it
                File parentDir = destPath.getParentFile();
                if (parentDir != null) {
                    parentDir.mkdirs();
                }
                try (BufferedOutputStream bout = new BufferedOutputStream(new FileOutputStream(destPath))) {
                    byte[] btoRead = new byte[1024];
                    int len;
                    // read() returns -1 at the end of the current entry
                    while ((len = tarIn.read(btoRead)) != -1) {
                        bout.write(btoRead, 0, len);
                    }
                }
            }
            tarEntry = tarIn.getNextTarEntry();
        }
    }
}