List of usage examples for org.apache.commons.compress.archivers.tar.TarArchiveEntry.setSize
public void setSize(long size)
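Every example below follows the same pattern: the entry size is set with setSize before putArchiveEntry, because the size is written into the tar header and TarArchiveOutputStream requires that exactly that many bytes are written before closeArchiveEntry. A minimal, self-contained sketch (the file name demo.tar and the payload are illustrative, not taken from any of the sources below):

import java.io.FileOutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;

public class SetSizeExample {
    public static void main(String[] args) throws Exception {
        byte[] payload = "hello, tar".getBytes(StandardCharsets.UTF_8);
        try (TarArchiveOutputStream out =
                new TarArchiveOutputStream(new FileOutputStream("demo.tar"))) {
            TarArchiveEntry entry = new TarArchiveEntry("hello.txt");
            // The size must be set before the header is written; the stream
            // enforces that exactly this many bytes are written for the entry.
            entry.setSize(payload.length);
            out.putArchiveEntry(entry);
            out.write(payload);
            out.closeArchiveEntry();
        }
    }
}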
From source file:org.apache.camel.processor.aggregate.TarAggregationStrategy.java
@Override
public void onCompletion(Exchange exchange) {
    List<Exchange> list = exchange.getProperty(Exchange.GROUPED_EXCHANGE, List.class);
    try {
        ByteArrayOutputStream bout = new ByteArrayOutputStream();
        TarArchiveOutputStream tout = new TarArchiveOutputStream(bout);
        for (Exchange item : list) {
            String name = item.getProperty(TAR_ENTRY_NAME,
                    item.getProperty(Exchange.FILE_NAME, item.getExchangeId(), String.class), String.class);
            byte[] body = item.getIn().getBody(byte[].class);
            // each grouped exchange body becomes its own tar entry
            TarArchiveEntry entry = new TarArchiveEntry(name);
            entry.setSize(body.length);
            tout.putArchiveEntry(entry);
            tout.write(body);
            tout.closeArchiveEntry();
        }
        tout.close();
        exchange.getIn().setBody(bout.toByteArray());
        exchange.removeProperty(Exchange.GROUPED_EXCHANGE);
    } catch (Exception e) {
        throw new RuntimeException("Unable to tar exchanges!", e);
    }
}
From source file:org.apache.camel.processor.aggregate.tarfile.TarAggregationStrategy.java
private static void addFileToTar(File source, File file, String fileName) throws IOException, ArchiveException {
    File tmpTar = File.createTempFile(source.getName(), null);
    tmpTar.delete();
    if (!source.renameTo(tmpTar)) {
        throw new IOException("Could not make temp file (" + source.getName() + ")");
    }
    TarArchiveInputStream tin = (TarArchiveInputStream) new ArchiveStreamFactory()
            .createArchiveInputStream(ArchiveStreamFactory.TAR, new FileInputStream(tmpTar));
    TarArchiveOutputStream tos = new TarArchiveOutputStream(new FileOutputStream(source));
    tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX);
    tos.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX);
    InputStream in = new FileInputStream(file);

    // copy the existing entries
    ArchiveEntry nextEntry;
    while ((nextEntry = tin.getNextEntry()) != null) {
        tos.putArchiveEntry(nextEntry);
        IOUtils.copy(tin, tos);
        tos.closeArchiveEntry();
    }

    // Add the new entry
    TarArchiveEntry entry = new TarArchiveEntry(fileName == null ? file.getName() : fileName);
    entry.setSize(file.length());
    tos.putArchiveEntry(entry);
    IOUtils.copy(in, tos);
    tos.closeArchiveEntry();

    IOHelper.close(in);
    IOHelper.close(tin);
    IOHelper.close(tos);
}
From source file:org.apache.camel.processor.aggregate.tarfile.TarAggregationStrategy.java
private static void addEntryToTar(File source, String entryName, byte[] buffer, int length)
        throws IOException, ArchiveException {
    File tmpTar = File.createTempFile(source.getName(), null);
    tmpTar.delete();
    if (!source.renameTo(tmpTar)) {
        throw new IOException("Cannot create temp file: " + source.getName());
    }
    TarArchiveInputStream tin = (TarArchiveInputStream) new ArchiveStreamFactory()
            .createArchiveInputStream(ArchiveStreamFactory.TAR, new FileInputStream(tmpTar));
    TarArchiveOutputStream tos = new TarArchiveOutputStream(new FileOutputStream(source));
    tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX);
    tos.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX);

    // copy the existing entries
    ArchiveEntry nextEntry;
    while ((nextEntry = tin.getNextEntry()) != null) {
        tos.putArchiveEntry(nextEntry);
        IOUtils.copy(tin, tos);
        tos.closeArchiveEntry();
    }

    // Create new entry
    TarArchiveEntry entry = new TarArchiveEntry(entryName);
    entry.setSize(length);
    tos.putArchiveEntry(entry);
    tos.write(buffer, 0, length);
    tos.closeArchiveEntry();

    IOHelper.close(tin);
    IOHelper.close(tos);
}
From source file:org.apache.hadoop.yarn.util.TestFSDownload.java
static LocalResource createTarFile(FileContext files, Path p, int len, Random r, LocalResourceVisibility vis)
        throws IOException, URISyntaxException {
    byte[] bytes = new byte[len];
    r.nextBytes(bytes);

    // write the random payload as a single entry in a .tar archive
    File archiveFile = new File(p.toUri().getPath() + ".tar");
    archiveFile.createNewFile();
    TarArchiveOutputStream out = new TarArchiveOutputStream(new FileOutputStream(archiveFile));
    TarArchiveEntry entry = new TarArchiveEntry(p.getName());
    entry.setSize(bytes.length);
    out.putArchiveEntry(entry);
    out.write(bytes);
    out.closeArchiveEntry();
    out.close();

    LocalResource ret = recordFactory.newRecordInstance(LocalResource.class);
    ret.setResource(URL.fromPath(new Path(p.toString() + ".tar")));
    ret.setSize(len);
    ret.setType(LocalResourceType.ARCHIVE);
    ret.setVisibility(vis);
    ret.setTimestamp(files.getFileStatus(new Path(p.toString() + ".tar")).getModificationTime());
    return ret;
}
From source file:org.apache.hadoop.yarn.util.TestFSDownload.java
static LocalResource createTgzFile(FileContext files, Path p, int len, Random r, LocalResourceVisibility vis)
        throws IOException, URISyntaxException {
    byte[] bytes = new byte[len];
    r.nextBytes(bytes);

    File gzipFile = new File(p.toUri().getPath() + ".tar.gz");
    gzipFile.createNewFile();
    TarArchiveOutputStream out = new TarArchiveOutputStream(
            new GZIPOutputStream(new FileOutputStream(gzipFile)));
    TarArchiveEntry entry = new TarArchiveEntry(p.getName());
    entry.setSize(bytes.length);
    out.putArchiveEntry(entry);
    out.write(bytes);
    out.closeArchiveEntry();
    out.close();

    LocalResource ret = recordFactory.newRecordInstance(LocalResource.class);
    ret.setResource(URL.fromPath(new Path(p.toString() + ".tar.gz")));
    ret.setSize(len);
    ret.setType(LocalResourceType.ARCHIVE);
    ret.setVisibility(vis);
    ret.setTimestamp(files.getFileStatus(new Path(p.toString() + ".tar.gz")).getModificationTime());
    return ret;
}
From source file:org.apache.karaf.tooling.ArchiveMojo.java
private void addFileToTarGz(TarArchiveOutputStream tOut, Path f, String base) throws IOException {
    if (Files.isDirectory(f)) {
        String entryName = base + f.getFileName().toString() + "/";
        TarArchiveEntry tarEntry = new TarArchiveEntry(entryName);
        tOut.putArchiveEntry(tarEntry);
        tOut.closeArchiveEntry();
        try (DirectoryStream<Path> children = Files.newDirectoryStream(f)) {
            for (Path child : children) {
                addFileToTarGz(tOut, child, entryName);
            }
        }
    } else if (useSymLinks && Files.isSymbolicLink(f)) {
        String entryName = base + f.getFileName().toString();
        TarArchiveEntry tarEntry = new TarArchiveEntry(entryName, TarConstants.LF_SYMLINK);
        tarEntry.setLinkName(Files.readSymbolicLink(f).toString());
        tOut.putArchiveEntry(tarEntry);
        tOut.closeArchiveEntry();
    } else {
        String entryName = base + f.getFileName().toString();
        TarArchiveEntry tarEntry = new TarArchiveEntry(entryName);
        tarEntry.setSize(Files.size(f));
        if (entryName.contains("/bin/") || (!usePathPrefix && entryName.startsWith("bin/"))) {
            if (entryName.endsWith(".bat")) {
                tarEntry.setMode(0644);
            } else {
                tarEntry.setMode(0755);
            }
        }
        tOut.putArchiveEntry(tarEntry);
        Files.copy(f, tOut);
        tOut.closeArchiveEntry();
    }
}
From source file:org.apache.nifi.cluster.flow.impl.DataFlowDaoImpl.java
private void writeTarEntry(final TarArchiveOutputStream tarOut, final String filename, final byte[] bytes)
        throws IOException {
    final TarArchiveEntry flowEntry = new TarArchiveEntry(filename);
    flowEntry.setSize(bytes.length);
    tarOut.putArchiveEntry(flowEntry);
    tarOut.write(bytes);
    tarOut.closeArchiveEntry();
}
From source file:org.apache.nifi.util.FlowFilePackagerV1.java
private void writeAttributesEntry(final Map<String, String> attributes, final TarArchiveOutputStream tout)
        throws IOException {
    final StringBuilder sb = new StringBuilder();
    sb.append(
            "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE properties\n SYSTEM \"http://java.sun.com/dtd/properties.dtd\">\n");
    sb.append("<properties>");
    for (final Map.Entry<String, String> entry : attributes.entrySet()) {
        final String escapedKey = StringEscapeUtils.escapeXml11(entry.getKey());
        final String escapedValue = StringEscapeUtils.escapeXml11(entry.getValue());
        sb.append("\n <entry key=\"").append(escapedKey).append("\">").append(escapedValue).append("</entry>");
    }
    sb.append("</properties>");

    final byte[] metaBytes = sb.toString().getBytes(StandardCharsets.UTF_8);
    final TarArchiveEntry attribEntry = new TarArchiveEntry(FILENAME_ATTRIBUTES);
    attribEntry.setMode(tarPermissions);
    attribEntry.setSize(metaBytes.length);
    tout.putArchiveEntry(attribEntry);
    tout.write(metaBytes);
    tout.closeArchiveEntry();
}
From source file:org.apache.nifi.util.FlowFilePackagerV1.java
private void writeContentEntry(final TarArchiveOutputStream tarOut, final InputStream inStream,
        final long fileSize) throws IOException {
    final TarArchiveEntry entry = new TarArchiveEntry(FILENAME_CONTENT);
    entry.setMode(tarPermissions);
    entry.setSize(fileSize);
    tarOut.putArchiveEntry(entry);

    final byte[] buffer = new byte[512 << 10]; // 512KB
    int bytesRead = 0;
    while ((bytesRead = inStream.read(buffer)) != -1) { // still more data to read
        if (bytesRead > 0) {
            tarOut.write(buffer, 0, bytesRead);
        }
    }
    tarOut.closeArchiveEntry();
}
From source file:org.apache.nutch.tools.CommonCrawlDataDumper.java
/**
 * Dumps the reverse-engineered CBOR content from the provided segment
 * directories. If a parent directory contains more than one segment, all of
 * them are processed; otherwise a single segment can be passed as an argument.
 * If the gzip flag is set, the CBOR content is also zipped.
 *
 * @param outputDir the directory you wish to dump the raw content to. This
 *          directory will be created.
 * @param segmentRootDir a directory containing one or more segments.
 * @param linkdb Path to linkdb.
 * @param gzip a boolean flag indicating whether the CBOR content should also
 *          be gzipped.
 * @param mimeTypes if non-null, only documents whose detected MIME type matches one of these are dumped.
 * @param epochFilename if {@code true}, output files will be named using the epoch time (in milliseconds).
 * @param extension a file extension to use with output documents.
 * @param warc if {@code true}, output is written via the WARC format implementation instead of JSON/CBOR.
 * @throws Exception if any exception occurs.
 */
public void dump(File outputDir, File segmentRootDir, File linkdb, boolean gzip, String[] mimeTypes,
        boolean epochFilename, String extension, boolean warc) throws Exception {
    if (gzip) {
        LOG.info("Gzipping CBOR data has been skipped");
    }
    // total file counts
    Map<String, Integer> typeCounts = new HashMap<>();
    // filtered file counters
    Map<String, Integer> filteredCounts = new HashMap<>();
    Configuration nutchConfig = NutchConfiguration.create();
    Path segmentRootPath = new Path(segmentRootDir.toString());
    FileSystem fs = segmentRootPath.getFileSystem(nutchConfig);

    // get all paths
    List<Path> parts = new ArrayList<>();
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(segmentRootPath, true);
    String partPattern = ".*" + File.separator + Content.DIR_NAME + File.separator + "part-[0-9]{5}"
            + File.separator + "data";
    while (files.hasNext()) {
        LocatedFileStatus next = files.next();
        if (next.isFile()) {
            Path path = next.getPath();
            if (path.toString().matches(partPattern)) {
                parts.add(path);
            }
        }
    }
    LinkDbReader linkDbReader = null;
    if (linkdb != null) {
        linkDbReader = new LinkDbReader(nutchConfig, new Path(linkdb.toString()));
    }
    if (parts == null || parts.size() == 0) {
        LOG.error("No segment directories found in {} ", segmentRootDir.getAbsolutePath());
        System.exit(1);
    }
    LOG.info("Found {} segment parts", parts.size());
    if (gzip && !warc) {
        fileList = new ArrayList<>();
        constructNewStream(outputDir);
    }
    for (Path segmentPart : parts) {
        LOG.info("Processing segment Part : [ {} ]", segmentPart);
        try {
            SequenceFile.Reader reader = new SequenceFile.Reader(nutchConfig,
                    SequenceFile.Reader.file(segmentPart));
            Writable key = (Writable) reader.getKeyClass().newInstance();
            Content content = null;
            while (reader.next(key)) {
                content = new Content();
                reader.getCurrentValue(content);
                Metadata metadata = content.getMetadata();
                String url = key.toString();
                String baseName = FilenameUtils.getBaseName(url);
                String extensionName = FilenameUtils.getExtension(url);
                if (!extension.isEmpty()) {
                    extensionName = extension;
                } else if ((extensionName == null) || extensionName.isEmpty()) {
                    extensionName = "html";
                }
                String outputFullPath = null;
                String outputRelativePath = null;
                String filename = null;
                String timestamp = null;
                String reverseKey = null;
                if (epochFilename || config.getReverseKey()) {
                    try {
                        long epoch = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z")
                                .parse(getDate(metadata.get("Date"))).getTime();
                        timestamp = String.valueOf(epoch);
                    } catch (ParseException pe) {
                        LOG.warn(pe.getMessage());
                    }
                    reverseKey = reverseUrl(url);
                    config.setReverseKeyValue(
                            reverseKey.replace("/", "_") + "_" + DigestUtils.sha1Hex(url) + "_" + timestamp);
                }
                if (!warc) {
                    if (epochFilename) {
                        outputFullPath = DumpFileUtil.createFileNameFromUrl(outputDir.getAbsolutePath(),
                                reverseKey, url, timestamp, extensionName, !gzip);
                        outputRelativePath = outputFullPath.substring(0,
                                outputFullPath.lastIndexOf(File.separator) - 1);
                        filename = content.getMetadata().get(Metadata.DATE) + "." + extensionName;
                    } else {
                        String md5Ofurl = DumpFileUtil.getUrlMD5(url);
                        String fullDir = DumpFileUtil.createTwoLevelsDirectory(outputDir.getAbsolutePath(),
                                md5Ofurl, !gzip);
                        filename = DumpFileUtil.createFileName(md5Ofurl, baseName, extensionName);
                        outputFullPath = String.format("%s/%s", fullDir, filename);
                        String[] fullPathLevels = fullDir.split(Pattern.quote(File.separator));
                        String firstLevelDirName = fullPathLevels[fullPathLevels.length - 2];
                        String secondLevelDirName = fullPathLevels[fullPathLevels.length - 1];
                        outputRelativePath = firstLevelDirName + secondLevelDirName;
                    }
                }
                // Encode all filetypes if no mimetypes have been given
                Boolean filter = (mimeTypes == null);
                String jsonData = "";
                try {
                    String mimeType = new Tika().detect(content.getContent());
                    // Maps file to JSON-based structure
                    Set<String> inUrls = null; // there may be duplicates, so using set
                    if (linkDbReader != null) {
                        Inlinks inlinks = linkDbReader.getInlinks((Text) key);
                        if (inlinks != null) {
                            Iterator<Inlink> iterator = inlinks.iterator();
                            inUrls = new LinkedHashSet<>();
                            while (inUrls.size() <= MAX_INLINKS && iterator.hasNext()) {
                                inUrls.add(iterator.next().getFromUrl());
                            }
                        }
                    }
                    // TODO: Make this Jackson Format implementation reusable
                    try (CommonCrawlFormat format = CommonCrawlFormatFactory
                            .getCommonCrawlFormat(warc ? "WARC" : "JACKSON", nutchConfig, config)) {
                        if (inUrls != null) {
                            format.setInLinks(new ArrayList<>(inUrls));
                        }
                        jsonData = format.getJsonData(url, content, metadata);
                    }
                    collectStats(typeCounts, mimeType);
                    // collects statistics for the given mimetypes
                    if ((mimeType != null) && (mimeTypes != null)
                            && Arrays.asList(mimeTypes).contains(mimeType)) {
                        collectStats(filteredCounts, mimeType);
                        filter = true;
                    }
                } catch (IOException ioe) {
                    LOG.error("Fatal error in creating JSON data: " + ioe.getMessage());
                    return;
                }
                if (!warc) {
                    if (filter) {
                        byte[] byteData = serializeCBORData(jsonData);
                        if (!gzip) {
                            File outputFile = new File(outputFullPath);
                            if (outputFile.exists()) {
                                LOG.info("Skipping writing: [" + outputFullPath + "]: file already exists");
                            } else {
                                LOG.info("Writing: [" + outputFullPath + "]");
                                IOUtils.copy(new ByteArrayInputStream(byteData),
                                        new FileOutputStream(outputFile));
                            }
                        } else {
                            if (fileList.contains(outputFullPath)) {
                                LOG.info("Skipping compressing: [" + outputFullPath + "]: file already exists");
                            } else {
                                fileList.add(outputFullPath);
                                LOG.info("Compressing: [" + outputFullPath + "]");
                                TarArchiveEntry tarEntry = new TarArchiveEntry(
                                        outputRelativePath + File.separator + filename);
                                tarEntry.setSize(byteData.length);
                                tarOutput.putArchiveEntry(tarEntry);
                                tarOutput.write(byteData);
                                tarOutput.closeArchiveEntry();
                            }
                        }
                    }
                }
            }
            reader.close();
        } catch (Exception e) {
            LOG.warn("SKIPPED: {} Because : {}", segmentPart, e.getMessage());
        } finally {
            fs.close();
        }
    }
    if (gzip && !warc) {
        closeStream();
    }
    if (!typeCounts.isEmpty()) {
        LOG.info("CommonsCrawlDataDumper File Stats: "
                + DumpFileUtil.displayFileTypes(typeCounts, filteredCounts));
    }
}