List of usage examples for org.apache.commons.compress.archivers.tar.TarArchiveEntry.setSize
public void setSize(long size)
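Every example below follows the same pattern: the entry size is set with setSize before putArchiveEntry, because the size is written into the tar header and TarArchiveOutputStream requires that exactly that many bytes are written before closeArchiveEntry. A minimal, self-contained sketch (the file name demo.tar and the payload are illustrative, not taken from any of the sources below):

import java.io.FileOutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;

public class SetSizeExample {
    public static void main(String[] args) throws Exception {
        byte[] payload = "hello, tar".getBytes(StandardCharsets.UTF_8);
        try (TarArchiveOutputStream out =
                new TarArchiveOutputStream(new FileOutputStream("demo.tar"))) {
            TarArchiveEntry entry = new TarArchiveEntry("hello.txt");
            // The size must be set before the header is written; the stream
            // enforces that exactly this many bytes are written for the entry.
            entry.setSize(payload.length);
            out.putArchiveEntry(entry);
            out.write(payload);
            out.closeArchiveEntry();
        }
    }
}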
From source file:org.apache.camel.processor.aggregate.TarAggregationStrategy.java
@Override
public void onCompletion(Exchange exchange) {
    List<Exchange> list = exchange.getProperty(Exchange.GROUPED_EXCHANGE, List.class);
    try {
        ByteArrayOutputStream bout = new ByteArrayOutputStream();
        TarArchiveOutputStream tout = new TarArchiveOutputStream(bout);
        for (Exchange item : list) {
            String name = item.getProperty(TAR_ENTRY_NAME,
                    item.getProperty(Exchange.FILE_NAME, item.getExchangeId(), String.class), String.class);
            byte[] body = item.getIn().getBody(byte[].class);
            // each grouped exchange body becomes its own tar entry
            TarArchiveEntry entry = new TarArchiveEntry(name);
            entry.setSize(body.length);
            tout.putArchiveEntry(entry);
            tout.write(body);
            tout.closeArchiveEntry();
        }
        tout.close();
        exchange.getIn().setBody(bout.toByteArray());
        exchange.removeProperty(Exchange.GROUPED_EXCHANGE);
    } catch (Exception e) {
        throw new RuntimeException("Unable to tar exchanges!", e);
    }
}
From source file:org.apache.camel.processor.aggregate.tarfile.TarAggregationStrategy.java
private static void addFileToTar(File source, File file, String fileName) throws IOException, ArchiveException {
    File tmpTar = File.createTempFile(source.getName(), null);
    tmpTar.delete();
    if (!source.renameTo(tmpTar)) {
        throw new IOException("Could not make temp file (" + source.getName() + ")");
    }
    TarArchiveInputStream tin = (TarArchiveInputStream) new ArchiveStreamFactory()
            .createArchiveInputStream(ArchiveStreamFactory.TAR, new FileInputStream(tmpTar));
    TarArchiveOutputStream tos = new TarArchiveOutputStream(new FileOutputStream(source));
    tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX);
    tos.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX);
    InputStream in = new FileInputStream(file);

    // copy the existing entries
    ArchiveEntry nextEntry;
    while ((nextEntry = tin.getNextEntry()) != null) {
        tos.putArchiveEntry(nextEntry);
        IOUtils.copy(tin, tos);
        tos.closeArchiveEntry();
    }

    // Add the new entry
    TarArchiveEntry entry = new TarArchiveEntry(fileName == null ? file.getName() : fileName);
    entry.setSize(file.length());
    tos.putArchiveEntry(entry);
    IOUtils.copy(in, tos);
    tos.closeArchiveEntry();

    IOHelper.close(in);
    IOHelper.close(tin);
    IOHelper.close(tos);
}
From source file:org.apache.camel.processor.aggregate.tarfile.TarAggregationStrategy.java
private static void addEntryToTar(File source, String entryName, byte[] buffer, int length)
        throws IOException, ArchiveException {
    File tmpTar = File.createTempFile(source.getName(), null);
    tmpTar.delete();
    if (!source.renameTo(tmpTar)) {
        throw new IOException("Cannot create temp file: " + source.getName());
    }
    TarArchiveInputStream tin = (TarArchiveInputStream) new ArchiveStreamFactory()
            .createArchiveInputStream(ArchiveStreamFactory.TAR, new FileInputStream(tmpTar));
    TarArchiveOutputStream tos = new TarArchiveOutputStream(new FileOutputStream(source));
    tos.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX);
    tos.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX);

    // copy the existing entries
    ArchiveEntry nextEntry;
    while ((nextEntry = tin.getNextEntry()) != null) {
        tos.putArchiveEntry(nextEntry);
        IOUtils.copy(tin, tos);
        tos.closeArchiveEntry();
    }

    // Create new entry
    TarArchiveEntry entry = new TarArchiveEntry(entryName);
    entry.setSize(length);
    tos.putArchiveEntry(entry);
    tos.write(buffer, 0, length);
    tos.closeArchiveEntry();

    IOHelper.close(tin);
    IOHelper.close(tos);
}
From source file:org.apache.hadoop.yarn.util.TestFSDownload.java
static LocalResource createTarFile(FileContext files, Path p, int len, Random r, LocalResourceVisibility vis)
        throws IOException, URISyntaxException {
    byte[] bytes = new byte[len];
    r.nextBytes(bytes);

    // write the random payload as a single entry in a .tar archive
    File archiveFile = new File(p.toUri().getPath() + ".tar");
    archiveFile.createNewFile();
    TarArchiveOutputStream out = new TarArchiveOutputStream(new FileOutputStream(archiveFile));
    TarArchiveEntry entry = new TarArchiveEntry(p.getName());
    entry.setSize(bytes.length);
    out.putArchiveEntry(entry);
    out.write(bytes);
    out.closeArchiveEntry();
    out.close();

    LocalResource ret = recordFactory.newRecordInstance(LocalResource.class);
    ret.setResource(URL.fromPath(new Path(p.toString() + ".tar")));
    ret.setSize(len);
    ret.setType(LocalResourceType.ARCHIVE);
    ret.setVisibility(vis);
    ret.setTimestamp(files.getFileStatus(new Path(p.toString() + ".tar")).getModificationTime());
    return ret;
}
From source file:org.apache.hadoop.yarn.util.TestFSDownload.java
static LocalResource createTgzFile(FileContext files, Path p, int len, Random r, LocalResourceVisibility vis)
        throws IOException, URISyntaxException {
    byte[] bytes = new byte[len];
    r.nextBytes(bytes);

    File gzipFile = new File(p.toUri().getPath() + ".tar.gz");
    gzipFile.createNewFile();
    TarArchiveOutputStream out = new TarArchiveOutputStream(
            new GZIPOutputStream(new FileOutputStream(gzipFile)));
    TarArchiveEntry entry = new TarArchiveEntry(p.getName());
    entry.setSize(bytes.length);
    out.putArchiveEntry(entry);
    out.write(bytes);
    out.closeArchiveEntry();
    out.close();

    LocalResource ret = recordFactory.newRecordInstance(LocalResource.class);
    ret.setResource(URL.fromPath(new Path(p.toString() + ".tar.gz")));
    ret.setSize(len);
    ret.setType(LocalResourceType.ARCHIVE);
    ret.setVisibility(vis);
    ret.setTimestamp(files.getFileStatus(new Path(p.toString() + ".tar.gz")).getModificationTime());
    return ret;
}
From source file:org.apache.karaf.tooling.ArchiveMojo.java
private void addFileToTarGz(TarArchiveOutputStream tOut, Path f, String base) throws IOException {
    if (Files.isDirectory(f)) {
        String entryName = base + f.getFileName().toString() + "/";
        TarArchiveEntry tarEntry = new TarArchiveEntry(entryName);
        tOut.putArchiveEntry(tarEntry);
        tOut.closeArchiveEntry();
        try (DirectoryStream<Path> children = Files.newDirectoryStream(f)) {
            for (Path child : children) {
                addFileToTarGz(tOut, child, entryName);
            }
        }
    } else if (useSymLinks && Files.isSymbolicLink(f)) {
        String entryName = base + f.getFileName().toString();
        TarArchiveEntry tarEntry = new TarArchiveEntry(entryName, TarConstants.LF_SYMLINK);
        tarEntry.setLinkName(Files.readSymbolicLink(f).toString());
        tOut.putArchiveEntry(tarEntry);
        tOut.closeArchiveEntry();
    } else {
        String entryName = base + f.getFileName().toString();
        TarArchiveEntry tarEntry = new TarArchiveEntry(entryName);
        tarEntry.setSize(Files.size(f));
        if (entryName.contains("/bin/") || (!usePathPrefix && entryName.startsWith("bin/"))) {
            if (entryName.endsWith(".bat")) {
                tarEntry.setMode(0644);
            } else {
                tarEntry.setMode(0755);
            }
        }
        tOut.putArchiveEntry(tarEntry);
        Files.copy(f, tOut);
        tOut.closeArchiveEntry();
    }
}
From source file:org.apache.nifi.cluster.flow.impl.DataFlowDaoImpl.java
private void writeTarEntry(final TarArchiveOutputStream tarOut, final String filename, final byte[] bytes)
        throws IOException {
    final TarArchiveEntry flowEntry = new TarArchiveEntry(filename);
    flowEntry.setSize(bytes.length);
    tarOut.putArchiveEntry(flowEntry);
    tarOut.write(bytes);
    tarOut.closeArchiveEntry();
}
From source file:org.apache.nifi.util.FlowFilePackagerV1.java
private void writeAttributesEntry(final Map<String, String> attributes, final TarArchiveOutputStream tout)
        throws IOException {
    final StringBuilder sb = new StringBuilder();
    sb.append(
            "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE properties\n SYSTEM \"http://java.sun.com/dtd/properties.dtd\">\n");
    sb.append("<properties>");
    for (final Map.Entry<String, String> entry : attributes.entrySet()) {
        final String escapedKey = StringEscapeUtils.escapeXml11(entry.getKey());
        final String escapedValue = StringEscapeUtils.escapeXml11(entry.getValue());
        sb.append("\n <entry key=\"").append(escapedKey).append("\">").append(escapedValue).append("</entry>");
    }
    sb.append("</properties>");

    final byte[] metaBytes = sb.toString().getBytes(StandardCharsets.UTF_8);
    final TarArchiveEntry attribEntry = new TarArchiveEntry(FILENAME_ATTRIBUTES);
    attribEntry.setMode(tarPermissions);
    attribEntry.setSize(metaBytes.length);
    tout.putArchiveEntry(attribEntry);
    tout.write(metaBytes);
    tout.closeArchiveEntry();
}
From source file:org.apache.nifi.util.FlowFilePackagerV1.java
private void writeContentEntry(final TarArchiveOutputStream tarOut, final InputStream inStream,
        final long fileSize) throws IOException {
    final TarArchiveEntry entry = new TarArchiveEntry(FILENAME_CONTENT);
    entry.setMode(tarPermissions);
    entry.setSize(fileSize);
    tarOut.putArchiveEntry(entry);

    final byte[] buffer = new byte[512 << 10]; // 512KB
    int bytesRead = 0;
    while ((bytesRead = inStream.read(buffer)) != -1) { // still more data to read
        if (bytesRead > 0) {
            tarOut.write(buffer, 0, bytesRead);
        }
    }
    tarOut.closeArchiveEntry();
}
From source file:org.apache.nutch.tools.CommonCrawlDataDumper.java
/**
 * Dumps the reverse-engineered CBOR content from the provided segment
 * directories. If a parent directory contains more than one segment, all of
 * them are processed; otherwise a single segment can be passed as an argument.
 * If the gzip flag is set, the CBOR content is also zipped.
 *
 * @param outputDir the directory you wish to dump the raw content to. This
 *          directory will be created.
 * @param segmentRootDir a directory containing one or more segments.
 * @param linkdb Path to linkdb.
 * @param gzip a boolean flag indicating whether the CBOR content should also
 *          be gzipped.
 * @param mimeTypes if non-null, only documents whose detected MIME type matches one of these are dumped.
 * @param epochFilename if {@code true}, output files will be named using the epoch time (in milliseconds).
 * @param extension a file extension to use with output documents.
 * @param warc if {@code true}, output is written via the WARC format implementation instead of JSON/CBOR.
 * @throws Exception if any exception occurs.
 */
public void dump(File outputDir, File segmentRootDir, File linkdb, boolean gzip, String[] mimeTypes,
        boolean epochFilename, String extension, boolean warc) throws Exception {
    if (gzip) {
        LOG.info("Gzipping CBOR data has been skipped");
    }
    // total file counts
    Map<String, Integer> typeCounts = new HashMap<>();
    // filtered file counters
    Map<String, Integer> filteredCounts = new HashMap<>();
    Configuration nutchConfig = NutchConfiguration.create();
    Path segmentRootPath = new Path(segmentRootDir.toString());
    FileSystem fs = segmentRootPath.getFileSystem(nutchConfig);

    // get all paths
    List<Path> parts = new ArrayList<>();
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(segmentRootPath, true);
    String partPattern = ".*" + File.separator + Content.DIR_NAME + File.separator + "part-[0-9]{5}"
            + File.separator + "data";
    while (files.hasNext()) {
        LocatedFileStatus next = files.next();
        if (next.isFile()) {
            Path path = next.getPath();
            if (path.toString().matches(partPattern)) {
                parts.add(path);
            }
        }
    }
    LinkDbReader linkDbReader = null;
    if (linkdb != null) {
        linkDbReader = new LinkDbReader(nutchConfig, new Path(linkdb.toString()));
    }
    if (parts == null || parts.size() == 0) {
        LOG.error("No segment directories found in {} ", segmentRootDir.getAbsolutePath());
        System.exit(1);
    }
    LOG.info("Found {} segment parts", parts.size());
    if (gzip && !warc) {
        fileList = new ArrayList<>();
        constructNewStream(outputDir);
    }
    for (Path segmentPart : parts) {
        LOG.info("Processing segment Part : [ {} ]", segmentPart);
        try {
            SequenceFile.Reader reader = new SequenceFile.Reader(nutchConfig,
                    SequenceFile.Reader.file(segmentPart));
            Writable key = (Writable) reader.getKeyClass().newInstance();
            Content content = null;
            while (reader.next(key)) {
                content = new Content();
                reader.getCurrentValue(content);
                Metadata metadata = content.getMetadata();
                String url = key.toString();
                String baseName = FilenameUtils.getBaseName(url);
                String extensionName = FilenameUtils.getExtension(url);
                if (!extension.isEmpty()) {
                    extensionName = extension;
                } else if ((extensionName == null) || extensionName.isEmpty()) {
                    extensionName = "html";
                }
                String outputFullPath = null;
                String outputRelativePath = null;
                String filename = null;
                String timestamp = null;
                String reverseKey = null;
                if (epochFilename || config.getReverseKey()) {
                    try {
                        long epoch = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z")
                                .parse(getDate(metadata.get("Date"))).getTime();
                        timestamp = String.valueOf(epoch);
                    } catch (ParseException pe) {
                        LOG.warn(pe.getMessage());
                    }
                    reverseKey = reverseUrl(url);
                    config.setReverseKeyValue(
                            reverseKey.replace("/", "_") + "_" + DigestUtils.sha1Hex(url) + "_" + timestamp);
                }
                if (!warc) {
                    if (epochFilename) {
                        outputFullPath = DumpFileUtil.createFileNameFromUrl(outputDir.getAbsolutePath(),
                                reverseKey, url, timestamp, extensionName, !gzip);
                        outputRelativePath = outputFullPath.substring(0,
                                outputFullPath.lastIndexOf(File.separator) - 1);
                        filename = content.getMetadata().get(Metadata.DATE) + "." + extensionName;
                    } else {
                        String md5Ofurl = DumpFileUtil.getUrlMD5(url);
                        String fullDir = DumpFileUtil.createTwoLevelsDirectory(outputDir.getAbsolutePath(),
                                md5Ofurl, !gzip);
                        filename = DumpFileUtil.createFileName(md5Ofurl, baseName, extensionName);
                        outputFullPath = String.format("%s/%s", fullDir, filename);
                        String[] fullPathLevels = fullDir.split(Pattern.quote(File.separator));
                        String firstLevelDirName = fullPathLevels[fullPathLevels.length - 2];
                        String secondLevelDirName = fullPathLevels[fullPathLevels.length - 1];
                        outputRelativePath = firstLevelDirName + secondLevelDirName;
                    }
                }
                // Encode all filetypes if no mimetypes have been given
                Boolean filter = (mimeTypes == null);
                String jsonData = "";
                try {
                    String mimeType = new Tika().detect(content.getContent());
                    // Maps file to JSON-based structure
                    Set<String> inUrls = null; // there may be duplicates, so using set
                    if (linkDbReader != null) {
                        Inlinks inlinks = linkDbReader.getInlinks((Text) key);
                        if (inlinks != null) {
                            Iterator<Inlink> iterator = inlinks.iterator();
                            inUrls = new LinkedHashSet<>();
                            while (inUrls.size() <= MAX_INLINKS && iterator.hasNext()) {
                                inUrls.add(iterator.next().getFromUrl());
                            }
                        }
                    }
                    // TODO: Make this Jackson Format implementation reusable
                    try (CommonCrawlFormat format = CommonCrawlFormatFactory
                            .getCommonCrawlFormat(warc ? "WARC" : "JACKSON", nutchConfig, config)) {
                        if (inUrls != null) {
                            format.setInLinks(new ArrayList<>(inUrls));
                        }
                        jsonData = format.getJsonData(url, content, metadata);
                    }
                    collectStats(typeCounts, mimeType);
                    // collects statistics for the given mimetypes
                    if ((mimeType != null) && (mimeTypes != null)
                            && Arrays.asList(mimeTypes).contains(mimeType)) {
                        collectStats(filteredCounts, mimeType);
                        filter = true;
                    }
                } catch (IOException ioe) {
                    LOG.error("Fatal error in creating JSON data: " + ioe.getMessage());
                    return;
                }
                if (!warc) {
                    if (filter) {
                        byte[] byteData = serializeCBORData(jsonData);
                        if (!gzip) {
                            File outputFile = new File(outputFullPath);
                            if (outputFile.exists()) {
                                LOG.info("Skipping writing: [" + outputFullPath + "]: file already exists");
                            } else {
                                LOG.info("Writing: [" + outputFullPath + "]");
                                IOUtils.copy(new ByteArrayInputStream(byteData),
                                        new FileOutputStream(outputFile));
                            }
                        } else {
                            if (fileList.contains(outputFullPath)) {
                                LOG.info("Skipping compressing: [" + outputFullPath + "]: file already exists");
                            } else {
                                fileList.add(outputFullPath);
                                LOG.info("Compressing: [" + outputFullPath + "]");
                                TarArchiveEntry tarEntry = new TarArchiveEntry(
                                        outputRelativePath + File.separator + filename);
                                tarEntry.setSize(byteData.length);
                                tarOutput.putArchiveEntry(tarEntry);
                                tarOutput.write(byteData);
                                tarOutput.closeArchiveEntry();
                            }
                        }
                    }
                }
            }
            reader.close();
        } catch (Exception e) {
            LOG.warn("SKIPPED: {} Because : {}", segmentPart, e.getMessage());
        } finally {
            fs.close();
        }
    }
    if (gzip && !warc) {
        closeStream();
    }
    if (!typeCounts.isEmpty()) {
        LOG.info("CommonsCrawlDataDumper File Stats: "
                + DumpFileUtil.displayFileTypes(typeCounts, filteredCounts));
    }
}