List of usage examples for org.apache.commons.compress.archivers.tar.TarArchiveEntry#setSize
public void setSize(long size)
From source file:io.anserini.index.IndexUtils.java
public void dumpDocumentVectors(String reqDocidsPath, DocVectorWeight weight) throws IOException { String outFileName = weight == null ? reqDocidsPath + ".docvector.tar.gz" : reqDocidsPath + ".docvector." + weight + ".tar.gz"; LOG.info("Start dump document vectors with weight " + weight); InputStream in = getReadFileStream(reqDocidsPath); BufferedReader bRdr = new BufferedReader(new InputStreamReader(in)); FileOutputStream fOut = new FileOutputStream(new File(outFileName)); BufferedOutputStream bOut = new BufferedOutputStream(fOut); GzipCompressorOutputStream gzOut = new GzipCompressorOutputStream(bOut); TarArchiveOutputStream tOut = new TarArchiveOutputStream(gzOut); Map<Term, Integer> docFreqMap = new HashMap<>(); int numNonEmptyDocs = reader.getDocCount(LuceneDocumentGenerator.FIELD_BODY); String docid;/*from w w w . ja v a 2s.c o m*/ int counter = 0; while ((docid = bRdr.readLine()) != null) { counter++; // get term frequency Terms terms = reader.getTermVector(convertDocidToLuceneDocid(docid), LuceneDocumentGenerator.FIELD_BODY); if (terms == null) { // We do not throw exception here because there are some // collections in which part of documents don't have document vectors LOG.warn("Document vector not stored for doc " + docid); continue; } TermsEnum te = terms.iterator(); if (te == null) { LOG.warn("Document vector not stored for doc " + docid); continue; } Term term; long freq; // iterate every term and write and store in Map Map<String, String> docVectors = new HashMap<>(); while ((te.next()) != null) { term = new Term(LuceneDocumentGenerator.FIELD_BODY, te.term()); freq = te.totalTermFreq(); switch (weight) { case NONE: docVectors.put(term.bytes().utf8ToString(), String.valueOf(freq)); break; case TF_IDF: int docFreq; if (docFreqMap.containsKey(term)) { docFreq = docFreqMap.get(term); } else { try { docFreq = reader.docFreq(term); } catch (Exception e) { LOG.error("Cannot find term " + term.toString() + " in indexing file."); continue; } 
docFreqMap.put(term, docFreq); } float tfIdf = (float) (freq * Math.log(numNonEmptyDocs * 1.0 / docFreq)); docVectors.put(term.bytes().utf8ToString(), String.format("%.6f", tfIdf)); break; } } // Count size and write byte[] bytesOut = docVectors.entrySet().stream().map(e -> e.getKey() + " " + e.getValue()) .collect(joining("\n")).getBytes(StandardCharsets.UTF_8); TarArchiveEntry tarEntry = new TarArchiveEntry(new File(docid)); tarEntry.setSize(bytesOut.length + String.format("<DOCNO>%s</DOCNO>\n", docid).length()); tOut.putArchiveEntry(tarEntry); tOut.write(String.format("<DOCNO>%s</DOCNO>\n", docid).getBytes()); tOut.write(bytesOut); tOut.closeArchiveEntry(); if (counter % 100000 == 0) { LOG.info(counter + " files have been dumped."); } } tOut.close(); LOG.info("Document Vectors are output to: " + outFileName); }
From source file:io.anserini.index.IndexUtils.java
public void dumpRawDocuments(String reqDocidsPath, boolean prependDocid) throws IOException, NotStoredException { LOG.info("Start dump raw documents" + (prependDocid ? " with Docid prepended" : ".")); InputStream in = getReadFileStream(reqDocidsPath); BufferedReader bRdr = new BufferedReader(new InputStreamReader(in)); FileOutputStream fOut = new FileOutputStream(new File(reqDocidsPath + ".output.tar.gz")); BufferedOutputStream bOut = new BufferedOutputStream(fOut); GzipCompressorOutputStream gzOut = new GzipCompressorOutputStream(bOut); TarArchiveOutputStream tOut = new TarArchiveOutputStream(gzOut); String docid;//from ww w .j ava 2 s . c o m int counter = 0; while ((docid = bRdr.readLine()) != null) { counter += 1; Document d = reader.document(convertDocidToLuceneDocid(docid)); IndexableField doc = d.getField(LuceneDocumentGenerator.FIELD_RAW); if (doc == null) { throw new NotStoredException("Raw documents not stored!"); } TarArchiveEntry tarEntry = new TarArchiveEntry(new File(docid)); byte[] bytesOut = doc.stringValue().getBytes(StandardCharsets.UTF_8); tarEntry.setSize( bytesOut.length + (prependDocid ? String.format("<DOCNO>%s</DOCNO>\n", docid).length() : 0)); tOut.putArchiveEntry(tarEntry); if (prependDocid) { tOut.write(String.format("<DOCNO>%s</DOCNO>\n", docid).getBytes()); } tOut.write(bytesOut); tOut.closeArchiveEntry(); if (counter % 100000 == 0) { LOG.info(counter + " files have been dumped."); } } tOut.close(); LOG.info(String.format("Raw documents are output to: %s", reqDocidsPath + ".output.tar.gz")); }
From source file:com.st.maven.debian.DebianPackageMojo.java
private void fillDataTar(Config config, ArFileOutputStream output) throws MojoExecutionException { TarArchiveOutputStream tar = null;// w w w . j a va2 s . co m try { tar = new TarArchiveOutputStream(new GZIPOutputStream(new ArWrapper(output))); tar.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU); if (Boolean.TRUE.equals(javaServiceWrapper)) { byte[] daemonData = processTemplate(freemarkerConfig, config, "daemon.ftl"); TarArchiveEntry initScript = new TarArchiveEntry("etc/init.d/" + project.getArtifactId()); initScript.setSize(daemonData.length); initScript.setMode(040755); tar.putArchiveEntry(initScript); tar.write(daemonData); tar.closeArchiveEntry(); } String packageBaseDir = "home/" + unixUserId + "/" + project.getArtifactId() + "/"; if (fileSets != null && !fileSets.isEmpty()) { writeDirectory(tar, packageBaseDir); Collections.sort(fileSets, MappingPathComparator.INSTANCE); for (Fileset curPath : fileSets) { curPath.setTarget(packageBaseDir + curPath.getTarget()); addRecursively(config, tar, curPath); } } } catch (Exception e) { throw new MojoExecutionException("unable to create data tar", e); } finally { IOUtils.closeQuietly(tar); } }
From source file:com.st.maven.debian.DebianPackageMojo.java
private void fillControlTar(Config config, ArFileOutputStream output) throws MojoExecutionException { TarArchiveOutputStream tar = null;//from w w w.j a v a 2 s .c o m try { tar = new TarArchiveOutputStream(new GZIPOutputStream(new ArWrapper(output))); tar.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU); TarArchiveEntry rootDir = new TarArchiveEntry("./"); tar.putArchiveEntry(rootDir); tar.closeArchiveEntry(); byte[] controlData = processTemplate(freemarkerConfig, config, "control.ftl"); TarArchiveEntry controlEntry = new TarArchiveEntry("./control"); controlEntry.setSize(controlData.length); tar.putArchiveEntry(controlEntry); tar.write(controlData); tar.closeArchiveEntry(); byte[] preinstBaseData = processTemplate("preinst", freemarkerConfig, config, combine("preinst.ftl", BASE_DIR + File.separator + "preinst", false)); long size = preinstBaseData.length; TarArchiveEntry preinstEntry = new TarArchiveEntry("./preinst"); preinstEntry.setSize(size); preinstEntry.setMode(0755); tar.putArchiveEntry(preinstEntry); tar.write(preinstBaseData); tar.closeArchiveEntry(); byte[] postinstBaseData = processTemplate("postinst", freemarkerConfig, config, combine("postinst.ftl", BASE_DIR + File.separator + "postinst", true)); size = postinstBaseData.length; TarArchiveEntry postinstEntry = new TarArchiveEntry("./postinst"); postinstEntry.setSize(size); postinstEntry.setMode(0755); tar.putArchiveEntry(postinstEntry); tar.write(postinstBaseData); tar.closeArchiveEntry(); byte[] prermBaseData = processTemplate("prerm", freemarkerConfig, config, combine("prerm.ftl", BASE_DIR + File.separator + "prerm", false)); size = prermBaseData.length; TarArchiveEntry prermEntry = new TarArchiveEntry("./prerm"); prermEntry.setSize(size); prermEntry.setMode(0755); tar.putArchiveEntry(prermEntry); tar.write(prermBaseData); tar.closeArchiveEntry(); byte[] postrmBaseData = processTemplate("postrm", freemarkerConfig, config, combine("postrm.ftl", BASE_DIR + File.separator + "postrm", false)); size 
= postrmBaseData.length; TarArchiveEntry postrmEntry = new TarArchiveEntry("./postrm"); postrmEntry.setSize(size); postrmEntry.setMode(0755); tar.putArchiveEntry(postrmEntry); tar.write(postrmBaseData); tar.closeArchiveEntry(); } catch (Exception e) { throw new MojoExecutionException("unable to create control tar", e); } finally { if (tar != null) { try { tar.close(); } catch (IOException e) { getLog().error("unable to finish tar", e); } } } }
From source file:cpcc.vvrte.services.VirtualVehicleMigratorImpl.java
/** * @param virtualVehicle the virtual vehicle. * @param os the output stream to write to. * @param chunkNumber the chunk number./*from w w w. jav a2 s .c o m*/ * @throws IOException thrown in case of errors. */ private void writeVirtualVehicleSourceCode(VirtualVehicle virtualVehicle, ArchiveOutputStream os, int chunkNumber) throws IOException { if (virtualVehicle.getCode() == null) { return; } byte[] source = virtualVehicle.getCode().getBytes("UTF-8"); TarArchiveEntry entry = new TarArchiveEntry(DATA_VV_SOURCE_JS); entry.setModTime(new Date()); entry.setSize(source.length); entry.setIds(0, chunkNumber); entry.setNames("vvrte", "cpcc"); os.putArchiveEntry(entry); os.write(source); os.closeArchiveEntry(); }
From source file:cpcc.vvrte.services.VirtualVehicleMigratorImpl.java
/**
 * Serializes the properties of a virtual vehicle and writes them as a single archive entry.
 *
 * @param virtualVehicle the virtual vehicle.
 * @param os the output stream to write to.
 * @param chunkNumber the chunk number, recorded as the second id of the entry.
 * @param lastChunk passed on to the property collection of the virtual vehicle.
 * @throws IOException thrown in case of errors.
 */
private void writeVirtualVehicleProperties(VirtualVehicle virtualVehicle, ArchiveOutputStream os,
        int chunkNumber, boolean lastChunk) throws IOException {
    Properties props = fillVirtualVehicleProps(virtualVehicle, lastChunk);

    // Render the properties into memory first because the entry size must be known up front.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    props.store(buffer, "Virtual Vehicle Properties");
    buffer.close();
    byte[] serialized = buffer.toByteArray();

    TarArchiveEntry propsEntry = new TarArchiveEntry(DATA_VV_PROPERTIES);
    propsEntry.setNames("vvrte", "cpcc");
    propsEntry.setIds(0, chunkNumber);
    propsEntry.setSize(serialized.length);
    propsEntry.setModTime(new Date());

    os.putArchiveEntry(propsEntry);
    os.write(serialized);
    os.closeArchiveEntry();
}
From source file:cpcc.vvrte.services.VirtualVehicleMigratorImpl.java
/**
 * Writes the continuation of a virtual vehicle as a single archive entry. Nothing is written
 * when the virtual vehicle has no continuation.
 *
 * @param virtualVehicle the virtual vehicle.
 * @param os the output stream to write to.
 * @param chunkNumber the chunk number, recorded as the second id of the entry.
 * @throws IOException thrown in case of errors.
 */
private void writeVirtualVehicleContinuation(VirtualVehicle virtualVehicle, ArchiveOutputStream os,
        int chunkNumber) throws IOException {
    byte[] payload = virtualVehicle.getContinuation();
    if (payload != null) {
        TarArchiveEntry continuationEntry = new TarArchiveEntry(DATA_VV_CONTINUATION_JS);
        continuationEntry.setNames("vvrte", "cpcc");
        continuationEntry.setIds(0, chunkNumber);
        continuationEntry.setSize(payload.length);
        continuationEntry.setModTime(new Date());

        os.putArchiveEntry(continuationEntry);
        os.write(payload);
        os.closeArchiveEntry();
    }
}
From source file:cpcc.vvrte.services.VirtualVehicleMigratorImpl.java
/** * @param os the output stream to write to. * @throws IOException thrown in case of errors. *///from w w w . j a v a2 s .com private void writeVirtualVehicleStorageChunk(VirtualVehicle virtualVehicle, ArchiveOutputStream os, int chunkNumber, List<VirtualVehicleStorage> storageChunk) throws IOException { for (VirtualVehicleStorage se : storageChunk) { logger.debug("Writing storage entry '" + se.getName() + "'"); byte[] content = se.getContentAsByteArray(); TarArchiveEntry entry = new TarArchiveEntry("storage/" + se.getName()); entry.setModTime(se.getModificationTime()); entry.setSize(content.length); entry.setIds(se.getId(), chunkNumber); entry.setNames("vvrte", "cpcc"); os.putArchiveEntry(entry); os.write(content); os.closeArchiveEntry(); } }
From source file:io.fabric8.docker.client.impl.BuildImage.java
/**
 * Packs the files below the given folder into a temporary bzip2-compressed tar archive and
 * hands that archive to {@code fromTar}.
 *
 * <p>If the folder contains a docker ignore file, each of its lines is prefixed with the folder
 * path and used as an ignore pattern. Matching directories are skipped with their whole
 * subtree and matching files are left out. Entries are named relative to the folder.
 *
 * @param path the folder to pack
 * @return the handle returned by {@code fromTar} for the temporary archive
 */
@Override
public OutputHandle fromFolder(String path) {
    try {
        final Path root = Paths.get(path);
        final Path dockerIgnore = root.resolve(DOCKER_IGNORE);

        final List<String> ignorePatterns = new ArrayList<>();
        if (dockerIgnore.toFile().exists()) {
            // Patterns are anchored at the folder path, with exactly one separator in between.
            final String prefix = path.endsWith(File.separator) ? path : path + File.separator;
            for (String line : Files.readAllLines(dockerIgnore, UTF_8)) {
                ignorePatterns.add(prefix + line);
            }
        }
        final DockerIgnorePathMatcher dockerIgnorePathMatcher = new DockerIgnorePathMatcher(ignorePatterns);

        File tempFile = Files.createTempFile(Paths.get(DEFAULT_TEMP_DIR), DOCKER_PREFIX, BZIP2_SUFFIX).toFile();

        try (FileOutputStream fout = new FileOutputStream(tempFile);
                BufferedOutputStream bout = new BufferedOutputStream(fout);
                BZip2CompressorOutputStream bzout = new BZip2CompressorOutputStream(bout);
                final TarArchiveOutputStream tout = new TarArchiveOutputStream(bzout)) {

            Files.walkFileTree(root, new SimpleFileVisitor<Path>() {

                @Override
                public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs)
                        throws IOException {
                    return dockerIgnorePathMatcher.matches(dir)
                            ? FileVisitResult.SKIP_SUBTREE
                            : FileVisitResult.CONTINUE;
                }

                @Override
                public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
                    if (dockerIgnorePathMatcher.matches(file)) {
                        // Returned from visitFile this simply moves on to the next file.
                        return FileVisitResult.SKIP_SUBTREE;
                    }

                    final TarArchiveEntry entry = new TarArchiveEntry(file.toFile());
                    entry.setName(root.relativize(file).toString());
                    entry.setMode(TarArchiveEntry.DEFAULT_FILE_MODE);
                    entry.setSize(attrs.size());

                    tout.putArchiveEntry(entry);
                    Files.copy(file, tout);
                    tout.closeArchiveEntry();
                    return FileVisitResult.CONTINUE;
                }
            });
            fout.flush();
        }

        return fromTar(tempFile.getAbsolutePath());
    } catch (IOException e) {
        throw DockerClientException.launderThrowable(e);
    }
}
From source file:edu.mit.lib.bagit.Filler.java
private void fillArchive(File dirFile, String relBase, ArchiveOutputStream out) throws IOException { for (File file : dirFile.listFiles()) { String relPath = relBase + File.separator + file.getName(); if (file.isDirectory()) { fillArchive(file, relPath, out); } else {// www.ja v a 2 s. co m TarArchiveEntry entry = new TarArchiveEntry(relPath); entry.setSize(file.length()); entry.setModTime(0L); out.putArchiveEntry(entry); Files.copy(file.toPath(), out); out.closeArchiveEntry(); } } }