List of usage examples for the org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream constructor GzipCompressorOutputStream(OutputStream)
public GzipCompressorOutputStream(final OutputStream outputStream) throws IOException
From source file:edu.jhu.hlt.concrete.ingesters.webposts.WebPostIngesterRunner.java
/** * @param args/*w w w .j a v a 2 s .c o m*/ */ public static void main(String... args) { Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler()); WebPostIngesterRunner run = new WebPostIngesterRunner(); JCommander jc = new JCommander(run, args); jc.setProgramName(WebPostIngesterRunner.class.getSimpleName()); if (run.delegate.help) { jc.usage(); } try { Path outpath = Paths.get(run.delegate.outputPath); IngesterParameterDelegate.prepare(outpath); WebPostIngester ing = new WebPostIngester(); Path outWithExt = outpath.resolve("webposts.tar.gz"); if (Files.exists(outWithExt)) { if (!run.delegate.overwrite) { LOGGER.info("File: {} exists and overwrite disabled. Not running.", outWithExt.toString()); return; } else { Files.delete(outWithExt); } } try (OutputStream os = Files.newOutputStream(outWithExt); GzipCompressorOutputStream gout = new GzipCompressorOutputStream(os); TarArchiver arch = new TarArchiver(gout)) { for (String pstr : run.delegate.paths) { LOGGER.debug("Running on file: {}", pstr); Path p = Paths.get(pstr); new ExistingNonDirectoryFile(p); try { Communication next = ing.fromCharacterBasedFile(p); arch.addEntry(new ArchivableCommunication(next)); } catch (IngestException e) { LOGGER.error("Error processing file: " + pstr, e); } } } } catch (NotFileException | IOException e) { LOGGER.error("Caught exception processing.", e); } }
From source file:edu.jhu.hlt.concrete.ingesters.bolt.BoltIngesterRunner.java
/**
 * Command-line entry point. Ingests every file returned by
 * {@code run.delegate.findFilesInPaths()} and writes the resulting communications into a
 * single {@code bolt.tar.gz} archive resolved against the configured output path.
 *
 * <p>If the archive already exists it is deleted and rebuilt only when the overwrite flag
 * is set; otherwise the run is skipped. A file that fails ingestion is logged and skipped;
 * a {@code NotFileException} or {@code IOException} aborts the whole run and is logged.
 *
 * @param args command-line arguments, bound onto this runner by JCommander
 */
public static void main(String... args) {
  Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler());
  BoltIngesterRunner run = new BoltIngesterRunner();
  JCommander jc = new JCommander(run, args);
  jc.setProgramName(BoltIngesterRunner.class.getSimpleName());
  if (run.delegate.help) {
    jc.usage();
    // Help was requested: print usage only and do not start an ingest run.
    return;
  }

  try {
    Path outpath = Paths.get(run.delegate.outputPath);
    // NOTE(review): presumably validates/creates the output directory -- confirm.
    IngesterParameterDelegate.prepare(outpath);

    BoltForumPostIngester ing = new BoltForumPostIngester();
    Path outWithExt = outpath.resolve("bolt.tar.gz");

    if (Files.exists(outWithExt)) {
      if (!run.delegate.overwrite) {
        LOGGER.info("File: {} exists and overwrite disabled. Not running.", outWithExt.toString());
        return;
      } else {
        Files.delete(outWithExt);
      }
    }

    try (OutputStream os = Files.newOutputStream(outWithExt);
        GzipCompressorOutputStream gout = new GzipCompressorOutputStream(os);
        TarArchiver arch = new TarArchiver(gout)) {
      for (Path p : run.delegate.findFilesInPaths()) {
        LOGGER.debug("Running on file: {}", p);
        // Instantiated only for its side effect; the instance itself is unused.
        // NOTE(review): presumably the source of the NotFileException caught below -- confirm.
        new ExistingNonDirectoryFile(p);
        try {
          Communication next = ing.fromCharacterBasedFile(p);
          arch.addEntry(new ArchivableCommunication(next));
        } catch (IngestException e) {
          // One bad input must not abort the archive: log it and move on.
          LOGGER.error("Error processing file: " + p, e);
        }
      }
    }
  } catch (NotFileException | IOException e) {
    LOGGER.error("Caught exception processing.", e);
  }
}
From source file:edu.jhu.hlt.concrete.ingesters.alnc.ALNCIngesterRunner.java
/** * @param args/* w ww .j a va2 s. c om*/ */ public static void main(String... args) { Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler()); ALNCIngesterRunner run = new ALNCIngesterRunner(); JCommander jc = new JCommander(run, args); jc.setProgramName(ALNCIngesterRunner.class.getSimpleName()); if (run.delegate.help) { jc.usage(); } try { Path outpath = Paths.get(run.delegate.outputPath); IngesterParameterDelegate.prepare(outpath); for (String pstr : run.delegate.paths) { LOGGER.debug("Running on file: {}", pstr); Path p = Paths.get(pstr); new ExistingNonDirectoryFile(p); Path outWithExt = outpath.resolve(p.getFileName() + ".tar.gz"); if (Files.exists(outWithExt)) { if (!run.delegate.overwrite) { LOGGER.info("File: {} exists and overwrite disabled. Not running.", outWithExt.toString()); continue; } else { Files.delete(outWithExt); } } try (ALNCIngester ing = new ALNCIngester(p); OutputStream os = Files.newOutputStream(outWithExt); GzipCompressorOutputStream gout = new GzipCompressorOutputStream(os); TarArchiver arch = new TarArchiver(gout)) { Iterator<Communication> iter = ing.iterator(); while (iter.hasNext()) { Communication c = iter.next(); LOGGER.debug("Got comm: {}", c.getId()); arch.addEntry(new ArchivableCommunication(c)); } } catch (IngestException e) { LOGGER.error("Caught exception processing path: " + pstr, e); } } } catch (NotFileException | IOException e) { LOGGER.error("Caught exception processing.", e); } }
From source file:edu.jhu.hlt.concrete.ingesters.gigaword.GigawordGzProcessor.java
public static void main(String... args) { Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler()); if (args.length != 2) { LOGGER.info("This program takes 2 arguments."); LOGGER.info("First: the path to a .gz file that is part of the English Gigaword v5 corpus."); LOGGER.info("Second: the path to the output file (a .tar.gz with communication files)."); LOGGER.info("Example usage:"); LOGGER.info("{} {} {}", GigawordGzProcessor.class.getName(), "/path/to/LDC/sgml/.gz", "/path/to/out.tar.gz"); System.exit(1);/*from w w w. j a v a 2 s. c o m*/ } String inPathStr = args[0]; String outPathStr = args[1]; Path inPath = Paths.get(inPathStr); if (!Files.exists(inPath)) LOGGER.error("Input path {} does not exist. Try again with the right path.", inPath.toString()); Path outPath = Paths.get(outPathStr); Optional<Path> parent = Optional.ofNullable(outPath.getParent()); // lambda does not allow caught exceptions. if (parent.isPresent()) { if (!Files.exists(outPath.getParent())) { LOGGER.info("Attempting to create output directory: {}", outPath.toString()); try { Files.createDirectories(outPath); } catch (IOException e) { LOGGER.error("Caught exception creating output directory.", e); } } } GigawordDocumentConverter conv = new GigawordDocumentConverter(); Iterator<Communication> iter = conv.gzToStringIterator(inPath); try (OutputStream os = Files.newOutputStream(outPath); BufferedOutputStream bos = new BufferedOutputStream(os, 1024 * 8 * 16); GzipCompressorOutputStream gout = new GzipCompressorOutputStream(bos); TarArchiver archiver = new TarArchiver(gout);) { while (iter.hasNext()) { Communication c = iter.next(); LOGGER.info("Adding Communication {} [UUID: {}] to archive.", c.getId(), c.getUuid().getUuidString()); archiver.addEntry(new ArchivableCommunication(c)); } } catch (IOException e) { LOGGER.error("Caught IOException during output.", e); } }
From source file:edu.jhu.hlt.concrete.ingesters.annotatednyt.AnnotatedNYTIngesterRunner.java
/**
 * Command-line entry point. For each path in {@code run.delegate.paths}, reads the input
 * through a {@code TarGzArchiveEntryByteIterator}, parses every entry into an annotated
 * NYT document, converts it to a communication and adds it to an output tar.gz archive.
 *
 * <p>The output file name is the name of the input's parent directory concatenated with
 * the input file name, resolved against the configured output path. An existing output is
 * deleted and rebuilt only when the overwrite flag is set; otherwise that input is skipped.
 *
 * @param args command-line arguments, bound onto this runner by JCommander
 */
public static void main(String... args) {
  Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler());
  AnnotatedNYTIngesterRunner run = new AnnotatedNYTIngesterRunner();
  JCommander jc = new JCommander(run, args);
  jc.setProgramName(AnnotatedNYTIngesterRunner.class.getSimpleName());
  if (run.delegate.help) {
    jc.usage();
    // Help was requested: print usage only and do not start an ingest run.
    return;
  }

  try {
    Path outpath = Paths.get(run.delegate.outputPath);
    // NOTE(review): presumably validates/creates the output directory -- confirm.
    IngesterParameterDelegate.prepare(outpath);

    NYTCorpusDocumentParser parser = new NYTCorpusDocumentParser();
    for (String pstr : run.delegate.paths) {
      LOGGER.debug("Running on file: {}", pstr);
      Path p = Paths.get(pstr);
      // Instantiated only for its side effect; the instance itself is unused.
      // NOTE(review): presumably the source of the NotFileException caught below -- confirm.
      new ExistingNonDirectoryFile(p);

      // The second-to-last name element (the parent directory) prefixes the output name.
      // Path.getName throws IllegalArgumentException when the path has fewer than two
      // name elements, so inputs must be given with at least their parent directory.
      int nPaths = p.getNameCount();
      Path year = p.getName(nPaths - 2);
      Path outWithExt = outpath.resolve(year.toString() + p.getFileName());

      if (Files.exists(outWithExt)) {
        if (!run.delegate.overwrite) {
          LOGGER.info("File: {} exists and overwrite disabled. Not running.", outWithExt.toString());
          continue;
        } else {
          Files.delete(outWithExt);
        }
      }

      try (InputStream is = Files.newInputStream(p);
          BufferedInputStream bin = new BufferedInputStream(is);
          TarGzArchiveEntryByteIterator iter = new TarGzArchiveEntryByteIterator(bin);
          OutputStream os = Files.newOutputStream(outWithExt);
          GzipCompressorOutputStream gout = new GzipCompressorOutputStream(os);
          TarArchiver arch = new TarArchiver(gout)) {
        // Adapt the single-use iterator to an Iterable so it can back a sequential stream.
        Iterable<byte[]> able = () -> iter;
        StreamSupport.stream(able.spliterator(), false)
            .map(ba -> parser.fromByteArray(ba, false))
            .map(doc -> new AnnotatedNYTDocument(doc))
            .map(and -> new CommunicationizableAnnotatedNYTDocument(and).toCommunication())
            .forEach(comm -> {
              try {
                arch.addEntry(new ArchivableCommunication(comm));
              } catch (IOException e) {
                // A lambda cannot propagate the checked exception; log and keep going.
                LOGGER.error("Caught exception processing file: " + pstr, e);
              }
            });
      }
    }
  } catch (NotFileException | IOException e) {
    LOGGER.error("Caught exception processing.", e);
  }
}
From source file:msec.org.GzipUtil.java
/**
 * Gzip-compresses the file at {@code srcFile} into a sibling file named
 * {@code srcFile + ".gz"}.
 *
 * <p>All streams are closed even when reading or writing fails. The source is opened
 * before the destination, so a missing source does not leave an empty {@code .gz} behind.
 *
 * @param srcFile path of the file to compress
 * @throws Exception if the source cannot be read or the compressed file cannot be written
 */
public static void zip(String srcFile) throws Exception {
  try (FileInputStream in = new FileInputStream(srcFile);
      FileOutputStream fileOut = new FileOutputStream(srcFile + ".gz");
      GzipCompressorOutputStream out = new GzipCompressorOutputStream(fileOut)) {
    byte[] buf = new byte[10240];
    int len;
    // read() returns -1 at end of stream; a non-positive count ends the copy.
    while ((len = in.read(buf)) > 0) {
      out.write(buf, 0, len);
    }
    // Closing the gzip stream (done by try-with-resources) flushes the remaining data.
  }
}
From source file:edu.umd.umiacs.clip.tools.io.SerializationTools.java
public static void serializeAndOverride(String path, Object object) { String tmp = path + "._SAVING"; new File(tmp).delete(); new File(tmp).getParentFile().mkdirs(); try (FileOutputStream os = new FileOutputStream(tmp); ObjectOutputStream out = new ObjectOutputStream( path.endsWith(".bz2") ? new BZip2CompressorOutputStream(os) : path.endsWith(".gz") ? new GzipCompressorOutputStream(os) : os)) { out.writeObject(object);//from w w w .j av a2 s. com } catch (IOException e) { e.printStackTrace(); } new File(path).delete(); new File(tmp).renameTo(new File(path)); }
From source file:msec.org.GzipUtil.java
/**
 * Gzip-compresses the file at {@code srcFile} into {@code destFile}.
 *
 * <p>All streams are closed even when reading or writing fails. The source is opened
 * before the destination, so a missing source does not leave an empty output file behind.
 *
 * @param srcFile path of the file to compress
 * @param destFile path of the gzip file to create (overwritten if present)
 * @throws Exception if the source cannot be read or the destination cannot be written
 */
public static void zip(String srcFile, String destFile) throws Exception {
  try (FileInputStream in = new FileInputStream(srcFile);
      FileOutputStream fileOut = new FileOutputStream(destFile);
      GzipCompressorOutputStream out = new GzipCompressorOutputStream(fileOut)) {
    byte[] buf = new byte[10240];
    int len;
    // read() returns -1 at end of stream; a non-positive count ends the copy.
    while ((len = in.read(buf)) > 0) {
      out.write(buf, 0, len);
    }
    // Closing the gzip stream (done by try-with-resources) flushes the remaining data.
  }
}
From source file:com.ipcglobal.fredimport.util.FredUtils.java
/**
 * Creates a tar.gz file at the specified path with the contents of the specified directory.
 *
 * <p>Every stream in the chain is closed in reverse order of creation, including when
 * opening the file or adding entries fails, so the original failure is what reaches the
 * caller.
 *
 * @param directoryPath the directory path
 * @param tarGzPath the tar gz path
 * @throws IOException If anything goes wrong
 */
public static void createTarGzOfDirectory(String directoryPath, String tarGzPath) throws IOException {
  try (FileOutputStream fOut = new FileOutputStream(new File(tarGzPath));
      BufferedOutputStream bOut = new BufferedOutputStream(fOut);
      GzipCompressorOutputStream gzOut = new GzipCompressorOutputStream(bOut);
      TarArchiveOutputStream tOut = new TarArchiveOutputStream(gzOut)) {
    addFileToTarGz(tOut, directoryPath, "/");
    // Finish the archive explicitly once all entries were added, before the streams close.
    tOut.finish();
  }
}
From source file:edu.jhu.hlt.concrete.serialization.TarGzCompactCommunicationSerializer.java
@Override public void toTarGz(Collection<Communication> commColl, Path outPath) throws ConcreteException { try (OutputStream os = Files.newOutputStream(outPath); BufferedOutputStream bos = new BufferedOutputStream(os); GzipCompressorOutputStream gzos = new GzipCompressorOutputStream(bos); TarArchiveOutputStream tos = new TarArchiveOutputStream(gzos);) { for (Communication c : commColl) { TarArchiveEntry entry = new TarArchiveEntry(c.getId() + ".concrete"); byte[] cbytes = this.toBytes(c); entry.setSize(cbytes.length); tos.putArchiveEntry(entry);// www. j a va2s . c om try (ByteArrayInputStream bis = new ByteArrayInputStream(cbytes)) { IOUtils.copy(bis, tos); tos.closeArchiveEntry(); } } } catch (IOException e) { throw new ConcreteException(e); } }