Example usage for the org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream constructor GzipCompressorOutputStream(OutputStream)

List of usage examples for the org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream constructor GzipCompressorOutputStream(OutputStream)

Introduction

On this page you can find example usages of the GzipCompressorOutputStream(OutputStream) constructor of org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream.

Prototype

public GzipCompressorOutputStream(final OutputStream outputStream) throws IOException 

Source Link

Usage

From source file:edu.jhu.hlt.concrete.ingesters.webposts.WebPostIngesterRunner.java

/**
 * @param args/*w  w w  .j  a v  a 2  s  .c o  m*/
 */
public static void main(String... args) {
    Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler());
    WebPostIngesterRunner run = new WebPostIngesterRunner();
    JCommander jc = new JCommander(run, args);
    jc.setProgramName(WebPostIngesterRunner.class.getSimpleName());
    if (run.delegate.help) {
        jc.usage();
    }

    try {
        Path outpath = Paths.get(run.delegate.outputPath);
        IngesterParameterDelegate.prepare(outpath);
        WebPostIngester ing = new WebPostIngester();
        Path outWithExt = outpath.resolve("webposts.tar.gz");

        if (Files.exists(outWithExt)) {
            if (!run.delegate.overwrite) {
                LOGGER.info("File: {} exists and overwrite disabled. Not running.", outWithExt.toString());
                return;
            } else {
                Files.delete(outWithExt);
            }
        }

        try (OutputStream os = Files.newOutputStream(outWithExt);
                GzipCompressorOutputStream gout = new GzipCompressorOutputStream(os);
                TarArchiver arch = new TarArchiver(gout)) {
            for (String pstr : run.delegate.paths) {
                LOGGER.debug("Running on file: {}", pstr);
                Path p = Paths.get(pstr);
                new ExistingNonDirectoryFile(p);
                try {
                    Communication next = ing.fromCharacterBasedFile(p);
                    arch.addEntry(new ArchivableCommunication(next));
                } catch (IngestException e) {
                    LOGGER.error("Error processing file: " + pstr, e);
                }
            }
        }
    } catch (NotFileException | IOException e) {
        LOGGER.error("Caught exception processing.", e);
    }
}

From source file:edu.jhu.hlt.concrete.ingesters.bolt.BoltIngesterRunner.java

/**
 * Command-line entry point: ingests every file returned by the delegate's
 * {@code findFilesInPaths()} and writes the resulting communications into a single
 * {@code bolt.tar.gz} archive located in the configured output directory.
 *
 * <p>When the help flag is set, usage is printed and the method returns without
 * ingesting anything. When the archive already exists and overwrite is disabled,
 * the method logs that fact and returns without writing.
 *
 * @param args command-line arguments, bound to this runner's options by JCommander
 */
public static void main(String... args) {
    Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler());
    BoltIngesterRunner run = new BoltIngesterRunner();
    JCommander jc = new JCommander(run, args);
    jc.setProgramName(BoltIngesterRunner.class.getSimpleName());
    if (run.delegate.help) {
        jc.usage();
        // Help was requested: stop here instead of falling through into ingestion
        // with parameters that may not have been supplied.
        return;
    }

    try {
        Path outpath = Paths.get(run.delegate.outputPath);
        // NOTE(review): presumably ensures the output directory exists — confirm.
        IngesterParameterDelegate.prepare(outpath);
        BoltForumPostIngester ing = new BoltForumPostIngester();
        Path outWithExt = outpath.resolve("bolt.tar.gz");

        if (Files.exists(outWithExt)) {
            if (!run.delegate.overwrite) {
                LOGGER.info("File: {} exists and overwrite disabled. Not running.", outWithExt.toString());
                return;
            } else {
                Files.delete(outWithExt);
            }
        }

        // Resources are closed in reverse order: archiver, then gzip, then the file stream.
        try (OutputStream os = Files.newOutputStream(outWithExt);
                GzipCompressorOutputStream gout = new GzipCompressorOutputStream(os);
                TarArchiver arch = new TarArchiver(gout)) {
            for (Path p : run.delegate.findFilesInPaths()) {
                LOGGER.debug("Running on file: {}", p);
                // Constructed only for its validation side effect; presumably throws
                // NotFileException (handled by the outer catch) for an invalid path.
                new ExistingNonDirectoryFile(p);
                try {
                    Communication next = ing.fromCharacterBasedFile(p);
                    arch.addEntry(new ArchivableCommunication(next));
                } catch (IngestException e) {
                    // A single bad file is logged and skipped; the remaining files still run.
                    LOGGER.error("Error processing file: " + p, e);
                }
            }
        }
    } catch (NotFileException | IOException e) {
        LOGGER.error("Caught exception processing.", e);
    }
}

From source file:edu.jhu.hlt.concrete.ingesters.alnc.ALNCIngesterRunner.java

/**
 * @param args/*  w  ww  .j a va2 s. c om*/
 */
public static void main(String... args) {
    Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler());
    ALNCIngesterRunner run = new ALNCIngesterRunner();
    JCommander jc = new JCommander(run, args);
    jc.setProgramName(ALNCIngesterRunner.class.getSimpleName());
    if (run.delegate.help) {
        jc.usage();
    }

    try {
        Path outpath = Paths.get(run.delegate.outputPath);
        IngesterParameterDelegate.prepare(outpath);

        for (String pstr : run.delegate.paths) {
            LOGGER.debug("Running on file: {}", pstr);
            Path p = Paths.get(pstr);
            new ExistingNonDirectoryFile(p);
            Path outWithExt = outpath.resolve(p.getFileName() + ".tar.gz");

            if (Files.exists(outWithExt)) {
                if (!run.delegate.overwrite) {
                    LOGGER.info("File: {} exists and overwrite disabled. Not running.", outWithExt.toString());
                    continue;
                } else {
                    Files.delete(outWithExt);
                }
            }

            try (ALNCIngester ing = new ALNCIngester(p);
                    OutputStream os = Files.newOutputStream(outWithExt);
                    GzipCompressorOutputStream gout = new GzipCompressorOutputStream(os);
                    TarArchiver arch = new TarArchiver(gout)) {
                Iterator<Communication> iter = ing.iterator();
                while (iter.hasNext()) {
                    Communication c = iter.next();
                    LOGGER.debug("Got comm: {}", c.getId());
                    arch.addEntry(new ArchivableCommunication(c));
                }
            } catch (IngestException e) {
                LOGGER.error("Caught exception processing path: " + pstr, e);
            }
        }
    } catch (NotFileException | IOException e) {
        LOGGER.error("Caught exception processing.", e);
    }
}

From source file:edu.jhu.hlt.concrete.ingesters.gigaword.GigawordGzProcessor.java

/**
 * Command-line entry point: converts one gzipped input file into communications
 * and writes them to a {@code .tar.gz} archive.
 *
 * <p>Exactly two arguments are expected: the input {@code .gz} path and the output
 * archive path. The process exits with status 1 when the argument count is wrong,
 * when the input path does not exist, or when the output's parent directory cannot
 * be created.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output archive path
 */
public static void main(String... args) {
    Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler());
    if (args.length != 2) {
        LOGGER.info("This program takes 2 arguments.");
        LOGGER.info("First: the path to a .gz file that is part of the English Gigaword v5 corpus.");
        LOGGER.info("Second: the path to the output file (a .tar.gz with communication files).");
        LOGGER.info("Example usage:");
        LOGGER.info("{} {} {}", GigawordGzProcessor.class.getName(), "/path/to/LDC/sgml/.gz",
                "/path/to/out.tar.gz");
        System.exit(1);
    }

    String inPathStr = args[0];
    String outPathStr = args[1];

    Path inPath = Paths.get(inPathStr);
    if (!Files.exists(inPath)) {
        LOGGER.error("Input path {} does not exist. Try again with the right path.", inPath.toString());
        // Nothing useful can happen without the input; stop instead of continuing.
        System.exit(1);
    }

    Path outPath = Paths.get(outPathStr);
    // getParent() is null for a bare file name; in that case there is nothing to create.
    Path parent = outPath.getParent();
    if (parent != null && !Files.exists(parent)) {
        LOGGER.info("Attempting to create output directory: {}", parent);
        try {
            // Create the PARENT directory. Creating outPath itself would turn the
            // archive path into a directory and make the write below fail.
            Files.createDirectories(parent);
        } catch (IOException e) {
            LOGGER.error("Caught exception creating output directory.", e);
            System.exit(1);
        }
    }

    GigawordDocumentConverter conv = new GigawordDocumentConverter();
    Iterator<Communication> iter = conv.gzToStringIterator(inPath);
    // Resources are closed in reverse order: archiver, gzip, buffer, file stream.
    try (OutputStream os = Files.newOutputStream(outPath);
            BufferedOutputStream bos = new BufferedOutputStream(os, 1024 * 8 * 16);
            GzipCompressorOutputStream gout = new GzipCompressorOutputStream(bos);
            TarArchiver archiver = new TarArchiver(gout)) {
        while (iter.hasNext()) {
            Communication c = iter.next();
            LOGGER.info("Adding Communication {} [UUID: {}] to archive.", c.getId(),
                    c.getUuid().getUuidString());
            archiver.addEntry(new ArchivableCommunication(c));
        }
    } catch (IOException e) {
        LOGGER.error("Caught IOException during output.", e);
    }
}

From source file:edu.jhu.hlt.concrete.ingesters.annotatednyt.AnnotatedNYTIngesterRunner.java

/**
 * Command-line entry point: for each configured input path, reads the entries of
 * the input archive, converts each entry into a communication, and writes them to
 * an output archive in the configured output directory.
 *
 * <p>The output file name is the name of the input's parent directory (held in the
 * local variable {@code year}) concatenated with the input's file name. When the
 * help flag is set, usage is printed and the method returns without ingesting.
 * An input whose output already exists is skipped unless overwrite is enabled.
 *
 * @param args command-line arguments, bound to this runner's options by JCommander
 */
public static void main(String... args) {
    Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler());
    AnnotatedNYTIngesterRunner run = new AnnotatedNYTIngesterRunner();
    JCommander jc = new JCommander(run, args);
    jc.setProgramName(AnnotatedNYTIngesterRunner.class.getSimpleName());
    if (run.delegate.help) {
        jc.usage();
        // Help was requested: stop here instead of falling through into ingestion
        // with parameters that may not have been supplied.
        return;
    }

    try {
        Path outpath = Paths.get(run.delegate.outputPath);
        // NOTE(review): presumably ensures the output directory exists — confirm.
        IngesterParameterDelegate.prepare(outpath);

        NYTCorpusDocumentParser parser = new NYTCorpusDocumentParser();
        for (String pstr : run.delegate.paths) {
            LOGGER.debug("Running on file: {}", pstr);
            Path p = Paths.get(pstr);
            // Constructed only for its validation side effect; presumably throws
            // NotFileException (handled by the outer catch) for an invalid path.
            new ExistingNonDirectoryFile(p);
            int nPaths = p.getNameCount();
            if (nPaths < 2) {
                // getName(nPaths - 2) would throw IllegalArgumentException for a path
                // that has no parent directory component.
                LOGGER.error("Path {} has no parent directory component. Skipping.", pstr);
                continue;
            }
            Path year = p.getName(nPaths - 2);
            Path outWithExt = outpath.resolve(year.toString() + p.getFileName());

            if (Files.exists(outWithExt)) {
                if (!run.delegate.overwrite) {
                    LOGGER.info("File: {} exists and overwrite disabled. Not running.", outWithExt.toString());
                    continue;
                } else {
                    Files.delete(outWithExt);
                }
            }

            // Input and output resources are closed in reverse order of declaration.
            try (InputStream is = Files.newInputStream(p);
                    BufferedInputStream bin = new BufferedInputStream(is);
                    TarGzArchiveEntryByteIterator iter = new TarGzArchiveEntryByteIterator(bin);

                    OutputStream os = Files.newOutputStream(outWithExt);
                    GzipCompressorOutputStream gout = new GzipCompressorOutputStream(os);
                    TarArchiver arch = new TarArchiver(gout)) {
                // Adapt the single-use iterator to an Iterable so it can be streamed once.
                Iterable<byte[]> able = () -> iter;
                StreamSupport.stream(able.spliterator(), false).map(ba -> parser.fromByteArray(ba, false))
                        .map(doc -> new AnnotatedNYTDocument(doc))
                        .map(and -> new CommunicationizableAnnotatedNYTDocument(and).toCommunication())
                        .forEach(comm -> {
                            try {
                                arch.addEntry(new ArchivableCommunication(comm));
                            } catch (IOException e) {
                                // Checked exceptions cannot escape the lambda; log and keep going.
                                LOGGER.error("Caught exception processing file: " + pstr, e);
                            }
                        });
            }
        }
    } catch (NotFileException | IOException e) {
        LOGGER.error("Caught exception processing.", e);
    }
}

From source file:msec.org.GzipUtil.java

/**
 * Gzip-compresses the file at {@code srcFile} into a new file named
 * {@code srcFile + ".gz"}. The source file is left in place.
 *
 * <p>The input is opened before the output so that a missing source does not
 * leave an empty {@code .gz} file behind. All streams are closed even when
 * reading or writing fails.
 *
 * @param srcFile path of the file to compress
 * @throws Exception if the source cannot be read or the compressed file cannot be written
 */
public static void zip(String srcFile) throws Exception {
    try (FileInputStream in = new FileInputStream(srcFile);
            FileOutputStream fileOut = new FileOutputStream(srcFile + ".gz");
            GzipCompressorOutputStream out = new GzipCompressorOutputStream(fileOut)) {
        byte[] buf = new byte[10240];
        int len;
        // read() returns -1 at end of stream; stop on any non-positive count, as before.
        while ((len = in.read(buf)) > 0) {
            out.write(buf, 0, len);
        }
        // Closing the gzip stream (via try-with-resources) flushes and finishes the output.
    }
}

From source file:edu.umd.umiacs.clip.tools.io.SerializationTools.java

/**
 * Serializes {@code object} to {@code path}, replacing any existing file.
 *
 * <p>The object is first written to a temporary sibling file ({@code path + "._SAVING"}),
 * compressed with bzip2 or gzip when {@code path} ends in {@code .bz2} or {@code .gz}.
 * Only after the write succeeds is the old file deleted and the temporary file renamed
 * into place. If the write fails, the stack trace is printed, the temporary file is
 * removed, and the existing file at {@code path} is left untouched.
 *
 * @param path   destination file path; its extension selects the compression
 * @param object the object to serialize with Java serialization
 */
public static void serializeAndOverride(String path, Object object) {
    File target = new File(path);
    File tmpFile = new File(path + "._SAVING");
    tmpFile.delete();
    // getParentFile() is null for a bare file name; nothing to create in that case.
    File parentDir = tmpFile.getParentFile();
    if (parentDir != null) {
        parentDir.mkdirs();
    }
    try (FileOutputStream os = new FileOutputStream(tmpFile);
            ObjectOutputStream out = new ObjectOutputStream(
                    path.endsWith(".bz2") ? new BZip2CompressorOutputStream(os)
                            : path.endsWith(".gz") ? new GzipCompressorOutputStream(os) : os)) {
        out.writeObject(object);
    } catch (IOException e) {
        e.printStackTrace();
        // Do not replace a good existing file with a partial or corrupt temporary one.
        tmpFile.delete();
        return;
    }
    target.delete();
    tmpFile.renameTo(target);
}

From source file:msec.org.GzipUtil.java

/**
 * Gzip-compresses the file at {@code srcFile} into {@code destFile}.
 * The source file is left in place.
 *
 * <p>The input is opened before the output so that a missing source does not
 * leave an empty destination file behind. All streams are closed even when
 * reading or writing fails.
 *
 * @param srcFile  path of the file to compress
 * @param destFile path of the gzip file to create (overwritten if it exists)
 * @throws Exception if the source cannot be read or the destination cannot be written
 */
public static void zip(String srcFile, String destFile) throws Exception {
    try (FileInputStream in = new FileInputStream(srcFile);
            FileOutputStream fileOut = new FileOutputStream(destFile);
            GzipCompressorOutputStream out = new GzipCompressorOutputStream(fileOut)) {
        byte[] buf = new byte[10240];
        int len;
        // read() returns -1 at end of stream; stop on any non-positive count, as before.
        while ((len = in.read(buf)) > 0) {
            out.write(buf, 0, len);
        }
        // Closing the gzip stream (via try-with-resources) flushes and finishes the output.
    }
}

From source file:com.ipcglobal.fredimport.util.FredUtils.java

/**
 * Creates a tar.gz file at the specified path with the contents of the specified directory.
 *
 * <p>All streams are managed by try-with-resources, so they are closed in reverse
 * order of creation even when opening a later stream or adding files fails, and the
 * original exception is propagated rather than being masked by a failure during cleanup.
 *
 * @param directoryPath the directory path
 * @param tarGzPath the tar gz path
 * @throws IOException             If anything goes wrong
 */
public static void createTarGzOfDirectory(String directoryPath, String tarGzPath) throws IOException {
    try (FileOutputStream fOut = new FileOutputStream(new File(tarGzPath));
            BufferedOutputStream bOut = new BufferedOutputStream(fOut);
            GzipCompressorOutputStream gzOut = new GzipCompressorOutputStream(bOut);
            TarArchiveOutputStream tOut = new TarArchiveOutputStream(gzOut)) {
        addFileToTarGz(tOut, directoryPath, "/");
        // Finish the archive explicitly on the success path, as the original did before closing.
        tOut.finish();
    }
}

From source file:edu.jhu.hlt.concrete.serialization.TarGzCompactCommunicationSerializer.java

/**
 * Writes every communication in {@code commColl} into a gzip-compressed tar archive
 * at {@code outPath}. Each communication becomes one entry named
 * {@code <communication id>.concrete} whose content is the result of {@code toBytes}.
 *
 * @param commColl the communications to archive
 * @param outPath  the archive file to create
 * @throws ConcreteException wrapping any {@link IOException} raised while writing
 */
@Override
public void toTarGz(Collection<Communication> commColl, Path outPath) throws ConcreteException {
    try (OutputStream fileStream = Files.newOutputStream(outPath);
            BufferedOutputStream buffered = new BufferedOutputStream(fileStream);
            GzipCompressorOutputStream gzip = new GzipCompressorOutputStream(buffered);
            TarArchiveOutputStream tar = new TarArchiveOutputStream(gzip)) {
        for (Communication comm : commColl) {
            final String entryName = comm.getId() + ".concrete";
            final TarArchiveEntry tarEntry = new TarArchiveEntry(entryName);
            final byte[] payload = this.toBytes(comm);
            // The entry size must be set before the entry header is written.
            tarEntry.setSize(payload.length);

            tar.putArchiveEntry(tarEntry);
            try (ByteArrayInputStream payloadStream = new ByteArrayInputStream(payload)) {
                IOUtils.copy(payloadStream, tar);
                tar.closeArchiveEntry();
            }
        }
    } catch (IOException ioe) {
        throw new ConcreteException(ioe);
    }
}