Example usage for org.apache.commons.compress.compressors CompressorStreamFactory GZIP

List of usage examples for org.apache.commons.compress.compressors CompressorStreamFactory GZIP

Introduction

In this page you can find the example usage for org.apache.commons.compress.compressors CompressorStreamFactory GZIP.

Prototype

String GZIP

To view the source code for org.apache.commons.compress.compressors CompressorStreamFactory GZIP, click the Source link below.

Click Source Link

Document

Constant used to identify the GZIP compression algorithm.

Usage

From source file:no.finntech.shootout.avro.GzipJsonAvro.java

@Override
protected String getCompressor() {
    // Identifier of the compression algorithm this variant uses (CompressorStreamFactory constant).
    return CompressorStreamFactory.GZIP;
}

From source file:org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTaskTest.java

/**
 * Reads a previously written line-doc file and verifies its header plus the
 * first document line (title, date and optionally body).
 */
private void doReadTest(Path file, Type fileType, String expTitle, String expDate, String expBody)
        throws Exception {
    // Open the raw file, then wrap it in a decompressor when the type requires one.
    InputStream in = Files.newInputStream(file);
    if (fileType == Type.BZIP2) {
        in = csFactory.createCompressorInputStream(CompressorStreamFactory.BZIP2, in);
    } else if (fileType == Type.GZIP) {
        in = csFactory.createCompressorInputStream(CompressorStreamFactory.GZIP, in);
    } else if (fileType != Type.PLAIN) {
        // fail: an unknown file type should never reach this method
        assertFalse("Unknown file type!", true);
    }
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
        String currentLine = reader.readLine();
        assertHeaderLine(currentLine);
        currentLine = reader.readLine();
        assertNotNull(currentLine);
        // Split the document line into its separator-delimited fields.
        String[] fields = currentLine.split(Character.toString(WriteLineDocTask.SEP));
        int expectedFieldCount = expBody == null ? 2 : 3;
        assertEquals(expectedFieldCount, fields.length);
        assertEquals(expTitle, fields[0]);
        assertEquals(expDate, fields[1]);
        if (expBody != null) {
            assertEquals(expBody, fields[2]);
        }
        // Exactly one document line is expected; the file must end here.
        assertNull(reader.readLine());
    }
}

From source file:org.apache.lucene.benchmark.byTask.utils.StreamUtilsTest.java

/**
 * Writes the test text into a new gzip-compressed file named
 * "testfile.&lt;ext&gt;" inside the test directory and returns its path.
 */
private Path rawGzipFile(String ext) throws Exception {
    Path target = testDir.resolve("testfile." + ext);
    CompressorStreamFactory factory = new CompressorStreamFactory();
    OutputStream compressed = factory.createCompressorOutputStream(CompressorStreamFactory.GZIP,
            Files.newOutputStream(target));
    writeText(compressed);
    return target;
}

From source file:org.apache.marmotta.loader.core.MarmottaLoader.java

/**
 * Load RDF data from the given file into the handler, transparently decompressing the
 * input when a compression format is given or can be auto-detected from the file name.
 *
 * @param file     file to read the data from; in case a compression format is not explicitly given, the method will
 *                 try to decide from the file name if the file is in a compressed format
 * @param handler  handler to add the data to
 * @param format   format to use for creating the parser or null for auto-detection
 * @param compression compression format to use, or null for auto-detection (see formats in org.apache.commons.compress.compressors.CompressorStreamFactory)
 * @throws RDFParseException when the RDF format can neither be given nor detected, or the data cannot be parsed
 * @throws IOException when the file cannot be read
 */
public void loadFile(File file, LoaderHandler handler, RDFFormat format, String compression)
        throws RDFParseException, IOException {
    log.info("loading file {} ...", file);

    CompressorStreamFactory cf = new CompressorStreamFactory();
    // treat concatenated compressed members (e.g. multi-member gzip) as a single stream
    cf.setDecompressConcatenated(true);

    // detect the file compression from the file name; an explicit argument wins over detection
    String detectedCompression = detectCompression(file);
    if (compression == null) {
        if (detectedCompression != null) {
            log.info("using auto-detected compression ({})", detectedCompression);
            compression = detectedCompression;
        }
    } else {
        if (detectedCompression != null && !compression.equals(detectedCompression)) {
            log.warn("user-specified compression ({}) overrides auto-detected compression ({})", compression,
                    detectedCompression);
        } else {
            log.info("using user-specified compression ({})", compression);
        }
    }

    // detect the RDF serialization format from the (uncompressed) file name
    RDFFormat detectedFormat = Rio.getParserFormatForFileName(uncompressedName(file));
    if (format == null) {
        if (detectedFormat != null) {
            log.info("using auto-detected format ({})", detectedFormat.getName());
            format = detectedFormat;
        } else {
            throw new RDFParseException("could not detect input format of file " + file);
        }
    } else {
        if (detectedFormat != null && !format.equals(detectedFormat)) {
            log.warn("user-specified format ({}) overrides auto-detected format ({})", format.getName(),
                    detectedFormat.getName());
        }
    }

    // create input stream from file and wrap in compressor stream
    InputStream in;
    InputStream fin = new BufferedInputStream(new FileInputStream(file));
    try {
        if (compression != null) {
            // for GZIP/BZIP2/XZ use the dedicated stream classes directly, passing
            // decompressConcatenated=true explicitly (second constructor argument)
            if (CompressorStreamFactory.GZIP.equalsIgnoreCase(compression)) {
                in = new GzipCompressorInputStream(fin, true);
            } else if (CompressorStreamFactory.BZIP2.equalsIgnoreCase(compression)) {
                in = new BZip2CompressorInputStream(fin, true);
            } else if (CompressorStreamFactory.XZ.equalsIgnoreCase(compression)) {
                in = new XZCompressorInputStream(fin, true);
            } else {
                // does not honour decompressConcatenated
                in = cf.createCompressorInputStream(compression, fin);
            }
        } else {
            // no compression known: let the factory sniff the stream content
            in = cf.createCompressorInputStream(fin);
        }
    } catch (CompressorException ex) {
        // deliberate best-effort fallback: an unrecognized stream is read as plain data
        log.info("no compression detected, using plain input stream");
        in = fin;
    }

    // load using the input stream
    load(in, handler, format);
}

From source file:org.apache.marmotta.loader.core.MarmottaLoader.java

/**
 * Load RDF data from every non-directory entry of the given archive into the handler.
 * 7z archives are read through {@link SevenZFile}; all other archives go through the
 * commons-compress ArchiveStreamFactory, optionally unwrapping an outer GZIP/BZIP2/XZ
 * compression layer first (e.g. .tar.gz).
 *
 * @param archive archive file to read; must exist and be readable
 * @param handler handler to add the data to
 * @param format  format to use for creating the parser or null for per-entry auto-detection
 * @throws RDFParseException when an entry's format cannot be detected or parsed
 * @throws IOException when the archive cannot be read
 * @throws ArchiveException when the archive format is not recognized
 */
public void loadArchive(File archive, LoaderHandler handler, RDFFormat format)
        throws RDFParseException, IOException, ArchiveException {
    log.info("loading files in archive {} ...", archive);

    if (archive.exists() && archive.canRead()) {

        // 7z is detected purely by file extension and handled via the dedicated reader
        if (archive.getName().endsWith("7z")) {
            log.info("auto-detected archive format: 7Z");

            final SevenZFile sevenZFile = new SevenZFile(archive);

            try {
                SevenZArchiveEntry entry;
                while ((entry = sevenZFile.getNextEntry()) != null) {

                    if (!entry.isDirectory()) {
                        log.info("loading entry {} ...", entry.getName());

                        // detect the file format
                        // NOTE(review): when format is auto-detected, the assignment below
                        // makes the first entry's format stick for all subsequent entries
                        RDFFormat detectedFormat = Rio.getParserFormatForFileName(entry.getName());
                        if (format == null) {
                            if (detectedFormat != null) {
                                log.info("auto-detected entry format: {}", detectedFormat.getName());
                                format = detectedFormat;
                            } else {
                                throw new RDFParseException(
                                        "could not detect input format of entry " + entry.getName());
                            }
                        } else {
                            if (detectedFormat != null && !format.equals(detectedFormat)) {
                                log.warn("user-specified entry format ({}) overrides auto-detected format ({})",
                                        format.getName(), detectedFormat.getName());
                            } else {
                                log.info("user-specified entry format: {}", format.getName());
                            }
                        }

                        // adapt SevenZFile's read methods to the InputStream interface
                        // expected by load(); reads stay within the current entry
                        load(new InputStream() {
                            @Override
                            public int read() throws IOException {
                                return sevenZFile.read();
                            }

                            @Override
                            public int read(byte[] b) throws IOException {
                                return sevenZFile.read(b);
                            }

                            @Override
                            public int read(byte[] b, int off, int len) throws IOException {
                                return sevenZFile.read(b, off, len);
                            }
                        }, handler, format);
                    }
                }
            } finally {
                sevenZFile.close();
            }

        } else {
            InputStream in;

            // unwrap an outer compression layer (e.g. .tar.gz) before opening the archive
            String archiveCompression = detectCompression(archive);
            InputStream fin = new BufferedInputStream(new FileInputStream(archive));
            if (archiveCompression != null) {
                if (CompressorStreamFactory.GZIP.equalsIgnoreCase(archiveCompression)) {
                    log.info("auto-detected archive compression: GZIP");
                    in = new GzipCompressorInputStream(fin, true);
                } else if (CompressorStreamFactory.BZIP2.equalsIgnoreCase(archiveCompression)) {
                    log.info("auto-detected archive compression: BZIP2");
                    in = new BZip2CompressorInputStream(fin, true);
                } else if (CompressorStreamFactory.XZ.equalsIgnoreCase(archiveCompression)) {
                    log.info("auto-detected archive compression: XZ");
                    in = new XZCompressorInputStream(fin, true);
                } else {
                    in = fin;
                }
            } else {
                in = fin;
            }

            // the archive format itself (tar, zip, ...) is sniffed from the stream content
            ArchiveInputStream zipStream = new ArchiveStreamFactory()
                    .createArchiveInputStream(new BufferedInputStream(in));
            logArchiveType(zipStream);

            ArchiveEntry entry;
            while ((entry = zipStream.getNextEntry()) != null) {

                if (!entry.isDirectory()) {
                    log.info("loading entry {} ...", entry.getName());

                    // detect the file format
                    // NOTE(review): same as above — an auto-detected format is assigned to
                    // the parameter and reused for all remaining entries
                    RDFFormat detectedFormat = Rio.getParserFormatForFileName(entry.getName());
                    if (format == null) {
                        if (detectedFormat != null) {
                            log.info("auto-detected entry format: {}", detectedFormat.getName());
                            format = detectedFormat;
                        } else {
                            throw new RDFParseException(
                                    "could not detect input format of entry " + entry.getName());
                        }
                    } else {
                        if (detectedFormat != null && !format.equals(detectedFormat)) {
                            log.warn("user-specified entry format ({}) overrides auto-detected format ({})",
                                    format.getName(), detectedFormat.getName());
                        } else {
                            log.info("user-specified entry format: {}", format.getName());
                        }
                    }

                    load(zipStream, handler, format);
                }
            }
        }

    } else {
        throw new RDFParseException(
                "could not load files from archive " + archive + ": it does not exist or is not readable");
    }

}

From source file:org.apache.marmotta.loader.core.MarmottaLoader.java

/**
 * Guess the compression scheme from the file name's extension.
 *
 * @param file the file whose name is inspected
 * @return the matching CompressorStreamFactory constant, or null when the name
 *         matches none of the known compressed-file extensions
 */
private String detectCompression(File file) {
    String name = file.getName();
    if (BZip2Utils.isCompressedFilename(name)) {
        return CompressorStreamFactory.BZIP2;
    }
    if (GzipUtils.isCompressedFilename(name)) {
        return CompressorStreamFactory.GZIP;
    }
    if (XZUtils.isCompressedFilename(name)) {
        return CompressorStreamFactory.XZ;
    }
    return null;
}

From source file:org.apache.marmotta.loader.core.MarmottaLoader.java

/**
 * Parse the command line arguments into a loader configuration.
 *
 * @param args command line arguments as passed to main()
 * @return a Configuration populated from the recognized options
 * @throws ParseException when an option is invalid (unknown backend, unrecognized MIME type, ...)
 */
public static Configuration parseOptions(String[] args) throws ParseException {
    Options options = buildOptions();

    CommandLineParser parser = new PosixParser();
    CommandLine cmd = parser.parse(options, args);

    Configuration result = new MapConfiguration(new HashMap<String, Object>());

    if (cmd.hasOption('B')) {
        // validate that the requested backend is among the registered ones
        Set<String> existing = Sets
                .newHashSet(Iterators.transform(backends.iterator(), new BackendIdentifierFunction()));
        if (!existing.contains(cmd.getOptionValue('B'))) {
            throw new ParseException("the backend " + cmd.getOptionValue('B') + " does not exist");
        }

        result.setProperty(LoaderOptions.BACKEND, cmd.getOptionValue('B'));
    }

    if (cmd.hasOption('b')) {
        result.setProperty(LoaderOptions.BASE_URI, cmd.getOptionValue('b'));
    }

    // -z selects GZIP, -j selects BZIP2; if both are given, -j wins (checked last)
    if (cmd.hasOption('z')) {
        result.setProperty(LoaderOptions.COMPRESSION, CompressorStreamFactory.GZIP);
    }

    if (cmd.hasOption('j')) {
        result.setProperty(LoaderOptions.COMPRESSION, CompressorStreamFactory.BZIP2);
    }

    if (cmd.hasOption('c')) {
        result.setProperty(LoaderOptions.CONTEXT, cmd.getOptionValue('c'));
    }

    if (cmd.hasOption('t')) {
        // the format option is given as a MIME type and stored back as the canonical default
        RDFFormat fmt = getRDFFormat(cmd.getOptionValue('t'));
        if (fmt == null) {
            throw new ParseException("unrecognized MIME type: " + cmd.getOptionValue('t'));
        }

        result.setProperty(LoaderOptions.FORMAT, fmt.getDefaultMIMEType());
    }

    if (cmd.hasOption('f')) {
        result.setProperty(LoaderOptions.FILES, Arrays.asList(cmd.getOptionValues('f')));
    }

    if (cmd.hasOption('d')) {
        result.setProperty(LoaderOptions.DIRS, Arrays.asList(cmd.getOptionValues('d')));
    }

    if (cmd.hasOption('a')) {
        result.setProperty(LoaderOptions.ARCHIVES, Arrays.asList(cmd.getOptionValues('a')));
    }

    if (cmd.hasOption('s')) {
        result.setProperty(LoaderOptions.STATISTICS_ENABLED, true);
        result.setProperty(LoaderOptions.STATISTICS_GRAPH, cmd.getOptionValue('s'));
    }

    if (cmd.hasOption('D')) {
        // generic -Dkey=value properties are copied verbatim into the configuration
        // (parameterized Map.Entry instead of the raw type used previously)
        for (Map.Entry<Object, Object> e : cmd.getOptionProperties("D").entrySet()) {
            result.setProperty(e.getKey().toString(), e.getValue());
        }
    }

    // backend-specific options are namespaced as backend.<identifier>.<long-or-short-opt>
    for (LoaderBackend b : backends) {
        for (Option option : b.getOptions()) {
            if (cmd.hasOption(option.getOpt())) {
                String key = String.format("backend.%s.%s", b.getIdentifier(),
                        option.getLongOpt() != null ? option.getLongOpt() : option.getOpt());
                if (option.hasArg()) {
                    if (option.hasArgs()) {
                        result.setProperty(key, Arrays.asList(cmd.getOptionValues(option.getOpt())));
                    } else {
                        result.setProperty(key, cmd.getOptionValue(option.getOpt()));
                    }
                } else {
                    // flag option without argument: record its presence as a boolean
                    result.setProperty(key, true);
                }
            }
        }
    }

    return result;
}

From source file:org.apache.marmotta.loader.core.test.CLITest.java

@Test
public void testCompressionGZIP() throws ParseException {
    // The "-z" flag must select GZIP compression in the parsed configuration.
    String[] args = new String[] { "-z", "-f", "file1.ttl" };
    Configuration config = MarmottaLoader.parseOptions(args);

    String compression = config.getString(LoaderOptions.COMPRESSION);
    Assert.assertNotNull(compression);
    Assert.assertEquals(CompressorStreamFactory.GZIP, compression);
}

From source file:org.apache.marmotta.loader.core.test.FilesTest.java

@Parameterized.Parameters
public static Collection<Object[]> data() {
    // Each row pairs a compression constant (null = uncompressed) with its sample file.
    return Arrays.asList(
            new Object[] { null, "demo-data.rdf" },
            new Object[] { CompressorStreamFactory.GZIP, "demo-data.rdf.gz" },
            new Object[] { CompressorStreamFactory.BZIP2, "demo-data.rdf.bz2" });
}

From source file:org.dataconservancy.packaging.tool.impl.BOREMPackageGeneratorTest.java

/**
 * Tests generating a well formed package, with all required parameters and the following options:
 * <ul>
 *     <li>checksum alg: md5</li>
 *     <li>compression-format: gz</li>
 *     <li>archiving-format: not specified</li>
 * </ul>
 *
 * <p/>
 *
 * Expects the de-compressed, deserialized package to contain:
 * <ul>
 *     <li>bag-info.txt file: Besides the input parameters, bag-info.txt file is expected to contain reference
 *     to the ReM of the whole package, expressed in PKG-ORE-REM parameter</li>
 *     <li>bagit.txt file</li>
 *     <li>manifest-&lt;checksum-alg&gt;.txt files</li>
 *     <li>tagmanifest-&lt;checksum-alg&gt;.txt files</li>
 *     <li>data/ folder</li>
 *     <li>payload files in data/ folder</li>
 *     <li>ORE-REM folder</li>
 *     <li>description files in ORE-REM/ folder</li>
 * </ul>
 *
 *
 * @throws CompressorException when the package stream cannot be decompressed
 * @throws ArchiveException when the tar archive cannot be opened
 * @throws IOException when reading an archive entry fails
 */
@Test
public void testGeneratingAGoodPackage() throws CompressorException, ArchiveException, IOException {
    // Populate all required generation parameters plus the options under test
    // (md5 checksum, gz compression, default archiving format).
    params.addParam(GeneralParameterNames.PACKAGE_FORMAT_ID, PackagingFormat.BOREM.toString());
    params.addParam(GeneralParameterNames.PACKAGE_NAME, packageName);
    params.addParam(GeneralParameterNames.PACKAGE_LOCATION, packageLocationName);
    params.addParam(GeneralParameterNames.PACKAGE_STAGING_LOCATION, packageStagingLocationName);
    params.addParam(BagItParameterNames.BAGIT_PROFILE_ID, bagItProfileId);
    params.addParam(BagItParameterNames.CONTACT_NAME, contactName);
    params.addParam(BagItParameterNames.CONTACT_EMAIL, contactEmail);
    params.addParam(BagItParameterNames.CONTACT_PHONE, contactPhone);
    params.addParam(GeneralParameterNames.CHECKSUM_ALGORITHMS, checksumAlg);
    params.addParam(BagItParameterNames.COMPRESSION_FORMAT, compressionFormat);
    params.addParam(BagItParameterNames.PKG_BAG_DIR, packageName);
    // params.addParam(GeneralParameterNames.CONTENT_ROOT_LOCATION, pkgBagDir);
    params.addParam(GeneralParameterNames.CONTENT_ROOT_LOCATION, contentRootLocation);
    Package resultedPackage = underTest.generatePackage(desc, params);

    //Decompress and de archive files
    CompressorInputStream cis = new CompressorStreamFactory()
            .createCompressorInputStream(CompressorStreamFactory.GZIP, resultedPackage.serialize());
    TarArchiveInputStream ais = (TarArchiveInputStream) new ArchiveStreamFactory()
            .createArchiveInputStream(ArchiveStreamFactory.TAR, cis);

    // Walk every archive entry, collecting normalized entry names and verifying
    // the content of bag-info.txt and the two payload data files in passing.
    Set<String> files = new HashSet<String>();
    ArchiveEntry entry = ais.getNextEntry();
    while (entry != null) {
        files.add(entry.getName().replace("\\", "/"));
        if (entry.getName().equals(packageName + "/bag-info.txt") && ais.canReadEntryData(entry)) {
            verifyBagInfoContent(ais);
        }
        if (entry.getName()
                .equals(packageName + "/data/ProjectOne/Collection One/DataItem One/" + dataFileOneName)) {
            compareDataFile(ais, pathToFileOne);
        }
        if (entry.getName()
                .equals(packageName + "/data/ProjectOne/Collection One/DataItem One/" + dataFileTwoName)) {
            compareDataFile(ais, pathToFileTwo);
        }
        entry = ais.getNextEntry();
    }
    // The package must contain the expected BagIt structure and payload files.
    assertTrue(files.contains(packageName + "/bag-info.txt"));
    assertTrue(files.contains(packageName + "/bagit.txt"));
    assertTrue(files.contains(packageName + "/tagmanifest-md5.txt"));
    assertTrue(files.contains(packageName + "/manifest-md5.txt"));
    assertTrue(files.contains(packageName + "/data/"));
    assertTrue(files.contains(packageName + "/data/ProjectOne/Collection One/DataItem One/" + dataFileOneName));
    assertTrue(files.contains(packageName + "/data/ProjectOne/Collection One/DataItem One/" + dataFileTwoName));
    assertTrue(files.contains(packageName + "/ORE-REM/"));

    assertTrue(SupportedMimeTypes.getMimeType(compressionFormat).contains(resultedPackage.getContentType()));

}