Example usage for org.apache.commons.compress.compressors CompressorStreamFactory BZIP2

List of usage examples for org.apache.commons.compress.compressors CompressorStreamFactory BZIP2

Introduction

In this page you can find the example usage for org.apache.commons.compress.compressors CompressorStreamFactory BZIP2.

Prototype

String BZIP2

To view the source code for org.apache.commons.compress.compressors CompressorStreamFactory BZIP2, click the Source Link below.

Click Source Link

Document

Constant used to identify the BZIP2 compression algorithm.

Usage

From source file:com.msd.gin.halyard.tools.HalyardExportTest.java

@Test
public void testExport_TTL_BZ2() throws Exception {
    // Export the table as BZip2-compressed Turtle, then verify the triple count.
    String target = ROOT + name.getMethodName() + ".ttl.bz2";
    HalyardExport.main(new String[] { "-s", TABLE, "-q", GRAPH_QUERY, "-t", target });
    assertEquals(1000, getTriplesCount(target, CompressorStreamFactory.BZIP2, RDFFormat.TURTLE));
}

From source file:com.gitblit.utils.CompressionUtils.java

/**
 * Writes the tree at the (optionally) specified revision, restricted to the
 * (optionally) specified base path, to the supplied output stream as tar.bzip2.
 *
 * @param repository the Git repository to archive
 * @param basePath   if unspecified, the entire repository is assumed
 * @param objectId   if unspecified, HEAD is assumed
 * @param os         stream receiving the compressed archive
 * @return true if the repository was successfully zipped to the supplied output
 *         stream
 */
public static boolean bzip2(Repository repository, String basePath, String objectId, OutputStream os) {
    // Delegate to the generic tar routine, selecting BZIP2 as the compressor.
    return tar(CompressorStreamFactory.BZIP2, repository, basePath, objectId, os);
}

From source file:com.msd.gin.halyard.tools.HalyardExport.java

/**
 * Export function is called for the export execution with given arguments.
 * @param conf Hadoop Configuration instance
 * @param log StatusLog notification service implementation for back-calls
 * @param source String source HTable name
 * @param query String SPARQL Graph query
 * @param targetUrl String URL of the target system (+folder or schema, +table or file name)
 * @param driverClass String JDBC Driver class name (for JDBC export only)
 * @param driverClasspath Array of URLs with JDBC Driver classpath (for JDBC export only)
 * @param jdbcProperties Array of String JDBC connection properties (for JDBC export only)
 * @param trimTable boolean option to trim target JDBC table before export (for JDBC export only)
 * @throws ExportException in case of an export problem
 */
public static void export(Configuration conf, StatusLog log, String source, String query, String targetUrl,
        String driverClass, URL[] driverClasspath, String[] jdbcProperties, boolean trimTable)
        throws ExportException {
    try {
        QueryResultWriter writer = null;
        if (targetUrl.startsWith("file:") || targetUrl.startsWith("hdfs:")) {
            OutputStream out = FileSystem.get(URI.create(targetUrl), conf).create(new Path(targetUrl));
            try {
                // Wrap the stream in a compressor when the URL carries a compression
                // extension, then strip that extension so the format detection below
                // sees the underlying file name.
                if (targetUrl.endsWith(".bz2")) {
                    out = new CompressorStreamFactory()
                            .createCompressorOutputStream(CompressorStreamFactory.BZIP2, out);
                    targetUrl = targetUrl.substring(0, targetUrl.length() - 4);
                } else if (targetUrl.endsWith(".gz")) {
                    out = new CompressorStreamFactory()
                            .createCompressorOutputStream(CompressorStreamFactory.GZIP, out);
                    targetUrl = targetUrl.substring(0, targetUrl.length() - 3);
                }
            } catch (CompressorException e) {
                IOUtils.closeQuietly(out);
                throw new ExportException(e);
            }
            if (targetUrl.endsWith(".csv")) {
                writer = new CSVResultWriter(log, out);
            } else {
                Optional<RDFFormat> form = Rio.getWriterFormatForFileName(targetUrl);
                if (!form.isPresent()) {
                    // Fix: close the already-opened (possibly compressed) stream
                    // before failing, otherwise it leaks.
                    IOUtils.closeQuietly(out);
                    throw new ExportException("Unsupported target file format extension: " + targetUrl);
                }
                writer = new RIOResultWriter(log, form.get(), out);
            }
        } else if (targetUrl.startsWith("jdbc:")) {
            int i = targetUrl.lastIndexOf('/');
            if (i < 0)
                // Fix: "Taret" typo in the error message.
                throw new ExportException("Target URL does not end with /<table_name>");
            if (driverClass == null)
                throw new ExportException(
                        "Missing mandatory JDBC driver class name argument -c <driver_class>");
            writer = new JDBCResultWriter(log, targetUrl.substring(0, i), targetUrl.substring(i + 1),
                    jdbcProperties, driverClass, driverClasspath, trimTable);
        } else {
            throw new ExportException("Unsupported target URL protocol " + targetUrl);
        }
        new HalyardExport(source, query, writer, log).run(conf);
    } catch (IOException e) {
        throw new ExportException(e);
    }
}

From source file:org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTaskTest.java

// Reads back a written line-doc file (optionally compressed) and checks that it
// contains exactly the header plus one document with the expected fields.
private void doReadTest(Path file, Type fileType, String expTitle, String expDate, String expBody)
        throws Exception {
    // Open the raw file, then wrap it in a decompressor matching the file type.
    InputStream in = Files.newInputStream(file);
    switch (fileType) {
    case BZIP2:
        in = csFactory.createCompressorInputStream(CompressorStreamFactory.BZIP2, in);
        break;
    case GZIP:
        in = csFactory.createCompressorInputStream(CompressorStreamFactory.GZIP, in);
        break;
    case PLAIN:
        break; // already readable as-is
    default:
        assertFalse("Unknown file type!", true); //fail, should not happen
    }
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
        assertHeaderLine(reader.readLine());
        String docLine = reader.readLine();
        assertNotNull(docLine);
        String[] fields = docLine.split(Character.toString(WriteLineDocTask.SEP));
        int expectedFieldCount = expBody == null ? 2 : 3;
        assertEquals(expectedFieldCount, fields.length);
        assertEquals(expTitle, fields[0]);
        assertEquals(expDate, fields[1]);
        if (expBody != null) {
            assertEquals(expBody, fields[2]);
        }
        assertNull(reader.readLine()); // exactly one document expected
    }
}

From source file:org.apache.lucene.benchmark.byTask.utils.StreamUtilsTest.java

// Creates a BZip2-compressed test file with the given extension and returns its path.
private Path rawBzip2File(String ext) throws Exception {
    Path file = testDir.resolve("testfile." + ext);
    OutputStream compressed = new CompressorStreamFactory()
            .createCompressorOutputStream(CompressorStreamFactory.BZIP2, Files.newOutputStream(file));
    writeText(compressed);
    return file;
}

From source file:org.apache.marmotta.loader.core.MarmottaLoader.java

/**
 * Load data from the file given as first argument into the handler given as second argument.
 * Both the RDF serialization format and the compression format are taken from the
 * arguments when given, and auto-detected from the file name otherwise.
 *
 * @param file     file to read the data from; in case a compression format is not explicitly
 *                 given, the method will try to decide from the file name if the file is in a
 *                 compressed format
 * @param handler  handler to add the data to
 * @param format   format to use for creating the parser or null for auto-detection
 * @param compression compression format to use, or null for auto-detection (see formats in
 *                 org.apache.commons.compress.compressors.CompressorStreamFactory)
 * @throws RDFParseException
 * @throws IOException
 */
public void loadFile(File file, LoaderHandler handler, RDFFormat format, String compression)
        throws RDFParseException, IOException {
    log.info("loading file {} ...", file);

    CompressorStreamFactory compressorFactory = new CompressorStreamFactory();
    compressorFactory.setDecompressConcatenated(true);

    // Resolve the compression format: an explicit argument wins over detection
    // from the file name, with a warning when the two disagree.
    String autoCompression = detectCompression(file);
    if (compression == null) {
        if (autoCompression != null) {
            log.info("using auto-detected compression ({})", autoCompression);
            compression = autoCompression;
        }
    } else if (autoCompression != null && !compression.equals(autoCompression)) {
        log.warn("user-specified compression ({}) overrides auto-detected compression ({})", compression,
                autoCompression);
    } else {
        log.info("using user-specified compression ({})", compression);
    }

    // Resolve the RDF format the same way, based on the uncompressed file name.
    RDFFormat autoFormat = Rio.getParserFormatForFileName(uncompressedName(file));
    if (format == null) {
        if (autoFormat == null) {
            throw new RDFParseException("could not detect input format of file " + file);
        }
        log.info("using auto-detected format ({})", autoFormat.getName());
        format = autoFormat;
    } else if (autoFormat != null && !format.equals(autoFormat)) {
        log.warn("user-specified format ({}) overrides auto-detected format ({})", format.getName(),
                autoFormat.getName());
    }

    // Open the file and wrap it in the matching decompressing stream; fall back
    // to the plain stream when no compressor applies.
    InputStream dataStream;
    InputStream rawStream = new BufferedInputStream(new FileInputStream(file));
    try {
        if (compression == null) {
            dataStream = compressorFactory.createCompressorInputStream(rawStream);
        } else if (CompressorStreamFactory.GZIP.equalsIgnoreCase(compression)) {
            dataStream = new GzipCompressorInputStream(rawStream, true);
        } else if (CompressorStreamFactory.BZIP2.equalsIgnoreCase(compression)) {
            dataStream = new BZip2CompressorInputStream(rawStream, true);
        } else if (CompressorStreamFactory.XZ.equalsIgnoreCase(compression)) {
            dataStream = new XZCompressorInputStream(rawStream, true);
        } else {
            // does not honour decompressConcatenated
            dataStream = compressorFactory.createCompressorInputStream(compression, rawStream);
        }
    } catch (CompressorException ex) {
        log.info("no compression detected, using plain input stream");
        dataStream = rawStream;
    }

    // load using the input stream
    load(dataStream, handler, format);
}

From source file:org.apache.marmotta.loader.core.MarmottaLoader.java

/**
 * Loads all RDF data contained in the files of an archive into the handler.
 * 7z archives are read natively via SevenZFile; all other archive formats are
 * detected by ArchiveStreamFactory, after optionally unwrapping a GZIP, BZIP2
 * or XZ compression layer detected from the archive file name.
 *
 * @param archive archive file to read; must exist and be readable
 * @param handler handler to add the data to
 * @param format  format to use for creating the parser, or null to auto-detect
 *                per entry from the entry name
 * @throws RDFParseException if an entry's format cannot be determined, or when
 *                the archive does not exist or is not readable
 * @throws IOException
 * @throws ArchiveException
 */
public void loadArchive(File archive, LoaderHandler handler, RDFFormat format)
        throws RDFParseException, IOException, ArchiveException {
    log.info("loading files in archive {} ...", archive);

    if (archive.exists() && archive.canRead()) {

        if (archive.getName().endsWith("7z")) {
            // 7z is not supported by ArchiveStreamFactory, so handle it natively.
            log.info("auto-detected archive format: 7Z");

            final SevenZFile sevenZFile = new SevenZFile(archive);

            try {
                SevenZArchiveEntry entry;
                while ((entry = sevenZFile.getNextEntry()) != null) {

                    if (!entry.isDirectory()) {
                        log.info("loading entry {} ...", entry.getName());

                        // detect the file format
                        RDFFormat detectedFormat = Rio.getParserFormatForFileName(entry.getName());
                        if (format == null) {
                            if (detectedFormat != null) {
                                log.info("auto-detected entry format: {}", detectedFormat.getName());
                                // NOTE(review): assigning the parameter here makes the first
                                // detected format stick for all subsequent entries.
                                format = detectedFormat;
                            } else {
                                throw new RDFParseException(
                                        "could not detect input format of entry " + entry.getName());
                            }
                        } else {
                            if (detectedFormat != null && !format.equals(detectedFormat)) {
                                log.warn("user-specified entry format ({}) overrides auto-detected format ({})",
                                        format.getName(), detectedFormat.getName());
                            } else {
                                log.info("user-specified entry format: {}", format.getName());
                            }
                        }

                        // Adapt SevenZFile's read methods to an InputStream view of the
                        // current entry; SevenZFile reads the entry opened above.
                        load(new InputStream() {
                            @Override
                            public int read() throws IOException {
                                return sevenZFile.read();
                            }

                            @Override
                            public int read(byte[] b) throws IOException {
                                return sevenZFile.read(b);
                            }

                            @Override
                            public int read(byte[] b, int off, int len) throws IOException {
                                return sevenZFile.read(b, off, len);
                            }
                        }, handler, format);
                    }
                }
            } finally {
                sevenZFile.close();
            }

        } else {
            InputStream in;

            // Unwrap an optional stream-compression layer (e.g. .tar.gz, .tar.bz2)
            // before handing the stream to the archive detector.
            String archiveCompression = detectCompression(archive);
            InputStream fin = new BufferedInputStream(new FileInputStream(archive));
            if (archiveCompression != null) {
                if (CompressorStreamFactory.GZIP.equalsIgnoreCase(archiveCompression)) {
                    log.info("auto-detected archive compression: GZIP");
                    in = new GzipCompressorInputStream(fin, true);
                } else if (CompressorStreamFactory.BZIP2.equalsIgnoreCase(archiveCompression)) {
                    log.info("auto-detected archive compression: BZIP2");
                    in = new BZip2CompressorInputStream(fin, true);
                } else if (CompressorStreamFactory.XZ.equalsIgnoreCase(archiveCompression)) {
                    log.info("auto-detected archive compression: XZ");
                    in = new XZCompressorInputStream(fin, true);
                } else {
                    in = fin;
                }
            } else {
                in = fin;
            }

            // Auto-detect the archive format (tar, zip, ...) from the stream content.
            ArchiveInputStream zipStream = new ArchiveStreamFactory()
                    .createArchiveInputStream(new BufferedInputStream(in));
            logArchiveType(zipStream);

            ArchiveEntry entry;
            while ((entry = zipStream.getNextEntry()) != null) {

                if (!entry.isDirectory()) {
                    log.info("loading entry {} ...", entry.getName());

                    // detect the file format
                    RDFFormat detectedFormat = Rio.getParserFormatForFileName(entry.getName());
                    if (format == null) {
                        if (detectedFormat != null) {
                            log.info("auto-detected entry format: {}", detectedFormat.getName());
                            // NOTE(review): as above, the first detected format is reused
                            // for all remaining entries.
                            format = detectedFormat;
                        } else {
                            throw new RDFParseException(
                                    "could not detect input format of entry " + entry.getName());
                        }
                    } else {
                        if (detectedFormat != null && !format.equals(detectedFormat)) {
                            log.warn("user-specified entry format ({}) overrides auto-detected format ({})",
                                    format.getName(), detectedFormat.getName());
                        } else {
                            log.info("user-specified entry format: {}", format.getName());
                        }
                    }

                    // The archive stream itself is positioned at the current entry.
                    load(zipStream, handler, format);
                }
            }
        }

    } else {
        throw new RDFParseException(
                "could not load files from archive " + archive + ": it does not exist or is not readable");
    }

}

From source file:org.apache.marmotta.loader.core.MarmottaLoader.java

/**
 * Detects the compression format from the file name, or returns null in case
 * auto-detection failed.
 *
 * @param file file whose name is inspected
 * @return one of the CompressorStreamFactory compression constants, or null when unknown
 */
private String detectCompression(File file) {
    String name = file.getName();
    if (BZip2Utils.isCompressedFilename(name)) {
        return CompressorStreamFactory.BZIP2;
    }
    if (GzipUtils.isCompressedFilename(name)) {
        return CompressorStreamFactory.GZIP;
    }
    if (XZUtils.isCompressedFilename(name)) {
        return CompressorStreamFactory.XZ;
    }
    return null;
}

From source file:org.apache.marmotta.loader.core.MarmottaLoader.java

/**
 * Parses the loader command-line arguments into a Configuration, validating the
 * backend and MIME-type options and collecting backend-specific options under
 * namespaced keys.
 *
 * @param args raw command-line arguments
 * @return configuration populated from the recognized options
 * @throws ParseException on unknown backend, unrecognized MIME type, or CLI syntax errors
 */
public static Configuration parseOptions(String[] args) throws ParseException {
    Options options = buildOptions();

    CommandLineParser cliParser = new PosixParser();
    CommandLine cli = cliParser.parse(options, args);

    Configuration config = new MapConfiguration(new HashMap<String, Object>());

    // -B: backend selection; must match one of the registered backends
    if (cli.hasOption('B')) {
        Set<String> knownBackends = Sets
                .newHashSet(Iterators.transform(backends.iterator(), new BackendIdentifierFunction()));
        if (!knownBackends.contains(cli.getOptionValue('B'))) {
            throw new ParseException("the backend " + cli.getOptionValue('B') + " does not exist");
        }

        config.setProperty(LoaderOptions.BACKEND, cli.getOptionValue('B'));
    }

    // -b: base URI
    if (cli.hasOption('b')) {
        config.setProperty(LoaderOptions.BASE_URI, cli.getOptionValue('b'));
    }

    // -z / -j: force GZIP or BZIP2 input compression (-j wins when both are given,
    // because it is applied second)
    if (cli.hasOption('z')) {
        config.setProperty(LoaderOptions.COMPRESSION, CompressorStreamFactory.GZIP);
    }
    if (cli.hasOption('j')) {
        config.setProperty(LoaderOptions.COMPRESSION, CompressorStreamFactory.BZIP2);
    }

    // -c: target context
    if (cli.hasOption('c')) {
        config.setProperty(LoaderOptions.CONTEXT, cli.getOptionValue('c'));
    }

    // -t: explicit input MIME type, validated against the known RDF formats
    if (cli.hasOption('t')) {
        RDFFormat rdfFormat = getRDFFormat(cli.getOptionValue('t'));
        if (rdfFormat == null) {
            throw new ParseException("unrecognized MIME type: " + cli.getOptionValue('t'));
        }

        config.setProperty(LoaderOptions.FORMAT, rdfFormat.getDefaultMIMEType());
    }

    // -f / -d / -a: files, directories and archives to load
    if (cli.hasOption('f')) {
        config.setProperty(LoaderOptions.FILES, Arrays.asList(cli.getOptionValues('f')));
    }
    if (cli.hasOption('d')) {
        config.setProperty(LoaderOptions.DIRS, Arrays.asList(cli.getOptionValues('d')));
    }
    if (cli.hasOption('a')) {
        config.setProperty(LoaderOptions.ARCHIVES, Arrays.asList(cli.getOptionValues('a')));
    }

    // -s: enable statistics collection into the given graph
    if (cli.hasOption('s')) {
        config.setProperty(LoaderOptions.STATISTICS_ENABLED, true);
        config.setProperty(LoaderOptions.STATISTICS_GRAPH, cli.getOptionValue('s'));
    }

    // -Dkey=value: arbitrary extra properties, copied through verbatim
    if (cli.hasOption('D')) {
        for (Map.Entry e : cli.getOptionProperties("D").entrySet()) {
            config.setProperty(e.getKey().toString(), e.getValue());
        }
    }

    // backend-specific options, namespaced as backend.<identifier>.<option>
    for (LoaderBackend backend : backends) {
        for (Option option : backend.getOptions()) {
            if (!cli.hasOption(option.getOpt())) {
                continue;
            }
            String key = String.format("backend.%s.%s", backend.getIdentifier(),
                    option.getLongOpt() != null ? option.getLongOpt() : option.getOpt());
            if (option.hasArg()) {
                if (option.hasArgs()) {
                    config.setProperty(key, Arrays.asList(cli.getOptionValues(option.getOpt())));
                } else {
                    config.setProperty(key, cli.getOptionValue(option.getOpt()));
                }
            } else {
                config.setProperty(key, true);
            }
        }
    }

    return config;
}

From source file:org.apache.marmotta.loader.core.test.CLITest.java

@Test
public void testCompressionBZIP2() throws ParseException {
    // "-j" selects BZIP2 compression on the command line.
    Configuration cfg = MarmottaLoader.parseOptions(new String[] { "-j", "-f", "file1.ttl" });

    String compression = cfg.getString(LoaderOptions.COMPRESSION);
    Assert.assertNotNull(compression);
    Assert.assertEquals(CompressorStreamFactory.BZIP2, compression);
}