List of usage examples for org.apache.commons.compress.compressors.CompressorStreamFactory.BZIP2

The BZIP2 field is a String constant that names the BZip2 compression format when creating compressor streams with CompressorStreamFactory.
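Before the per-project examples, here is a minimal, self-contained sketch of the round trip with this constant: the factory wraps plain streams in BZip2 compressor and decompressor streams. The file name out.bz2 and the sample text are arbitrary choices for illustration.

    import java.io.*;
    import java.nio.charset.StandardCharsets;
    import org.apache.commons.compress.compressors.CompressorStreamFactory;

    public class Bzip2RoundTrip {
        public static void main(String[] args) throws Exception {
            CompressorStreamFactory factory = new CompressorStreamFactory();
            File file = new File("out.bz2");
            // Compress: wrap a plain OutputStream in a BZip2 compressor stream.
            try (OutputStream out = factory.createCompressorOutputStream(
                    CompressorStreamFactory.BZIP2, new FileOutputStream(file))) {
                out.write("hello bzip2".getBytes(StandardCharsets.UTF_8));
            }
            // Decompress: name the format explicitly instead of relying on auto-detection.
            try (InputStream in = factory.createCompressorInputStream(
                    CompressorStreamFactory.BZIP2, new FileInputStream(file))) {
                byte[] buf = new byte[64];
                int n = in.read(buf);
                System.out.println(new String(buf, 0, n, StandardCharsets.UTF_8));
            }
        }
    }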
From source file:com.msd.gin.halyard.tools.HalyardExportTest.java
    @Test
    public void testExport_TTL_BZ2() throws Exception {
        HalyardExport.main(new String[] { "-s", TABLE, "-q", GRAPH_QUERY, "-t",
                ROOT + name.getMethodName() + ".ttl.bz2" });
        assertEquals(1000, getTriplesCount(ROOT + name.getMethodName() + ".ttl.bz2",
                CompressorStreamFactory.BZIP2, RDFFormat.TURTLE));
    }
From source file:com.gitblit.utils.CompressionUtils.java
    /**
     * tar.bzip2 the contents of the tree at the (optionally) specified revision and
     * the (optionally) specified basepath to the supplied output stream.
     *
     * @param repository
     * @param basePath
     *            if unspecified, entire repository is assumed.
     * @param objectId
     *            if unspecified, HEAD is assumed.
     * @param os
     * @return true if repository was successfully zipped to supplied output
     *         stream
     */
    public static boolean bzip2(Repository repository, String basePath, String objectId, OutputStream os) {
        return tar(CompressorStreamFactory.BZIP2, repository, basePath, objectId, os);
    }
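A hedged usage sketch for the helper above, assuming a JGit repository on disk; the repository path and output file name are hypothetical. Per the Javadoc, passing null for basePath and objectId leaves them unspecified, so the entire repository at HEAD is archived.

    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.OutputStream;
    import org.eclipse.jgit.lib.Repository;
    import org.eclipse.jgit.storage.file.FileRepositoryBuilder;
    import com.gitblit.utils.CompressionUtils;

    public class Bzip2ArchiveExample {
        public static void main(String[] args) throws IOException {
            // Hypothetical paths; adjust to a real repository and target file.
            Repository repository = new FileRepositoryBuilder()
                    .setGitDir(new File("/path/to/repo/.git")).build();
            try (OutputStream os = new FileOutputStream("repo.tar.bz2")) {
                // null basePath and null objectId mean: entire repository at HEAD.
                boolean ok = CompressionUtils.bzip2(repository, null, null, os);
                System.out.println("archived: " + ok);
            } finally {
                repository.close();
            }
        }
    }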
From source file:com.msd.gin.halyard.tools.HalyardExport.java
    /**
     * Export function is called for the export execution with given arguments.
     * @param conf Hadoop Configuration instance
     * @param log StatusLog notification service implementation for back-calls
     * @param source String source HTable name
     * @param query String SPARQL Graph query
     * @param targetUrl String URL of the target system (+folder or schema, +table or file name)
     * @param driverClass String JDBC Driver class name (for JDBC export only)
     * @param driverClasspath Array of URLs with JDBC Driver classpath (for JDBC export only)
     * @param jdbcProperties Array of String JDBC connection properties (for JDBC export only)
     * @param trimTable boolean option to trim target JDBC table before export (for JDBC export only)
     * @throws ExportException in case of an export problem
     */
    public static void export(Configuration conf, StatusLog log, String source, String query, String targetUrl,
            String driverClass, URL[] driverClasspath, String[] jdbcProperties, boolean trimTable)
            throws ExportException {
        try {
            QueryResultWriter writer = null;
            if (targetUrl.startsWith("file:") || targetUrl.startsWith("hdfs:")) {
                OutputStream out = FileSystem.get(URI.create(targetUrl), conf).create(new Path(targetUrl));
                try {
                    if (targetUrl.endsWith(".bz2")) {
                        out = new CompressorStreamFactory()
                                .createCompressorOutputStream(CompressorStreamFactory.BZIP2, out);
                        targetUrl = targetUrl.substring(0, targetUrl.length() - 4);
                    } else if (targetUrl.endsWith(".gz")) {
                        out = new CompressorStreamFactory()
                                .createCompressorOutputStream(CompressorStreamFactory.GZIP, out);
                        targetUrl = targetUrl.substring(0, targetUrl.length() - 3);
                    }
                } catch (CompressorException e) {
                    IOUtils.closeQuietly(out);
                    throw new ExportException(e);
                }
                if (targetUrl.endsWith(".csv")) {
                    writer = new CSVResultWriter(log, out);
                } else {
                    Optional<RDFFormat> form = Rio.getWriterFormatForFileName(targetUrl);
                    if (!form.isPresent())
                        throw new ExportException("Unsupported target file format extension: " + targetUrl);
                    writer = new RIOResultWriter(log, form.get(), out);
                }
            } else if (targetUrl.startsWith("jdbc:")) {
                int i = targetUrl.lastIndexOf('/');
                if (i < 0)
                    throw new ExportException("Target URL does not end with /<table_name>");
                if (driverClass == null)
                    throw new ExportException(
                            "Missing mandatory JDBC driver class name argument -c <driver_class>");
                writer = new JDBCResultWriter(log, targetUrl.substring(0, i), targetUrl.substring(i + 1),
                        jdbcProperties, driverClass, driverClasspath, trimTable);
            } else {
                throw new ExportException("Unsupported target URL protocol " + targetUrl);
            }
            new HalyardExport(source, query, writer, log).run(conf);
        } catch (IOException e) {
            throw new ExportException(e);
        }
    }
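For a file target, the .bz2 suffix is what routes the output through CompressorStreamFactory.BZIP2 above. A hedged sketch of one such call follows; the table name, query, and path are made up, and conf and log are assumed to be an existing Hadoop Configuration and StatusLog implementation.

    // Hypothetical bzip2-compressed Turtle export; conf and log are assumed to exist.
    HalyardExport.export(conf, log,
            "my_table",                              // hypothetical source HTable name
            "CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o}", // hypothetical SPARQL graph query
            "file:///tmp/export.ttl.bz2",            // .bz2 suffix selects the BZIP2 compressor
            null, null, null, false);                // JDBC-only arguments, unused for file targets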
From source file:org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTaskTest.java
    private void doReadTest(Path file, Type fileType, String expTitle, String expDate, String expBody)
            throws Exception {
        InputStream in = Files.newInputStream(file);
        switch (fileType) {
        case BZIP2:
            in = csFactory.createCompressorInputStream(CompressorStreamFactory.BZIP2, in);
            break;
        case GZIP:
            in = csFactory.createCompressorInputStream(CompressorStreamFactory.GZIP, in);
            break;
        case PLAIN:
            break; // nothing to do
        default:
            assertFalse("Unknown file type!", true); // fail, should not happen
        }
        try (BufferedReader br = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line = br.readLine();
            assertHeaderLine(line);
            line = br.readLine();
            assertNotNull(line);
            String[] parts = line.split(Character.toString(WriteLineDocTask.SEP));
            int numExpParts = expBody == null ? 2 : 3;
            assertEquals(numExpParts, parts.length);
            assertEquals(expTitle, parts[0]);
            assertEquals(expDate, parts[1]);
            if (expBody != null) {
                assertEquals(expBody, parts[2]);
            }
            assertNull(br.readLine());
        }
    }
From source file:org.apache.lucene.benchmark.byTask.utils.StreamUtilsTest.java
    private Path rawBzip2File(String ext) throws Exception {
        Path f = testDir.resolve("testfile." + ext);
        OutputStream os = new CompressorStreamFactory()
                .createCompressorOutputStream(CompressorStreamFactory.BZIP2, Files.newOutputStream(f));
        writeText(os);
        return f;
    }
From source file:org.apache.marmotta.loader.core.MarmottaLoader.java
    /**
     * Load data from the file given as first argument into the handler given as second argument.
     *
     * @param file        file to read the data from; in case a compression format is not explicitly given,
     *                    the method will try to decide from the file name if the file is in a compressed format
     * @param handler     handler to add the data to
     * @param format      format to use for creating the parser or null for auto-detection
     * @param compression compression format to use, or null for auto-detection (see formats in
     *                    org.apache.commons.compress.compressors.CompressorStreamFactory)
     * @throws RDFParseException
     * @throws IOException
     */
    public void loadFile(File file, LoaderHandler handler, RDFFormat format, String compression)
            throws RDFParseException, IOException {
        log.info("loading file {} ...", file);

        CompressorStreamFactory cf = new CompressorStreamFactory();
        cf.setDecompressConcatenated(true);

        // detect the file compression
        String detectedCompression = detectCompression(file);
        if (compression == null) {
            if (detectedCompression != null) {
                log.info("using auto-detected compression ({})", detectedCompression);
                compression = detectedCompression;
            }
        } else {
            if (detectedCompression != null && !compression.equals(detectedCompression)) {
                log.warn("user-specified compression ({}) overrides auto-detected compression ({})",
                        compression, detectedCompression);
            } else {
                log.info("using user-specified compression ({})", compression);
            }
        }

        // detect the file format
        RDFFormat detectedFormat = Rio.getParserFormatForFileName(uncompressedName(file));
        if (format == null) {
            if (detectedFormat != null) {
                log.info("using auto-detected format ({})", detectedFormat.getName());
                format = detectedFormat;
            } else {
                throw new RDFParseException("could not detect input format of file " + file);
            }
        } else {
            if (detectedFormat != null && !format.equals(detectedFormat)) {
                log.warn("user-specified format ({}) overrides auto-detected format ({})", format.getName(),
                        detectedFormat.getName());
            }
        }

        // create input stream from file and wrap in compressor stream
        InputStream in;
        InputStream fin = new BufferedInputStream(new FileInputStream(file));
        try {
            if (compression != null) {
                if (CompressorStreamFactory.GZIP.equalsIgnoreCase(compression)) {
                    in = new GzipCompressorInputStream(fin, true);
                } else if (CompressorStreamFactory.BZIP2.equalsIgnoreCase(compression)) {
                    in = new BZip2CompressorInputStream(fin, true);
                } else if (CompressorStreamFactory.XZ.equalsIgnoreCase(compression)) {
                    in = new XZCompressorInputStream(fin, true);
                } else {
                    // does not honour decompressConcatenated
                    in = cf.createCompressorInputStream(compression, fin);
                }
            } else {
                in = cf.createCompressorInputStream(fin);
            }
        } catch (CompressorException ex) {
            log.info("no compression detected, using plain input stream");
            in = fin;
        }

        // load using the input stream
        load(in, handler, format);
    }
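A hedged sketch of calling loadFile so that a bzip2-compressed Turtle file exercises the auto-detection path above; the file name is hypothetical, and the loader and handler are assumed to come from the surrounding application.

    // Hypothetical helper: passing null for both format and compression triggers
    // auto-detection; for "data.ttl.bz2" this resolves to Turtle and
    // CompressorStreamFactory.BZIP2.
    static void loadBzip2Turtle(MarmottaLoader loader, LoaderHandler handler)
            throws RDFParseException, IOException {
        loader.loadFile(new File("data.ttl.bz2"), handler, null, null);
    }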
From source file:org.apache.marmotta.loader.core.MarmottaLoader.java
    public void loadArchive(File archive, LoaderHandler handler, RDFFormat format)
            throws RDFParseException, IOException, ArchiveException {
        log.info("loading files in archive {} ...", archive);

        if (archive.exists() && archive.canRead()) {

            if (archive.getName().endsWith("7z")) {
                log.info("auto-detected archive format: 7Z");
                final SevenZFile sevenZFile = new SevenZFile(archive);
                try {
                    SevenZArchiveEntry entry;
                    while ((entry = sevenZFile.getNextEntry()) != null) {
                        if (!entry.isDirectory()) {
                            log.info("loading entry {} ...", entry.getName());

                            // detect the file format
                            RDFFormat detectedFormat = Rio.getParserFormatForFileName(entry.getName());
                            if (format == null) {
                                if (detectedFormat != null) {
                                    log.info("auto-detected entry format: {}", detectedFormat.getName());
                                    format = detectedFormat;
                                } else {
                                    throw new RDFParseException(
                                            "could not detect input format of entry " + entry.getName());
                                }
                            } else {
                                if (detectedFormat != null && !format.equals(detectedFormat)) {
                                    log.warn(
                                            "user-specified entry format ({}) overrides auto-detected format ({})",
                                            format.getName(), detectedFormat.getName());
                                } else {
                                    log.info("user-specified entry format: {}", format.getName());
                                }
                            }

                            load(new InputStream() {
                                @Override
                                public int read() throws IOException {
                                    return sevenZFile.read();
                                }

                                @Override
                                public int read(byte[] b) throws IOException {
                                    return sevenZFile.read(b);
                                }

                                @Override
                                public int read(byte[] b, int off, int len) throws IOException {
                                    return sevenZFile.read(b, off, len);
                                }
                            }, handler, format);
                        }
                    }
                } finally {
                    sevenZFile.close();
                }
            } else {
                InputStream in;

                String archiveCompression = detectCompression(archive);
                InputStream fin = new BufferedInputStream(new FileInputStream(archive));
                if (archiveCompression != null) {
                    if (CompressorStreamFactory.GZIP.equalsIgnoreCase(archiveCompression)) {
                        log.info("auto-detected archive compression: GZIP");
                        in = new GzipCompressorInputStream(fin, true);
                    } else if (CompressorStreamFactory.BZIP2.equalsIgnoreCase(archiveCompression)) {
                        log.info("auto-detected archive compression: BZIP2");
                        in = new BZip2CompressorInputStream(fin, true);
                    } else if (CompressorStreamFactory.XZ.equalsIgnoreCase(archiveCompression)) {
                        log.info("auto-detected archive compression: XZ");
                        in = new XZCompressorInputStream(fin, true);
                    } else {
                        in = fin;
                    }
                } else {
                    in = fin;
                }

                ArchiveInputStream zipStream = new ArchiveStreamFactory()
                        .createArchiveInputStream(new BufferedInputStream(in));
                logArchiveType(zipStream);

                ArchiveEntry entry;
                while ((entry = zipStream.getNextEntry()) != null) {
                    if (!entry.isDirectory()) {
                        log.info("loading entry {} ...", entry.getName());

                        // detect the file format
                        RDFFormat detectedFormat = Rio.getParserFormatForFileName(entry.getName());
                        if (format == null) {
                            if (detectedFormat != null) {
                                log.info("auto-detected entry format: {}", detectedFormat.getName());
                                format = detectedFormat;
                            } else {
                                throw new RDFParseException(
                                        "could not detect input format of entry " + entry.getName());
                            }
                        } else {
                            if (detectedFormat != null && !format.equals(detectedFormat)) {
                                log.warn("user-specified entry format ({}) overrides auto-detected format ({})",
                                        format.getName(), detectedFormat.getName());
                            } else {
                                log.info("user-specified entry format: {}", format.getName());
                            }
                        }

                        load(zipStream, handler, format);
                    }
                }
            }
        } else {
            throw new RDFParseException(
                    "could not load files from archive " + archive + ": it does not exist or is not readable");
        }
    }
From source file:org.apache.marmotta.loader.core.MarmottaLoader.java
    /**
     * Detect the compression format from the filename, or null in case auto-detection failed.
     * @param file
     * @return
     */
    private String detectCompression(File file) {
        if (BZip2Utils.isCompressedFilename(file.getName())) {
            return CompressorStreamFactory.BZIP2;
        } else if (GzipUtils.isCompressedFilename(file.getName())) {
            return CompressorStreamFactory.GZIP;
        } else if (XZUtils.isCompressedFilename(file.getName())) {
            return CompressorStreamFactory.XZ;
        } else {
            return null;
        }
    }
From source file:org.apache.marmotta.loader.core.MarmottaLoader.java
    public static Configuration parseOptions(String[] args) throws ParseException {
        Options options = buildOptions();

        CommandLineParser parser = new PosixParser();
        CommandLine cmd = parser.parse(options, args);

        Configuration result = new MapConfiguration(new HashMap<String, Object>());

        if (cmd.hasOption('B')) {
            // check backends
            Set<String> existing = Sets
                    .newHashSet(Iterators.transform(backends.iterator(), new BackendIdentifierFunction()));
            if (!existing.contains(cmd.getOptionValue('B'))) {
                throw new ParseException("the backend " + cmd.getOptionValue('B') + " does not exist");
            }
            result.setProperty(LoaderOptions.BACKEND, cmd.getOptionValue('B'));
        }

        if (cmd.hasOption('b')) {
            result.setProperty(LoaderOptions.BASE_URI, cmd.getOptionValue('b'));
        }

        if (cmd.hasOption('z')) {
            result.setProperty(LoaderOptions.COMPRESSION, CompressorStreamFactory.GZIP);
        }

        if (cmd.hasOption('j')) {
            result.setProperty(LoaderOptions.COMPRESSION, CompressorStreamFactory.BZIP2);
        }

        if (cmd.hasOption('c')) {
            result.setProperty(LoaderOptions.CONTEXT, cmd.getOptionValue('c'));
        }

        if (cmd.hasOption('t')) {
            RDFFormat fmt = getRDFFormat(cmd.getOptionValue('t'));
            if (fmt == null) {
                throw new ParseException("unrecognized MIME type: " + cmd.getOptionValue('t'));
            }
            result.setProperty(LoaderOptions.FORMAT, fmt.getDefaultMIMEType());
        }

        if (cmd.hasOption('f')) {
            result.setProperty(LoaderOptions.FILES, Arrays.asList(cmd.getOptionValues('f')));
        }

        if (cmd.hasOption('d')) {
            result.setProperty(LoaderOptions.DIRS, Arrays.asList(cmd.getOptionValues('d')));
        }

        if (cmd.hasOption('a')) {
            result.setProperty(LoaderOptions.ARCHIVES, Arrays.asList(cmd.getOptionValues('a')));
        }

        if (cmd.hasOption('s')) {
            result.setProperty(LoaderOptions.STATISTICS_ENABLED, true);
            result.setProperty(LoaderOptions.STATISTICS_GRAPH, cmd.getOptionValue('s'));
        }

        if (cmd.hasOption('D')) {
            for (Map.Entry e : cmd.getOptionProperties("D").entrySet()) {
                result.setProperty(e.getKey().toString(), e.getValue());
            }
        }

        for (LoaderBackend b : backends) {
            for (Option option : b.getOptions()) {
                if (cmd.hasOption(option.getOpt())) {
                    String key = String.format("backend.%s.%s", b.getIdentifier(),
                            option.getLongOpt() != null ? option.getLongOpt() : option.getOpt());
                    if (option.hasArg()) {
                        if (option.hasArgs()) {
                            result.setProperty(key, Arrays.asList(cmd.getOptionValues(option.getOpt())));
                        } else {
                            result.setProperty(key, cmd.getOptionValue(option.getOpt()));
                        }
                    } else {
                        result.setProperty(key, true);
                    }
                }
            }
        }

        return result;
    }
From source file:org.apache.marmotta.loader.core.test.CLITest.java
    @Test
    public void testCompressionBZIP2() throws ParseException {
        Configuration cfg = MarmottaLoader.parseOptions(new String[] { "-j", "-f", "file1.ttl" });

        Assert.assertNotNull(cfg.getString(LoaderOptions.COMPRESSION));
        Assert.assertEquals(CompressorStreamFactory.BZIP2, cfg.getString(LoaderOptions.COMPRESSION));
    }