List of usage examples for the `org.apache.commons.compress.compressors.CompressorStreamFactory.GZIP` constant (a `String`).
To view the source code for `org.apache.commons.compress.compressors.CompressorStreamFactory.GZIP`, click the source link below each example.
From source file: no.finntech.shootout.avro.GzipJsonAvro.java
@Override protected String getCompressor() { return CompressorStreamFactory.GZIP; }
From source file: org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTaskTest.java
private void doReadTest(Path file, Type fileType, String expTitle, String expDate, String expBody) throws Exception { InputStream in = Files.newInputStream(file); switch (fileType) { case BZIP2://from w w w.java 2 s . c om in = csFactory.createCompressorInputStream(CompressorStreamFactory.BZIP2, in); break; case GZIP: in = csFactory.createCompressorInputStream(CompressorStreamFactory.GZIP, in); break; case PLAIN: break; // nothing to do default: assertFalse("Unknown file type!", true); //fail, should not happen } try (BufferedReader br = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) { String line = br.readLine(); assertHeaderLine(line); line = br.readLine(); assertNotNull(line); String[] parts = line.split(Character.toString(WriteLineDocTask.SEP)); int numExpParts = expBody == null ? 2 : 3; assertEquals(numExpParts, parts.length); assertEquals(expTitle, parts[0]); assertEquals(expDate, parts[1]); if (expBody != null) { assertEquals(expBody, parts[2]); } assertNull(br.readLine()); } }
From source file: org.apache.lucene.benchmark.byTask.utils.StreamUtilsTest.java
private Path rawGzipFile(String ext) throws Exception { Path f = testDir.resolve("testfile." + ext); OutputStream os = new CompressorStreamFactory().createCompressorOutputStream(CompressorStreamFactory.GZIP, Files.newOutputStream(f)); writeText(os);//from w w w . j a v a 2 s . c o m return f; }
From source file: org.apache.marmotta.loader.core.MarmottaLoader.java
/**
 * Load data from the file given as first argument into the handler given as second argument.
 *
 * @param file        file to read the data from; in case a compression format is not explicitly
 *                    given, the method will try to decide from the file name if the file is in
 *                    a compressed format
 * @param handler     handler to add the data to
 * @param format      format to use for creating the parser, or null for auto-detection
 * @param compression compression format to use, or null for auto-detection (see formats in
 *                    org.apache.commons.compress.compressors.CompressorStreamFactory)
 * @throws RDFParseException if the input format cannot be determined or the data cannot be parsed
 * @throws IOException       on I/O failure
 */
public void loadFile(File file, LoaderHandler handler, RDFFormat format, String compression)
        throws RDFParseException, IOException {
    log.info("loading file {} ...", file);

    CompressorStreamFactory cf = new CompressorStreamFactory();
    // handle multiple concatenated compressed members as a single stream
    cf.setDecompressConcatenated(true);

    // detect the file compression from the file name; an explicit user choice
    // wins over the detected one (with a warning if they disagree)
    String detectedCompression = detectCompression(file);
    if (compression == null) {
        if (detectedCompression != null) {
            log.info("using auto-detected compression ({})", detectedCompression);
            compression = detectedCompression;
        }
    } else {
        if (detectedCompression != null && !compression.equals(detectedCompression)) {
            log.warn("user-specified compression ({}) overrides auto-detected compression ({})",
                    compression, detectedCompression);
        } else {
            log.info("using user-specified compression ({})", compression);
        }
    }

    // detect the RDF format from the file name (with the compression suffix
    // stripped); a user-specified format overrides the detected one
    RDFFormat detectedFormat = Rio.getParserFormatForFileName(uncompressedName(file));
    if (format == null) {
        if (detectedFormat != null) {
            log.info("using auto-detected format ({})", detectedFormat.getName());
            format = detectedFormat;
        } else {
            throw new RDFParseException("could not detect input format of file " + file);
        }
    } else {
        if (detectedFormat != null && !format.equals(detectedFormat)) {
            log.warn("user-specified format ({}) overrides auto-detected format ({})",
                    format.getName(), detectedFormat.getName());
        }
    }

    // create input stream from file and wrap in compressor stream
    InputStream in;
    InputStream fin = new BufferedInputStream(new FileInputStream(file));
    try {
        if (compression != null) {
            // the well-known formats are constructed directly so the
            // "decompress concatenated" flag (second ctor argument) is honoured
            if (CompressorStreamFactory.GZIP.equalsIgnoreCase(compression)) {
                in = new GzipCompressorInputStream(fin, true);
            } else if (CompressorStreamFactory.BZIP2.equalsIgnoreCase(compression)) {
                in = new BZip2CompressorInputStream(fin, true);
            } else if (CompressorStreamFactory.XZ.equalsIgnoreCase(compression)) {
                in = new XZCompressorInputStream(fin, true);
            } else {
                // does not honour decompressConcatenated
                in = cf.createCompressorInputStream(compression, fin);
            }
        } else {
            // let Commons Compress sniff the stream contents
            in = cf.createCompressorInputStream(fin);
        }
    } catch (CompressorException ex) {
        // not compressed (or unknown format): fall back to the raw stream
        log.info("no compression detected, using plain input stream");
        in = fin;
    }

    // load using the (possibly decompressing) input stream
    load(in, handler, format);
}
From source file: org.apache.marmotta.loader.core.MarmottaLoader.java
/**
 * Load all non-directory entries of an archive into the given handler.
 * 7z archives are handled specially (via SevenZFile); all other archives are
 * optionally decompressed (gzip/bzip2/xz detected from the file name) and then
 * read through the Commons Compress archive stream auto-detection.
 *
 * @param archive archive file to read; must exist and be readable
 * @param handler handler to add the data to
 * @param format  RDF format to use for entries, or null for per-entry auto-detection
 * @throws RDFParseException if an entry format cannot be determined or parsing fails
 * @throws IOException       on I/O failure
 * @throws ArchiveException  if the archive format cannot be read
 */
public void loadArchive(File archive, LoaderHandler handler, RDFFormat format)
        throws RDFParseException, IOException, ArchiveException {
    log.info("loading files in archive {} ...", archive);

    if (archive.exists() && archive.canRead()) {

        if (archive.getName().endsWith("7z")) {
            // 7z is not supported by ArchiveStreamFactory, so it gets its own path
            log.info("auto-detected archive format: 7Z");

            final SevenZFile sevenZFile = new SevenZFile(archive);

            try {
                SevenZArchiveEntry entry;
                while ((entry = sevenZFile.getNextEntry()) != null) {
                    if (!entry.isDirectory()) {
                        log.info("loading entry {} ...", entry.getName());

                        // detect the file format; NOTE(review): assigning to the
                        // `format` parameter makes the first detected format stick
                        // for all later entries
                        RDFFormat detectedFormat = Rio.getParserFormatForFileName(entry.getName());
                        if (format == null) {
                            if (detectedFormat != null) {
                                log.info("auto-detected entry format: {}", detectedFormat.getName());
                                format = detectedFormat;
                            } else {
                                throw new RDFParseException(
                                        "could not detect input format of entry " + entry.getName());
                            }
                        } else {
                            if (detectedFormat != null && !format.equals(detectedFormat)) {
                                log.warn("user-specified entry format ({}) overrides auto-detected format ({})",
                                        format.getName(), detectedFormat.getName());
                            } else {
                                log.info("user-specified entry format: {}", format.getName());
                            }
                        }

                        // adapter: expose the current 7z entry as an InputStream
                        // (SevenZFile reads sequentially from the current entry)
                        load(new InputStream() {
                            @Override
                            public int read() throws IOException {
                                return sevenZFile.read();
                            }

                            @Override
                            public int read(byte[] b) throws IOException {
                                return sevenZFile.read(b);
                            }

                            @Override
                            public int read(byte[] b, int off, int len) throws IOException {
                                return sevenZFile.read(b, off, len);
                            }
                        }, handler, format);
                    }
                }
            } finally {
                sevenZFile.close();
            }

        } else {
            InputStream in;

            // the archive itself may be compressed (e.g. .tar.gz); detect from name
            String archiveCompression = detectCompression(archive);
            InputStream fin = new BufferedInputStream(new FileInputStream(archive));
            if (archiveCompression != null) {
                if (CompressorStreamFactory.GZIP.equalsIgnoreCase(archiveCompression)) {
                    log.info("auto-detected archive compression: GZIP");
                    in = new GzipCompressorInputStream(fin, true);
                } else if (CompressorStreamFactory.BZIP2.equalsIgnoreCase(archiveCompression)) {
                    log.info("auto-detected archive compression: BZIP2");
                    in = new BZip2CompressorInputStream(fin, true);
                } else if (CompressorStreamFactory.XZ.equalsIgnoreCase(archiveCompression)) {
                    log.info("auto-detected archive compression: XZ");
                    in = new XZCompressorInputStream(fin, true);
                } else {
                    in = fin;
                }
            } else {
                in = fin;
            }

            // auto-detect the archive format (tar, zip, ...) from the stream contents
            ArchiveInputStream zipStream = new ArchiveStreamFactory()
                    .createArchiveInputStream(new BufferedInputStream(in));
            logArchiveType(zipStream);

            ArchiveEntry entry;
            while ((entry = zipStream.getNextEntry()) != null) {
                if (!entry.isDirectory()) {
                    log.info("loading entry {} ...", entry.getName());

                    // detect the file format (same sticky-assignment caveat as above)
                    RDFFormat detectedFormat = Rio.getParserFormatForFileName(entry.getName());
                    if (format == null) {
                        if (detectedFormat != null) {
                            log.info("auto-detected entry format: {}", detectedFormat.getName());
                            format = detectedFormat;
                        } else {
                            throw new RDFParseException(
                                    "could not detect input format of entry " + entry.getName());
                        }
                    } else {
                        if (detectedFormat != null && !format.equals(detectedFormat)) {
                            log.warn("user-specified entry format ({}) overrides auto-detected format ({})",
                                    format.getName(), detectedFormat.getName());
                        } else {
                            log.info("user-specified entry format: {}", format.getName());
                        }
                    }

                    // the archive stream is positioned at the current entry's data
                    load(zipStream, handler, format);
                }
            }
        }

    } else {
        throw new RDFParseException(
                "could not load files from archive " + archive + ": it does not exist or is not readable");
    }
}
From source file: org.apache.marmotta.loader.core.MarmottaLoader.java
/** * Detect the compression format from the filename, or null in case auto-detection failed. * @param file//from w w w.ja v a 2 s . c o m * @return */ private String detectCompression(File file) { if (BZip2Utils.isCompressedFilename(file.getName())) { return CompressorStreamFactory.BZIP2; } else if (GzipUtils.isCompressedFilename(file.getName())) { return CompressorStreamFactory.GZIP; } else if (XZUtils.isCompressedFilename(file.getName())) { return CompressorStreamFactory.XZ; } else { return null; } }
From source file: org.apache.marmotta.loader.core.MarmottaLoader.java
/**
 * Parse the given command-line arguments into a loader {@link Configuration}.
 * Besides the built-in options, backend-specific options are namespaced as
 * {@code backend.<id>.<option>}.
 *
 * @param args command-line arguments as passed to main()
 * @return configuration populated from the recognised options
 * @throws ParseException if an argument is malformed, an unknown backend is
 *                        requested, or a MIME type is not recognised
 */
public static Configuration parseOptions(String[] args) throws ParseException {
    Options options = buildOptions();

    CommandLineParser parser = new PosixParser();
    CommandLine cmd = parser.parse(options, args);

    Configuration result = new MapConfiguration(new HashMap<String, Object>());

    if (cmd.hasOption('B')) {
        // check backends: -B must name one of the registered loader backends
        Set<String> existing = Sets
                .newHashSet(Iterators.transform(backends.iterator(), new BackendIdentifierFunction()));
        if (!existing.contains(cmd.getOptionValue('B'))) {
            throw new ParseException("the backend " + cmd.getOptionValue('B') + " does not exist");
        }

        result.setProperty(LoaderOptions.BACKEND, cmd.getOptionValue('B'));
    }

    if (cmd.hasOption('b')) {
        result.setProperty(LoaderOptions.BASE_URI, cmd.getOptionValue('b'));
    }

    // -z / -j select gzip / bzip2 compression; note the second one wins if both given
    if (cmd.hasOption('z')) {
        result.setProperty(LoaderOptions.COMPRESSION, CompressorStreamFactory.GZIP);
    }

    if (cmd.hasOption('j')) {
        result.setProperty(LoaderOptions.COMPRESSION, CompressorStreamFactory.BZIP2);
    }

    if (cmd.hasOption('c')) {
        result.setProperty(LoaderOptions.CONTEXT, cmd.getOptionValue('c'));
    }

    if (cmd.hasOption('t')) {
        // -t takes a MIME type; resolve it to a known RDF format or fail
        RDFFormat fmt = getRDFFormat(cmd.getOptionValue('t'));
        if (fmt == null) {
            throw new ParseException("unrecognized MIME type: " + cmd.getOptionValue('t'));
        }

        result.setProperty(LoaderOptions.FORMAT, fmt.getDefaultMIMEType());
    }

    if (cmd.hasOption('f')) {
        result.setProperty(LoaderOptions.FILES, Arrays.asList(cmd.getOptionValues('f')));
    }

    if (cmd.hasOption('d')) {
        result.setProperty(LoaderOptions.DIRS, Arrays.asList(cmd.getOptionValues('d')));
    }

    if (cmd.hasOption('a')) {
        result.setProperty(LoaderOptions.ARCHIVES, Arrays.asList(cmd.getOptionValues('a')));
    }

    if (cmd.hasOption('s')) {
        result.setProperty(LoaderOptions.STATISTICS_ENABLED, true);
        result.setProperty(LoaderOptions.STATISTICS_GRAPH, cmd.getOptionValue('s'));
    }

    if (cmd.hasOption('D')) {
        // -Dkey=value pairs are copied verbatim into the configuration
        for (Map.Entry e : cmd.getOptionProperties("D").entrySet()) {
            result.setProperty(e.getKey().toString(), e.getValue());
        }
    }

    // collect backend-specific options under "backend.<id>.<long-or-short-opt>"
    for (LoaderBackend b : backends) {
        for (Option option : b.getOptions()) {
            if (cmd.hasOption(option.getOpt())) {
                String key = String.format("backend.%s.%s", b.getIdentifier(),
                        option.getLongOpt() != null ? option.getLongOpt() : option.getOpt());
                if (option.hasArg()) {
                    if (option.hasArgs()) {
                        result.setProperty(key, Arrays.asList(cmd.getOptionValues(option.getOpt())));
                    } else {
                        result.setProperty(key, cmd.getOptionValue(option.getOpt()));
                    }
                } else {
                    // flag option: record presence as boolean true
                    result.setProperty(key, true);
                }
            }
        }
    }

    return result;
}
From source file: org.apache.marmotta.loader.core.test.CLITest.java
@Test public void testCompressionGZIP() throws ParseException { Configuration cfg = MarmottaLoader.parseOptions(new String[] { "-z", "-f", "file1.ttl" }); Assert.assertNotNull(cfg.getString(LoaderOptions.COMPRESSION)); Assert.assertEquals(CompressorStreamFactory.GZIP, cfg.getString(LoaderOptions.COMPRESSION)); }
From source file: org.apache.marmotta.loader.core.test.FilesTest.java
@Parameterized.Parameters public static Collection<Object[]> data() { Object[][] data = new Object[][] { { null, "demo-data.rdf" }, { CompressorStreamFactory.GZIP, "demo-data.rdf.gz" }, { CompressorStreamFactory.BZIP2, "demo-data.rdf.bz2" } }; return Arrays.asList(data); }
From source file: org.dataconservancy.packaging.tool.impl.BOREMPackageGeneratorTest.java
/**
 * Tests generating a well formed package, with all required parameters and the following options:
 * <ul>
 * <li>checksum alg: md5</li>
 * <li>compression-format: gz</li>
 * <li>archiving-format: not specified</li>
 * </ul>
 * <p/>
 * Expects the de-compressed, deserialized package to contain:
 * <ul>
 * <li>bag-info.txt file: besides the input parameters, the bag-info.txt file is expected to
 *     contain a reference to the ReM of the whole package, expressed in the PKG-ORE-REM parameter</li>
 * <li>bagit.txt file</li>
 * <li>manifest-&lt;checksumalg&gt;.txt files</li>
 * <li>tagmanifest-&lt;checksumalg&gt;.txt files</li>
 * <li>data/ folder</li>
 * <li>payload files in data/ folder</li>
 * <li>ORE-REM folder</li>
 * <li>description files in ORE-REM folder</li>
 * </ul>
 *
 * @throws CompressorException on decompression failure
 * @throws ArchiveException    on archive-reading failure
 * @throws IOException         on I/O failure
 */
@Test
public void testGeneratingAGoodPackage() throws CompressorException, ArchiveException, IOException {
    // assemble the full parameter set for a BOREM-format, gzip-compressed package
    params.addParam(GeneralParameterNames.PACKAGE_FORMAT_ID, PackagingFormat.BOREM.toString());
    params.addParam(GeneralParameterNames.PACKAGE_NAME, packageName);
    params.addParam(GeneralParameterNames.PACKAGE_LOCATION, packageLocationName);
    params.addParam(GeneralParameterNames.PACKAGE_STAGING_LOCATION, packageStagingLocationName);
    params.addParam(BagItParameterNames.BAGIT_PROFILE_ID, bagItProfileId);
    params.addParam(BagItParameterNames.CONTACT_NAME, contactName);
    params.addParam(BagItParameterNames.CONTACT_EMAIL, contactEmail);
    params.addParam(BagItParameterNames.CONTACT_PHONE, contactPhone);
    params.addParam(GeneralParameterNames.CHECKSUM_ALGORITHMS, checksumAlg);
    params.addParam(BagItParameterNames.COMPRESSION_FORMAT, compressionFormat);
    params.addParam(BagItParameterNames.PKG_BAG_DIR, packageName);
    // params.addParam(GeneralParameterNames.CONTENT_ROOT_LOCATION, pkgBagDir);
    params.addParam(GeneralParameterNames.CONTENT_ROOT_LOCATION, contentRootLocation);
    Package resultedPackage = underTest.generatePackage(desc, params);

    // Decompress (gzip) and de-archive (tar) the serialized package
    CompressorInputStream cis = new CompressorStreamFactory()
            .createCompressorInputStream(CompressorStreamFactory.GZIP, resultedPackage.serialize());
    TarArchiveInputStream ais = (TarArchiveInputStream) new ArchiveStreamFactory()
            .createArchiveInputStream(ArchiveStreamFactory.TAR, cis);

    // walk the archive once, collecting entry names and verifying key files in place
    Set<String> files = new HashSet<String>();
    ArchiveEntry entry = ais.getNextEntry();
    while (entry != null) {
        // normalise Windows-style separators so the assertions below match
        files.add(entry.getName().replace("\\", "/"));
        if (entry.getName().equals(packageName + "/bag-info.txt") && ais.canReadEntryData(entry)) {
            verifyBagInfoContent(ais);
        }
        if (entry.getName()
                .equals(packageName + "/data/ProjectOne/Collection One/DataItem One/" + dataFileOneName)) {
            compareDataFile(ais, pathToFileOne);
        }
        if (entry.getName()
                .equals(packageName + "/data/ProjectOne/Collection One/DataItem One/" + dataFileTwoName)) {
            compareDataFile(ais, pathToFileTwo);
        }
        entry = ais.getNextEntry();
    }

    // verify that every expected package member was present in the archive
    assertTrue(files.contains(packageName + "/bag-info.txt"));
    assertTrue(files.contains(packageName + "/bagit.txt"));
    assertTrue(files.contains(packageName + "/tagmanifest-md5.txt"));
    assertTrue(files.contains(packageName + "/manifest-md5.txt"));
    assertTrue(files.contains(packageName + "/data/"));
    assertTrue(files.contains(packageName + "/data/ProjectOne/Collection One/DataItem One/" + dataFileOneName));
    assertTrue(files.contains(packageName + "/data/ProjectOne/Collection One/DataItem One/" + dataFileTwoName));
    assertTrue(files.contains(packageName + "/ORE-REM/"));

    // the reported content type must match the requested compression format
    assertTrue(SupportedMimeTypes.getMimeType(compressionFormat).contains(resultedPackage.getContentType()));
}