List of usage examples for the org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream constructor BZip2CompressorInputStream(InputStream)
public BZip2CompressorInputStream(final InputStream in) throws IOException
From source file:com.graphhopper.tools.Bzip2.java
/**
 * Decompresses a bzip2 file given on the command line.
 *
 * <p>The output file name is obtained by passing the input name to
 * {@code Helper.pruneFileEnd}.
 *
 * @param args command-line arguments; {@code args[0]} is the path of the bz2 file
 * @throws IllegalArgumentException if no argument is given or the file name
 *         does not end with {@code .bz2}
 * @throws IOException if opening, decompressing or writing fails
 */
public static void main(String[] args) throws IOException {
    if (args.length == 0) {
        throw new IllegalArgumentException("You need to specify the bz2 file!");
    }

    String fromFile = args[0];
    if (!fromFile.endsWith(".bz2")) {
        throw new IllegalArgumentException("You need to specify a bz2 file! But was:" + fromFile);
    }

    String toFile = Helper.pruneFileEnd(fromFile);

    // try-with-resources closes every stream that was opened, even when the
    // bzip2 header is invalid (constructor throws) or when closing one of the
    // other streams fails.
    try (FileInputStream in = new FileInputStream(fromFile);
            FileOutputStream out = new FileOutputStream(toFile);
            BZip2CompressorInputStream bzIn = new BZip2CompressorInputStream(in)) {
        final byte[] buffer = new byte[1024 * 8];
        int n;
        while (-1 != (n = bzIn.read(buffer))) {
            out.write(buffer, 0, n);
        }
    }
}
From source file:com.kasabi.data.movies.freebase.FreebaseMovies2RDF.java
/**
 * Reads a bzip2-compressed, tab-separated dump line by line, groups
 * consecutive lines by their first column (the subject) into a model and
 * hands each completed model to {@code process}.
 *
 * <p>Lines with three columns are emitted as resources; lines with four
 * columns are emitted as literals (plain, language-tagged, or keyed),
 * depending on the third column. Anything else is logged and skipped.
 *
 * @param args unused
 * @throws Exception if the dump cannot be read or processing fails
 */
public static void main(String[] args) throws Exception {
    String filename = "/backups/tmp/freebase-datadump-quadruples.tsv.bz2";
    int count = 0;
    Model model = MoviesCommon.createModel();
    String prev_subject = null;

    // Open the file stream as its own resource so it is closed even if the
    // bzip2 header is invalid; decode explicitly as UTF-8 instead of relying
    // on the platform default charset.
    try (FileInputStream fileIn = new FileInputStream(filename);
            BufferedReader in = new BufferedReader(
                    new InputStreamReader(new BZip2CompressorInputStream(fileIn), "UTF-8"))) {
        String line;
        while ((line = in.readLine()) != null) {
            count++;
            String[] tokens = line.split("\\t");
            if (tokens.length > 0) {
                String subject = tokens[0].trim();
                if (!subject.equals(prev_subject)) {
                    // Subject changed: flush what was collected so far and start over.
                    process(model);
                    model = MoviesCommon.createModel();
                }
                prev_subject = subject;
                if ((tokens.length == 3) && (tokens[0].trim().length() > 0) && (tokens[1].trim().length() > 0)
                        && (tokens[2].trim().length() > 0)) {
                    output_resource(model, tokens[0], tokens[1], tokens[2]);
                } else if ((tokens.length == 4) && (tokens[0].trim().length() > 0)
                        && (tokens[1].trim().length() > 0) && (tokens[3].trim().length() > 0)) {
                    if (tokens[2].trim().length() == 0) {
                        output_literal(model, tokens[0], tokens[1], tokens[3]);
                    } else {
                        if (tokens[2].startsWith(LANG)) {
                            output_literal_lang(model, tokens[0], tokens[1], tokens[3], tokens[2]);
                        } else {
                            if (tokens[1].equals("/type/object/key")) {
                                output_literal2(model, tokens[0], tokens[1], tokens[2], tokens[3]);
                            } else if ((tokens[1].equals("/type/object/name"))
                                    && (tokens[2].startsWith("/guid/"))) {
                                output_literal2(model, tokens[0], tokens[1], tokens[2], tokens[3]);
                            } else {
                                log.warn("Unexpected data at {}, ignoring: {}", count, line);
                            }
                        }
                    }
                } else {
                    if (tokens.length < 3) {
                        log.warn("Line {} has only {} tokens: {}", new Object[] { count, tokens.length, line });
                    } else {
                        log.warn("Line {} has one or more empty tokens: {}", new Object[] { count, line });
                    }
                }
            }
            if (count % 1000000 == 0)
                log.info("Processed {} lines...", count);
        }
    }

    // The loop only flushes a model when the NEXT subject shows up, so the
    // model of the last subject in the file must be flushed here; otherwise
    // its statements are silently dropped.
    if (prev_subject != null) {
        process(model);
    }
}
From source file:cmd.freebase2rdf.java
public static void main(String[] args) throws Exception { if (args.length != 2) { usage();//from w w w.j a v a 2 s .c o m } File input = new File(args[0]); if (!input.exists()) error("File " + input.getAbsolutePath() + " does not exist."); if (!input.canRead()) error("Cannot read file " + input.getAbsolutePath()); if (!input.isFile()) error("Not a file " + input.getAbsolutePath()); File output = new File(args[1]); if (output.exists()) error("Output file " + output.getAbsolutePath() + " already exists, this program do not override existing files."); if (output.canWrite()) error("Cannot write file " + output.getAbsolutePath()); if (output.isDirectory()) error("Not a file " + output.getAbsolutePath()); if (!output.getName().endsWith(".nt.gz")) error("Output filename should end with .nt.gz, this is the only format supported."); BufferedReader in = new BufferedReader( new InputStreamReader(new BZip2CompressorInputStream(new FileInputStream(input)))); BufferedOutputStream out = new BufferedOutputStream(new GZIPOutputStream(new FileOutputStream(output))); String line; ProgressLogger progressLogger = new ProgressLogger(log, "lines", 100000, 1000000); progressLogger.start(); Freebase2RDF freebase2rdf = null; try { freebase2rdf = new Freebase2RDF(out); while ((line = in.readLine()) != null) { freebase2rdf.send(line); progressLogger.tick(); } } finally { if (freebase2rdf != null) freebase2rdf.close(); } print(log, progressLogger); }
From source file:Bzip2Uncompress.java
/**
 * Decompresses the bzip2 file {@code args[0]} into {@code args[1]}.
 *
 * <p>Prints a usage line and exits with status 1 unless exactly two
 * arguments are given. Any failure prints the stack trace and exits with
 * status 1.
 *
 * @param args input path followed by output path
 */
public static void main(final String[] args) {
    try {
        if (2 != args.length) {
            System.out.println("java Bzip2Uncompress <input> <output>");
            System.exit(1);
        }

        final File source = new File(args[0]);
        final File destination = new File(args[1]);

        // The source is opened (and its bzip2 header validated) BEFORE the
        // destination, so a missing or invalid input no longer creates or
        // truncates the output file. try-with-resources closes all streams
        // even when copy() throws.
        try (FileInputStream fileInput = new FileInputStream(source);
                BZip2CompressorInputStream input = new BZip2CompressorInputStream(fileInput);
                FileOutputStream output = new FileOutputStream(destination)) {
            copy(input, output);
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }
}
From source file:com.vionto.vithesaurus.wikipedia.CommonsAudioDumper.java
public static void main(String[] args) throws Exception { if (args.length != 1) { System.out.println("Usage: CommonsAudioDumper <xmldump>"); System.out.println(//from ww w . j a v a 2 s . c o m " <xmldump> is a compressed XML dump from http://dumps.wikimedia.org/commonswiki/, e.g. 'commonswiki-20150602-pages-articles.xml.bz2'"); System.exit(1); } CommonsAudioDumper prg = new CommonsAudioDumper(); try (InputStream fileStream = new FileInputStream(args[0]); InputStream gzipStream = new BZip2CompressorInputStream(fileStream);) { prg.run(gzipStream); } }
From source file:marmot.tokenize.preprocess.WikiReader.java
public static InternalReader openFile(String file) { try {// w w w. ja va 2 s . c om return new BufferedReaderWrapper(new BufferedReader( new InputStreamReader(new BZip2CompressorInputStream(new FileInputStream(file)), "UTF-8"))); } catch (FileNotFoundException e) { throw new RuntimeException(e); } catch (IOException e) { throw new RuntimeException(e); } }
From source file:azkaban.project.DirectoryFlowLoaderTest.java
/**
 * Extracts a bzip2-compressed tar archive into a freshly created temporary
 * directory.
 *
 * <p>Directory entries are skipped; parent directories of file entries are
 * created on demand.
 *
 * @param is stream positioned at the start of the {@code .tar.bz2} data;
 *        it is closed when extraction finishes
 * @return the temporary directory containing the extracted files
 * @throws IOException if reading the archive fails, a parent directory
 *         cannot be created, or an entry would be written outside the
 *         temporary directory
 */
private static File decompressTarBZ2(InputStream is) throws IOException {
    File outputDir = Files.createTempDir();
    String outputRoot = outputDir.getCanonicalPath() + File.separator;
    try (TarArchiveInputStream tais = new TarArchiveInputStream(new BZip2CompressorInputStream(is))) {
        TarArchiveEntry entry;
        while ((entry = tais.getNextTarEntry()) != null) {
            if (entry.isDirectory()) {
                continue;
            }

            File outputFile = new File(outputDir, entry.getName());
            // Reject entry names such as "../../x" that would resolve to a
            // location outside the extraction directory.
            if (!outputFile.getCanonicalPath().startsWith(outputRoot)) {
                throw new IOException("Tar entry is outside of the target directory: " + entry.getName());
            }

            File parent = outputFile.getParentFile();
            // mkdirs() reports failure via its return value only; surface it
            // here rather than as a confusing error from FileOutputStream.
            if (!parent.isDirectory() && !parent.mkdirs()) {
                throw new IOException("Could not create directory " + parent.getAbsolutePath());
            }

            try (FileOutputStream os = new FileOutputStream(outputFile)) {
                IOUtils.copy(tais, os);
            }
        }
        return outputDir;
    }
}
From source file:fr.ens.biologie.genomique.eoulsan.io.ApacheCommonCompressionCodecs.java
/**
 * Wraps the given stream in a bzip2 decompressor.
 * @param is stream delivering bzip2-compressed data
 * @return a stream from which the decompressed data can be read
 * @throws IOException if the decompressing stream cannot be created
 */
public static InputStream createBZip2InputStream(final InputStream is) throws IOException {

  final InputStream decompressed = new BZip2CompressorInputStream(is);

  return decompressed;
}
From source file:edu.jhu.hlt.acute.iterators.tar.TarBzArchiveEntryByteIterator.java
/**
 * Wraps {@code is} in a {@code BZip2CompressorInputStream} and passes the
 * wrapped stream to the superclass constructor.
 *
 * @param is the stream to decompress; presumably a bzip2-compressed tar
 *        archive, judging by the class name (confirm against the superclass)
 * @throws IOException if the {@code BZip2CompressorInputStream} cannot be
 *         created from {@code is}, or if the superclass constructor throws it
 */
public TarBzArchiveEntryByteIterator(InputStream is) throws IOException {
    super(new BZip2CompressorInputStream(is));
}
From source file:com.github.harmanpa.jrecon.utils.Compression.java
public static byte[] decompress(byte[] data) throws IOException { InputStream is = new BZip2CompressorInputStream(new ByteArrayInputStream(data)); ByteArrayOutputStream baos = new ByteArrayOutputStream(128); int b;//from w ww. j a v a 2s.c om while ((b = is.read()) > -1) { baos.write(b); } is.close(); return baos.toByteArray(); }