List of usage examples for org.apache.commons.compress.compressors.gzip GzipCompressorInputStream GzipCompressorInputStream
public GzipCompressorInputStream(InputStream inputStream) throws IOException
From source file:eval.dataset.ParseWikiLog.java
public static void main(String[] ss) throws FileNotFoundException, ParserConfigurationException, IOException { FileInputStream fin = new FileInputStream("data/enwiki-20151201-pages-logging.xml.gz"); GzipCompressorInputStream gzIn = new GzipCompressorInputStream(fin); InputStreamReader reader = new InputStreamReader(gzIn); BufferedReader br = new BufferedReader(reader); PrintWriter pw = new PrintWriter(new FileWriter("data/user_page.txt")); pw.println(//from w w w. j a va 2 s .co m "#list of user names and pages that they have edited, deleted or created. These info are mined from logitems of enwiki-20150304-pages-logging.xml.gz"); TreeMap<String, Set<String>> userPageList = new TreeMap(); TreeSet<String> pageList = new TreeSet(); int counterEntry = 0; String currentUser = null; String currentPage = null; try { for (String line = br.readLine(); line != null; line = br.readLine()) { if (line.trim().equals("</logitem>")) { counterEntry++; if (currentUser != null && currentPage != null) { updateMap(userPageList, currentUser, currentPage); pw.println(currentUser + "\t" + currentPage); pageList.add(currentPage); } currentUser = null; currentPage = null; } else if (line.trim().startsWith("<username>")) { currentUser = line.trim().split(">")[1].split("<")[0].replace(" ", "_"); } else if (line.trim().startsWith("<logtitle>")) { String content = line.trim().split(">")[1].split("<")[0]; if (content.split(":").length == 1) { currentPage = content.replace(" ", "_"); } } } } catch (IOException ex) { Logger.getLogger(ParseWikiLog.class.getName()).log(Level.SEVERE, null, ex); } pw.println("#analysed " + counterEntry + " entries of wikipesia log file"); pw.println("#gathered a list of unique user of size " + userPageList.size()); pw.println("#gathered a list of pages of size " + pageList.size()); pw.close(); gzIn.close(); PrintWriter pwUser = new PrintWriter(new FileWriter("data/user_list_page_edited.txt")); pwUser.println( "#list of unique users and pages that they have edited, 
extracted from logitems of enwiki-20150304-pages-logging.xml.gz"); for (String user : userPageList.keySet()) { pwUser.print(user); Set<String> getList = userPageList.get(user); for (String page : getList) { pwUser.print("\t" + page); } pwUser.println(); } pwUser.close(); PrintWriter pwPage = new PrintWriter(new FileWriter("data/all_pages.txt")); pwPage.println("#list of the unique pages that are extracted from enwiki-20150304-pages-logging.xml.gz"); for (String page : pageList) { pwPage.println(page); } pwPage.close(); System.out.println("#analysed " + counterEntry + " entries of wikipesia log file"); System.out.println("#gathered a list of unique user of size " + userPageList.size()); System.out.println("#gathered a list of pages of size " + pageList.size()); }
From source file:com.openshift.client.utils.TarFileUtils.java
public static boolean hasGitFolder(InputStream inputStream) throws IOException { TarArchiveInputStream tarInputStream = null; try {//ww w . jav a2 s . c o m boolean gitFolderPresent = false; tarInputStream = new TarArchiveInputStream(new GzipCompressorInputStream(inputStream)); for (TarArchiveEntry entry = null; (entry = tarInputStream.getNextTarEntry()) != null;) { if (GIT_FOLDER_NAME.equals(entry.getName()) && entry.isDirectory()) { gitFolderPresent = true; break; } } return gitFolderPresent; } finally { StreamUtils.close(tarInputStream); } }
From source file:com.twitter.heron.downloader.Extractor.java
static void extract(InputStream in, Path destination) throws IOException { try (final BufferedInputStream bufferedInputStream = new BufferedInputStream(in); final GzipCompressorInputStream gzipInputStream = new GzipCompressorInputStream( bufferedInputStream); final TarArchiveInputStream tarInputStream = new TarArchiveInputStream(gzipInputStream)) { final String destinationAbsolutePath = destination.toFile().getAbsolutePath(); TarArchiveEntry entry;//www.j a va 2 s . c o m while ((entry = (TarArchiveEntry) tarInputStream.getNextEntry()) != null) { if (entry.isDirectory()) { File f = Paths.get(destinationAbsolutePath, entry.getName()).toFile(); f.mkdirs(); } else { Path fileDestinationPath = Paths.get(destinationAbsolutePath, entry.getName()); Files.copy(tarInputStream, fileDestinationPath, StandardCopyOption.REPLACE_EXISTING); } } } }
From source file:edu.umd.umiacs.clip.tools.io.SerializationTools.java
public static Object deserialize(String path) { try (FileInputStream is = new FileInputStream(path); ObjectInputStream in = new ObjectInputStream( path.endsWith(".bz2") ? new BZip2CompressorInputStream(is) : path.endsWith(".gz") ? new GzipCompressorInputStream(is) : is)) { return in.readObject(); } catch (IOException | ClassNotFoundException e) { e.printStackTrace();//from ww w . j a v a 2 s.c o m } return null; }
From source file:msec.org.GzipUtil.java
/**
 * Decompresses the gzip file {@code srcFile} next to itself.
 *
 * <p>If the name ends in {@code .gz} the output name is the input name without
 * that suffix; otherwise {@code .decompress} is appended. The suffix is tested
 * with {@code endsWith}, so names that contain {@code .gz} more than once
 * (e.g. {@code a.gz.gz}) are handled correctly.
 *
 * <p>Both streams are closed even when reading or writing fails.
 *
 * @param srcFile path of the gzip file to decompress
 * @throws Exception if the file cannot be opened, is not valid gzip, or the
 *                   output cannot be written
 */
static public void unzip(String srcFile) throws Exception {
    final String destFile = srcFile.endsWith(".gz")
            ? srcFile.substring(0, srcFile.length() - 3)
            : srcFile + ".decompress";

    try (FileInputStream fileIn = new FileInputStream(srcFile);
            GzipCompressorInputStream in = new GzipCompressorInputStream(fileIn);
            FileOutputStream out = new FileOutputStream(destFile)) {
        byte[] buf = new byte[10240];
        int len;
        while ((len = in.read(buf)) > 0) {
            out.write(buf, 0, len);
        }
        out.flush();
    }
}
From source file:com.dubture.symfony.core.util.UncompressUtils.java
/** * Uncompress a gzip archive and returns the file where it has been * extracted./*from ww w.ja v a2s .c om*/ * * @param archiveFile The archive file to uncompress * @param outputDirectory The output directory where to put the uncompressed archive * * @return The output file where the archive has been uncompressed * * @throws IOException When a problem occurs with either the input or output stream */ public static File uncompressGzipArchive(File archiveFile, File outputDirectory) throws IOException { FileInputStream fileInputStream = new FileInputStream(archiveFile); BufferedInputStream bufferedInputStream = new BufferedInputStream(fileInputStream); GzipCompressorInputStream gzipInputStream = new GzipCompressorInputStream(bufferedInputStream); String tarArchiveFilename = GzipUtils.getUncompressedFilename(archiveFile.getName()); File outputFile = new File(outputDirectory, tarArchiveFilename); FileOutputStream outputStream = new FileOutputStream(outputFile); int byteReadCount = 0; final byte[] data = new byte[BUFFER_SIZE]; try { while ((byteReadCount = gzipInputStream.read(data, 0, BUFFER_SIZE)) != -1) { outputStream.write(data, 0, byteReadCount); } } finally { outputStream.close(); gzipInputStream.close(); } return outputFile; }
From source file:de.flapdoodle.embed.process.extract.TgzExtractor.java
/**
 * Opens {@code source} as a gzip-compressed tar archive and wraps the resulting
 * tar stream in a {@code TarArchiveWrapper}.
 *
 * <p>If building the stream chain fails after the file has been opened, the file
 * handle is closed before the exception is propagated.
 *
 * @param source the tar.gz file to open
 * @return a wrapper around the tar stream
 * @throws IOException if the file cannot be opened or read as gzip
 */
protected ArchiveWrapper archiveStream(File source) throws IOException {
    FileInputStream fin = new FileInputStream(source);
    try {
        BufferedInputStream in = new BufferedInputStream(fin);
        GzipCompressorInputStream gzIn = new GzipCompressorInputStream(in);
        TarArchiveInputStream tarIn = new TarArchiveInputStream(gzIn);
        return new TarArchiveWrapper(tarIn);
    } catch (IOException | RuntimeException e) {
        // Nothing has taken ownership of the file handle yet; release it here.
        try {
            fin.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
From source file:com.ttech.cordovabuild.infrastructure.archive.ArchiveUtils.java
/**
 * Extracts the archive read from {@code is} into {@code localPath}.
 *
 * <p>The target directory is created first. The archive type is then detected
 * through {@code ArchiveStreamFactory}; when detection fails with an
 * {@code ArchiveException} the stream is treated as a gzip-compressed tar.
 *
 * @param is        the archive data
 * @param localPath the directory to extract into
 * @throws ArchiveExtractionException if the directory cannot be created or an
 *                                    I/O error occurs during extraction
 */
public static void extractFiles(InputStream is, Path localPath) {
    ArchiveStreamFactory factory = new ArchiveStreamFactory();
    try {
        Files.createDirectories(localPath);
    } catch (IOException dirFailure) {
        throw new ArchiveExtractionException(dirFailure);
    }

    try (ArchiveInputStream detected = factory.createArchiveInputStream(is)) {
        extractArchive(localPath, detected);
    } catch (ArchiveException undetected) {
        LOGGER.info("archiveFactory could not determine archive file type probably tar.gz");
        extractAsTarGz(is, localPath);
    } catch (IOException ioFailure) {
        throw new ArchiveExtractionException(ioFailure);
    }
}

/** Fallback path: reads {@code is} as tar.gz and extracts it into {@code localPath}. */
private static void extractAsTarGz(InputStream is, Path localPath) {
    try (ArchiveInputStream tarGz = new TarArchiveInputStream(new GzipCompressorInputStream(is))) {
        extractArchive(localPath, tarGz);
    } catch (IOException ioFailure) {
        throw new ArchiveExtractionException(ioFailure);
    }
}
From source file:com.amazonaws.codepipeline.jenkinsplugin.ExtractionTools.java
/**
 * Extracts the gzip-compressed tar file {@code source} into {@code destination}
 * by delegating to {@code extractArchive}.
 *
 * <p>Each stream layer is a separate resource, so the file handle is released
 * even if one of the wrapping constructors throws (for example when the file is
 * not valid gzip).
 *
 * @param source      the tar.gz file
 * @param destination the location passed on to {@code extractArchive}
 * @throws IOException if the file cannot be opened or extraction fails
 */
private static void extractTarGz(final File source, final File destination) throws IOException {
    try (FileInputStream fileInputStream = new FileInputStream(source);
            GzipCompressorInputStream gzipInputStream = new GzipCompressorInputStream(fileInputStream);
            ArchiveInputStream tarGzArchiveInputStream = new TarArchiveInputStream(gzipInputStream)) {
        extractArchive(destination, tarGzArchiveInputStream);
    }
}
From source file:net.orpiske.ssps.common.archive.CompressedArchiveUtils.java
/** * Decompress a file/*from w w w . ja v a 2 s . c o m*/ * @param source the source file to be uncompressed * @param destination the destination directory * @return the number of bytes read * @throws IOException for lower level I/O errors */ public static long gzDecompress(File source, File destination) throws IOException { FileOutputStream out; prepareDestination(destination); out = new FileOutputStream(destination); FileInputStream fin = null; BufferedInputStream bin = null; GzipCompressorInputStream gzIn = null; try { fin = new FileInputStream(source); bin = new BufferedInputStream(fin); gzIn = new GzipCompressorInputStream(bin); IOUtils.copy(gzIn, out); gzIn.close(); fin.close(); bin.close(); out.close(); } catch (IOException e) { IOUtils.closeQuietly(out); IOUtils.closeQuietly(fin); IOUtils.closeQuietly(bin); IOUtils.closeQuietly(gzIn); throw e; } return gzIn.getBytesRead(); }