List of usage examples for org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream.close()
public void close() throws IOException
From source file:eval.dataset.ParseWikiLog.java
public static void main(String[] ss) throws FileNotFoundException, ParserConfigurationException, IOException { FileInputStream fin = new FileInputStream("data/enwiki-20151201-pages-logging.xml.gz"); GzipCompressorInputStream gzIn = new GzipCompressorInputStream(fin); InputStreamReader reader = new InputStreamReader(gzIn); BufferedReader br = new BufferedReader(reader); PrintWriter pw = new PrintWriter(new FileWriter("data/user_page.txt")); pw.println(//from w w w. j ava2 s. c om "#list of user names and pages that they have edited, deleted or created. These info are mined from logitems of enwiki-20150304-pages-logging.xml.gz"); TreeMap<String, Set<String>> userPageList = new TreeMap(); TreeSet<String> pageList = new TreeSet(); int counterEntry = 0; String currentUser = null; String currentPage = null; try { for (String line = br.readLine(); line != null; line = br.readLine()) { if (line.trim().equals("</logitem>")) { counterEntry++; if (currentUser != null && currentPage != null) { updateMap(userPageList, currentUser, currentPage); pw.println(currentUser + "\t" + currentPage); pageList.add(currentPage); } currentUser = null; currentPage = null; } else if (line.trim().startsWith("<username>")) { currentUser = line.trim().split(">")[1].split("<")[0].replace(" ", "_"); } else if (line.trim().startsWith("<logtitle>")) { String content = line.trim().split(">")[1].split("<")[0]; if (content.split(":").length == 1) { currentPage = content.replace(" ", "_"); } } } } catch (IOException ex) { Logger.getLogger(ParseWikiLog.class.getName()).log(Level.SEVERE, null, ex); } pw.println("#analysed " + counterEntry + " entries of wikipesia log file"); pw.println("#gathered a list of unique user of size " + userPageList.size()); pw.println("#gathered a list of pages of size " + pageList.size()); pw.close(); gzIn.close(); PrintWriter pwUser = new PrintWriter(new FileWriter("data/user_list_page_edited.txt")); pwUser.println( "#list of unique users and pages that they have edited, 
extracted from logitems of enwiki-20150304-pages-logging.xml.gz"); for (String user : userPageList.keySet()) { pwUser.print(user); Set<String> getList = userPageList.get(user); for (String page : getList) { pwUser.print("\t" + page); } pwUser.println(); } pwUser.close(); PrintWriter pwPage = new PrintWriter(new FileWriter("data/all_pages.txt")); pwPage.println("#list of the unique pages that are extracted from enwiki-20150304-pages-logging.xml.gz"); for (String page : pageList) { pwPage.println(page); } pwPage.close(); System.out.println("#analysed " + counterEntry + " entries of wikipesia log file"); System.out.println("#gathered a list of unique user of size " + userPageList.size()); System.out.println("#gathered a list of pages of size " + pageList.size()); }
From source file:com.aliyun.odps.local.common.utils.ArchiveUtils.java
public static void unGZip(File gzFile, File ungzipDir) throws IOException, ArchiveException { String gzFileName = gzFile.getName(); String tarFileName = ""; if (gzFileName.endsWith(".tar.gz")) { tarFileName = gzFileName.substring(0, gzFileName.length() - 3);// 3 is length of '.gz' } else if (gzFileName.endsWith(".tgz")) { tarFileName = gzFileName.substring(0, gzFileName.length() - 4) + ".tar";// 4 is length of // '.tgz' }//w w w . ja va 2 s. co m File tarFile = new File(gzFile.getParentFile(), tarFileName); FileOutputStream tarOut = new FileOutputStream(tarFile); GzipCompressorInputStream gzIn = new GzipCompressorInputStream( new BufferedInputStream(new FileInputStream(gzFile))); IOUtils.copy(gzIn, tarOut); tarOut.close(); gzIn.close(); unTar(tarFile, ungzipDir); // remove the temporary tarFile FileUtils.deleteQuietly(tarFile); }
From source file:net.orpiske.ssps.common.archive.CompressedArchiveUtils.java
/** * Decompress a file/*from w ww . j a v a 2 s . c o m*/ * @param source the source file to be uncompressed * @param destination the destination directory * @return the number of bytes read * @throws IOException for lower level I/O errors */ public static long gzDecompress(File source, File destination) throws IOException { FileOutputStream out; prepareDestination(destination); out = new FileOutputStream(destination); FileInputStream fin = null; BufferedInputStream bin = null; GzipCompressorInputStream gzIn = null; try { fin = new FileInputStream(source); bin = new BufferedInputStream(fin); gzIn = new GzipCompressorInputStream(bin); IOUtils.copy(gzIn, out); gzIn.close(); fin.close(); bin.close(); out.close(); } catch (IOException e) { IOUtils.closeQuietly(out); IOUtils.closeQuietly(fin); IOUtils.closeQuietly(bin); IOUtils.closeQuietly(gzIn); throw e; } return gzIn.getBytesRead(); }
From source file:com.bahmanm.karun.Utils.java
/** * Extracts a gzip'ed tar archive./*from w w w . j a va 2 s. co m*/ * * @param archivePath Path to archive * @param destDir Destination directory * @throws IOException */ public synchronized static void extractTarGz(String archivePath, File destDir) throws IOException, ArchiveException { // copy File tarGzFile = File.createTempFile("karuntargz", "", destDir); copyFile(archivePath, tarGzFile.getAbsolutePath()); // decompress File tarFile = File.createTempFile("karuntar", "", destDir); FileInputStream fin = new FileInputStream(tarGzFile); BufferedInputStream bin = new BufferedInputStream(fin); FileOutputStream fout = new FileOutputStream(tarFile); GzipCompressorInputStream gzIn = new GzipCompressorInputStream(bin); final byte[] buffer = new byte[1024]; int n = 0; while (-1 != (n = gzIn.read(buffer))) { fout.write(buffer, 0, n); } bin.close(); fin.close(); gzIn.close(); fout.close(); // extract final InputStream is = new FileInputStream(tarFile); ArchiveInputStream ain = new ArchiveStreamFactory().createArchiveInputStream("tar", is); TarArchiveEntry entry = null; while ((entry = (TarArchiveEntry) ain.getNextEntry()) != null) { OutputStream out; if (entry.isDirectory()) { File f = new File(destDir, entry.getName()); f.mkdirs(); continue; } else out = new FileOutputStream(new File(destDir, entry.getName())); IOUtils.copy(ain, out); out.close(); } ain.close(); is.close(); }
From source file:com.zenome.bundlebus.Util.java
/**
 * Decompresses a gzip file into {@code aOutputFile}.
 *
 * @param aGzFile the gzip-compressed input file
 * @param aOutputFile the file that receives the decompressed bytes
 * @return {@code false} if {@code GzipUtils.isCompressedFilename} does not accept the
 *     absolute path of {@code aGzFile} (nothing is written in that case), {@code true}
 *     after a successful decompression
 * @throws FileNotFoundException if one of the files cannot be opened
 * @throws IOException if reading, decompressing or writing fails
 */
public static boolean unZip(@NonNull final File aGzFile, @NonNull final File aOutputFile)
        throws FileNotFoundException, IOException {
    Log.d(TAG, "gz filename : " + aGzFile);
    Log.d(TAG, "output file : " + aOutputFile);

    if (!GzipUtils.isCompressedFilename(aGzFile.getAbsolutePath())) {
        Log.d(TAG, "This file is not compressed file : " + aGzFile.getAbsolutePath());
        return false;
    }

    // All four streams are closed automatically, including when the gzip header is invalid
    // or the copy loop throws.
    try (FileInputStream fis = new FileInputStream(aGzFile);
            BufferedInputStream in = new BufferedInputStream(fis);
            FileOutputStream out = new FileOutputStream(aOutputFile);
            GzipCompressorInputStream gzIn = new GzipCompressorInputStream(in)) {
        final byte[] buffer = new byte[4096];
        int n;
        while (-1 != (n = gzIn.read(buffer))) {
            out.write(buffer, 0, n);
        }
    }
    return true;
}
From source file:com.goldmansachs.kata2go.tools.utils.TarGz.java
/**
 * Gunzips {@code tarGzInputStream} and writes the decompressed bytes to {@code outTarGz}.
 *
 * <p>The supplied input stream is closed by this method (it is wrapped by the gzip stream,
 * which is closed on exit).
 *
 * <p>NOTE(review): despite its name, {@code outTarGz} receives the <em>decompressed</em>
 * data - confirm the intended naming with the caller.
 *
 * @param tarGzInputStream gzip-compressed input
 * @param outTarGz path of the file to write
 * @throws IOException if reading, decompressing or writing fails
 */
public static void write(InputStream tarGzInputStream, Path outTarGz) throws IOException {
    try (GzipCompressorInputStream gzipStream = new GzipCompressorInputStream(tarGzInputStream);
            FileOutputStream fios = new FileOutputStream(outTarGz.toFile())) {
        final int buffersize = 1024;
        final byte[] buffer = new byte[buffersize];
        int n;
        while (-1 != (n = gzipStream.read(buffer))) {
            fios.write(buffer, 0, n);
        }
    }
}
From source file:msec.org.GzipUtil.java
/**
 * Decompresses the gzip file {@code srcFile}.
 *
 * <p>If the name ends in {@code .gz} the output is written next to it with that suffix
 * removed; otherwise the output name is {@code srcFile + ".decompress"}.
 *
 * @param srcFile path of the gzip-compressed file
 * @throws Exception if the file cannot be opened, decompressed or written
 */
static public void unzip(String srcFile) throws Exception {
    final String suffix = ".gz";
    // endsWith instead of indexOf: a name such as "a.gz.b.gz" contains ".gz" earlier on,
    // which made the old first-occurrence check miss the real suffix.
    final String destFile = srcFile.endsWith(suffix)
            ? srcFile.substring(0, srcFile.length() - suffix.length())
            : srcFile + ".decompress";

    try (GzipCompressorInputStream in = new GzipCompressorInputStream(new FileInputStream(srcFile));
            FileOutputStream out = new FileOutputStream(destFile)) {
        byte[] buf = new byte[10240];
        int len;
        while ((len = in.read(buf)) > 0) {
            out.write(buf, 0, len);
        }
        out.flush();
    }
}
From source file:com.dubture.symfony.core.util.UncompressUtils.java
/**
 * Uncompress a gzip archive and returns the file where it has been
 * extracted.
 *
 * <p>The output file name is derived from the archive name via
 * {@code GzipUtils.getUncompressedFilename} and placed inside {@code outputDirectory}.
 *
 * @param archiveFile The archive file to uncompress
 * @param outputDirectory The output directory where to put the uncompressed archive
 *
 * @return The output file where the archive has been uncompressed
 *
 * @throws IOException When a problem occurs with either the input or output stream
 */
public static File uncompressGzipArchive(File archiveFile, File outputDirectory) throws IOException {
    String tarArchiveFilename = GzipUtils.getUncompressedFilename(archiveFile.getName());
    File outputFile = new File(outputDirectory, tarArchiveFilename);

    // Input streams are opened before the output, as before, so a missing archive does not
    // leave an empty output file behind. Every stream is closed even if opening a later one
    // fails or closing an earlier one throws.
    try (FileInputStream fileInputStream = new FileInputStream(archiveFile);
            BufferedInputStream bufferedInputStream = new BufferedInputStream(fileInputStream);
            GzipCompressorInputStream gzipInputStream = new GzipCompressorInputStream(bufferedInputStream);
            FileOutputStream outputStream = new FileOutputStream(outputFile)) {
        final byte[] data = new byte[BUFFER_SIZE];
        int byteReadCount;
        while ((byteReadCount = gzipInputStream.read(data, 0, BUFFER_SIZE)) != -1) {
            outputStream.write(data, 0, byteReadCount);
        }
    }

    return outputFile;
}
From source file:net.rwx.maven.asciidoc.utils.FileUtils.java
/**
 * Unpacks a gzip-compressed tar stream below {@code destination}.
 *
 * <p>Directory entries are created as directories. Every regular entry is streamed to a file
 * (parent directories are created as needed) and then passed to {@code forceDeleteOnExit}.
 * The supplied stream is closed when the method returns.
 *
 * @param is gzip-compressed tar data
 * @param destination path of the directory to extract into
 * @return {@code destination}, unchanged
 * @throws IOException if reading or writing fails, or if an entry name would resolve to a
 *     location outside {@code destination}
 */
public static String uncompress(InputStream is, String destination) throws IOException {
    String destRoot = new File(destination).getCanonicalPath();
    String destPrefix = destRoot.endsWith(File.separator) ? destRoot : destRoot + File.separator;
    byte[] buffer = new byte[8192];

    try (BufferedInputStream in = new BufferedInputStream(is);
            GzipCompressorInputStream gzIn = new GzipCompressorInputStream(in);
            TarArchiveInputStream tarInput = new TarArchiveInputStream(gzIn)) {
        TarArchiveEntry entry;
        // A plain while loop also copes with an archive that has no entries at all.
        while ((entry = tarInput.getNextTarEntry()) != null) {
            File f = new File(destination + "/" + entry.getName());

            // Reject entries such as "../../x" that would escape the destination.
            String targetPath = f.getCanonicalPath();
            if (!targetPath.equals(destRoot) && !targetPath.startsWith(destPrefix)) {
                throw new IOException("Archive entry is outside of the destination directory: "
                        + entry.getName());
            }

            // Decide by the entry type, not by what happens to exist on disk already.
            if (entry.isDirectory()) {
                FileUtils.forceMkdir(f);
                continue;
            }

            FileUtils.forceMkdir(f.getParentFile());
            try (OutputStream os = new FileOutputStream(f)) {
                // Stream chunk by chunk: write only the bytes of the current read and stop
                // at end-of-entry (-1) instead of buffering the whole entry in memory.
                int n;
                while ((n = tarInput.read(buffer)) != -1) {
                    os.write(buffer, 0, n);
                }
            }
            forceDeleteOnExit(f);
        }
    }
    return destination;
}
From source file:com.goldmansachs.kata2go.tools.utils.TarGz.java
public static void decompress(InputStream tarGzInputStream, Path outDir) throws IOException { GzipCompressorInputStream gzipStream = new GzipCompressorInputStream(tarGzInputStream); TarArchiveInputStream tarInput = new TarArchiveInputStream(gzipStream); TarArchiveEntry entry;// w w w. ja v a2 s . com int bufferSize = 1024; while ((entry = (TarArchiveEntry) tarInput.getNextEntry()) != null) { String entryName = entry.getName(); // strip out the leading directory like the --strip tar argument String entryNameWithoutLeadingDir = entryName.substring(entryName.indexOf("/") + 1); if (entryNameWithoutLeadingDir.isEmpty()) { continue; } Path outFile = outDir.resolve(entryNameWithoutLeadingDir); if (entry.isDirectory()) { outFile.toFile().mkdirs(); continue; } int count; byte data[] = new byte[bufferSize]; BufferedOutputStream fios = new BufferedOutputStream(new FileOutputStream(outFile.toFile()), bufferSize); while ((count = tarInput.read(data, 0, bufferSize)) != -1) { fios.write(data, 0, count); } fios.close(); } tarInput.close(); gzipStream.close(); }