Example usage for org.apache.commons.compress.compressors.gzip GzipCompressorInputStream GzipCompressorInputStream

List of usage examples for org.apache.commons.compress.compressors.gzip GzipCompressorInputStream GzipCompressorInputStream

Introduction

On this page you can find example usages of the org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream constructor GzipCompressorInputStream(InputStream).

Prototype

public GzipCompressorInputStream(InputStream inputStream) throws IOException 

Source Link

Document

Constructs a new input stream that decompresses gzip-compressed data read from the given InputStream.

Usage

From source file:eval.dataset.ParseWikiLog.java

/**
 * Scans the gzipped log dump {@code data/enwiki-20151201-pages-logging.xml.gz} line by line and
 * writes three text reports under {@code data/}:
 * <ul>
 *   <li>{@code user_page.txt} - one "user TAB page" line per completed log item,</li>
 *   <li>{@code user_list_page_edited.txt} - one line per user followed by that user's pages,</li>
 *   <li>{@code all_pages.txt} - the sorted set of page titles.</li>
 * </ul>
 * Only log titles without a {@code ':'} separator are recorded as pages. Spaces in user names and
 * page titles are replaced by underscores. Input is decoded, and output encoded, as UTF-8.
 *
 * @param ss command-line arguments (unused)
 * @throws FileNotFoundException if the input dump or an output file cannot be opened
 * @throws ParserConfigurationException declared for interface compatibility; not thrown here
 * @throws IOException if opening or closing one of the streams fails
 */
public static void main(String[] ss) throws FileNotFoundException, ParserConfigurationException, IOException {
    TreeMap<String, Set<String>> userPageList = new TreeMap<>();
    TreeSet<String> pageList = new TreeSet<>();
    int counterEntry = 0;

    // try-with-resources guarantees every stream is closed even when parsing fails midway.
    try (FileInputStream fin = new FileInputStream("data/enwiki-20151201-pages-logging.xml.gz");
            GzipCompressorInputStream gzIn = new GzipCompressorInputStream(fin);
            BufferedReader br = new BufferedReader(new InputStreamReader(gzIn, "UTF-8"));
            PrintWriter pw = new PrintWriter("data/user_page.txt", "UTF-8")) {
        // NOTE(review): the report headers name the 20150304 dump while the file read above is
        // the 20151201 dump - confirm which one is intended.
        pw.println(
                "#list of user names and pages that they have edited, deleted or created. These info are mined from logitems of enwiki-20150304-pages-logging.xml.gz");
        String currentUser = null;
        String currentPage = null;
        try {
            for (String line = br.readLine(); line != null; line = br.readLine()) {
                String trimmed = line.trim();

                if (trimmed.equals("</logitem>")) {
                    counterEntry++;
                    if (currentUser != null && currentPage != null) {
                        updateMap(userPageList, currentUser, currentPage);
                        pw.println(currentUser + "\t" + currentPage);
                        pageList.add(currentPage);
                    }
                    // Reset so that values never carry over into the next log item.
                    currentUser = null;
                    currentPage = null;
                } else if (trimmed.startsWith("<username>")) {
                    currentUser = trimmed.split(">")[1].split("<")[0].replace(" ", "_");
                } else if (trimmed.startsWith("<logtitle>")) {
                    String content = trimmed.split(">")[1].split("<")[0];
                    // Titles containing ':' are skipped; only colon-free titles are recorded.
                    if (content.split(":").length == 1) {
                        currentPage = content.replace(" ", "_");
                    }
                }
            }
        } catch (IOException ex) {
            // Best effort: a read failure is logged and the partial results are still reported.
            Logger.getLogger(ParseWikiLog.class.getName()).log(Level.SEVERE, null, ex);
        }
        pw.println("#analysed " + counterEntry + " entries of wikipesia log file");
        pw.println("#gathered a list of unique user of size " + userPageList.size());
        pw.println("#gathered a list of pages of size " + pageList.size());
    }

    try (PrintWriter pwUser = new PrintWriter("data/user_list_page_edited.txt", "UTF-8")) {
        pwUser.println(
                "#list of unique users and pages that they have edited, extracted from logitems of enwiki-20150304-pages-logging.xml.gz");
        for (String user : userPageList.keySet()) {
            pwUser.print(user);
            Set<String> getList = userPageList.get(user);
            for (String page : getList) {
                pwUser.print("\t" + page);
            }
            pwUser.println();
        }
    }

    try (PrintWriter pwPage = new PrintWriter("data/all_pages.txt", "UTF-8")) {
        pwPage.println("#list of the unique pages that are extracted from enwiki-20150304-pages-logging.xml.gz");
        for (String page : pageList) {
            pwPage.println(page);
        }
    }

    System.out.println("#analysed " + counterEntry + " entries of wikipesia log file");
    System.out.println("#gathered a list of unique user of size " + userPageList.size());
    System.out.println("#gathered a list of pages of size " + pageList.size());
}

From source file:com.openshift.client.utils.TarFileUtils.java

/**
 * Reports whether the gzip-compressed tar archive read from the given stream contains a
 * directory entry whose name equals {@code GIT_FOLDER_NAME}.
 * <p>
 * Scanning stops at the first match. The tar stream wrapping {@code inputStream} is handed to
 * {@code StreamUtils.close} before returning, whether or not scanning succeeded.
 *
 * @param inputStream stream positioned at the start of a tar.gz archive
 * @return {@code true} if a matching directory entry is found, {@code false} otherwise
 * @throws IOException if the archive cannot be read
 */
public static boolean hasGitFolder(InputStream inputStream) throws IOException {
    TarArchiveInputStream tarInputStream = null;
    try {
        tarInputStream = new TarArchiveInputStream(new GzipCompressorInputStream(inputStream));
        TarArchiveEntry current = tarInputStream.getNextTarEntry();
        while (current != null) {
            boolean nameMatches = GIT_FOLDER_NAME.equals(current.getName());
            if (nameMatches && current.isDirectory()) {
                return true;
            }
            current = tarInputStream.getNextTarEntry();
        }
        return false;
    } finally {
        StreamUtils.close(tarInputStream);
    }
}

From source file:com.twitter.heron.downloader.Extractor.java

/**
 * Extracts a gzip-compressed tar archive read from {@code in} into {@code destination}.
 * <p>
 * Directory entries are created as directories; every other entry is copied to a file,
 * replacing an existing file of the same name. Parent directories of file entries are created
 * on demand, so archives that omit explicit directory entries still extract.
 * <p>
 * Each entry name is resolved against the destination and normalized; an entry that would land
 * outside the destination (for example via {@code ../}) aborts the extraction.
 *
 * @param in stream positioned at the start of a tar.gz archive; closed when this method returns
 * @param destination directory to extract into
 * @throws IOException if reading or writing fails, or an entry would escape {@code destination}
 */
static void extract(InputStream in, Path destination) throws IOException {
    try (final BufferedInputStream bufferedInputStream = new BufferedInputStream(in);
            final GzipCompressorInputStream gzipInputStream = new GzipCompressorInputStream(
                    bufferedInputStream);
            final TarArchiveInputStream tarInputStream = new TarArchiveInputStream(gzipInputStream)) {
        final String destinationAbsolutePath = destination.toFile().getAbsolutePath();
        final Path destinationRoot = Paths.get(destinationAbsolutePath).normalize();

        TarArchiveEntry entry;
        while ((entry = (TarArchiveEntry) tarInputStream.getNextEntry()) != null) {
            final Path target = Paths.get(destinationAbsolutePath, entry.getName()).normalize();
            // Guard against path traversal: the normalized target must stay under the root.
            if (!target.startsWith(destinationRoot)) {
                throw new IOException(
                        "Tar entry is outside of the destination directory: " + entry.getName());
            }

            if (entry.isDirectory()) {
                Files.createDirectories(target);
            } else {
                final Path parent = target.getParent();
                if (parent != null) {
                    Files.createDirectories(parent);
                }
                Files.copy(tarInputStream, target, StandardCopyOption.REPLACE_EXISTING);
            }
        }
    }
}

From source file:edu.umd.umiacs.clip.tools.io.SerializationTools.java

/**
 * Reads a single Java-serialized object from the file at {@code path}.
 * <p>
 * The file is decompressed first when its name ends in {@code .bz2} (bzip2) or {@code .gz}
 * (gzip); any other name is read as-is.
 *
 * @param path location of the serialized file
 * @return the deserialized object, or {@code null} if reading or class resolution failed (the
 *         stack trace is printed in that case)
 */
public static Object deserialize(String path) {
    // NOTE(review): native Java deserialization is unsafe on untrusted files - confirm that
    // callers only pass trusted paths.
    try (FileInputStream fileIn = new FileInputStream(path);
            ObjectInputStream objectIn = new ObjectInputStream(
                    path.endsWith(".bz2") ? new BZip2CompressorInputStream(fileIn)
                            : path.endsWith(".gz") ? new GzipCompressorInputStream(fileIn) : fileIn)) {
        return objectIn.readObject();
    } catch (IOException | ClassNotFoundException failure) {
        failure.printStackTrace();
        return null;
    }
}

From source file:msec.org.GzipUtil.java

/**
 * Decompresses the gzip file {@code srcFile} into a sibling file.
 * <p>
 * If {@code srcFile} ends with {@code ".gz"} the output name is the input name with that suffix
 * removed; otherwise {@code ".decompress"} is appended to the input name.
 *
 * @param srcFile path of the gzip file to decompress
 * @throws Exception if the input cannot be opened or read, or the output cannot be written
 */
static public void unzip(String srcFile) throws Exception {
    // endsWith (rather than the first indexOf) so names with an earlier ".gz", such as
    // "logs.gz.old.gz", still have only the trailing suffix stripped.
    final String destFile = srcFile.endsWith(".gz")
            ? srcFile.substring(0, srcFile.length() - 3)
            : srcFile + ".decompress";

    // Separate resources so the file handle is released even if the gzip header is invalid.
    try (FileInputStream fileIn = new FileInputStream(srcFile);
            GzipCompressorInputStream in = new GzipCompressorInputStream(fileIn);
            FileOutputStream out = new FileOutputStream(destFile)) {
        byte[] buf = new byte[10240];
        int len;
        while ((len = in.read(buf)) > 0) {
            out.write(buf, 0, len);
        }
        out.flush();
    }
}

From source file:com.dubture.symfony.core.util.UncompressUtils.java

/**
 * Uncompress a gzip archive and returns the file where it has been
 * extracted./*from   ww w.ja  v a2s  .c  om*/
 *
 * @param archiveFile The archive file to uncompress
 * @param outputDirectory The output directory where to put the uncompressed archive
 *
 * @return The output file where the archive has been uncompressed
 *
 * @throws IOException When a problem occurs with either the input or output stream
 */
public static File uncompressGzipArchive(File archiveFile, File outputDirectory) throws IOException {
    FileInputStream fileInputStream = new FileInputStream(archiveFile);
    BufferedInputStream bufferedInputStream = new BufferedInputStream(fileInputStream);
    GzipCompressorInputStream gzipInputStream = new GzipCompressorInputStream(bufferedInputStream);

    String tarArchiveFilename = GzipUtils.getUncompressedFilename(archiveFile.getName());
    File outputFile = new File(outputDirectory, tarArchiveFilename);
    FileOutputStream outputStream = new FileOutputStream(outputFile);

    int byteReadCount = 0;
    final byte[] data = new byte[BUFFER_SIZE];

    try {
        while ((byteReadCount = gzipInputStream.read(data, 0, BUFFER_SIZE)) != -1) {
            outputStream.write(data, 0, byteReadCount);
        }
    } finally {
        outputStream.close();
        gzipInputStream.close();
    }

    return outputFile;
}

From source file:de.flapdoodle.embed.process.extract.TgzExtractor.java

/**
 * Opens {@code source} as a gzip-compressed tar archive and wraps it for extraction.
 * <p>
 * On success the returned wrapper holds the open tar stream. If building the stream chain
 * fails (for example because the file is not valid gzip), the underlying file stream is closed
 * before the exception propagates.
 *
 * @param source the .tgz / .tar.gz file to open
 * @return a wrapper around the opened tar stream
 * @throws IOException if the file cannot be opened or is not valid gzip data
 */
protected ArchiveWrapper archiveStream(File source) throws IOException {
    FileInputStream fin = new FileInputStream(source);
    boolean handedOver = false;
    try {
        BufferedInputStream in = new BufferedInputStream(fin);
        GzipCompressorInputStream gzIn = new GzipCompressorInputStream(in);

        TarArchiveInputStream tarIn = new TarArchiveInputStream(gzIn);
        ArchiveWrapper wrapper = new TarArchiveWrapper(tarIn);
        handedOver = true;
        return wrapper;
    } finally {
        if (!handedOver) {
            try {
                fin.close();
            } catch (IOException ignored) {
                // Keep the original failure as the exception seen by the caller.
            }
        }
    }
}

From source file:com.ttech.cordovabuild.infrastructure.archive.ArchiveUtils.java

/**
 * Extracts the archive read from {@code is} into {@code localPath}, creating that directory
 * (and any missing parents) first.
 * <p>
 * The archive format is auto-detected through {@code ArchiveStreamFactory}. When detection
 * fails with an {@code ArchiveException}, the stream is retried as a gzip-compressed tar.
 *
 * @param is stream containing the archive
 * @param localPath directory to extract into
 * @throws ArchiveExtractionException if the directory cannot be created or extraction fails
 */
public static void extractFiles(InputStream is, Path localPath) {
    try {
        Files.createDirectories(localPath);
    } catch (IOException e) {
        throw new ArchiveExtractionException(e);
    }

    // Format detection peeks at the stream via mark/reset and rejects streams without mark
    // support, so such streams are buffered. The same wrapper is reused for the fallback so
    // bytes already pulled into the buffer are not lost.
    final InputStream source = (is == null || is.markSupported()) ? is : new java.io.BufferedInputStream(is);

    ArchiveStreamFactory archiveStreamFactory = new ArchiveStreamFactory();
    try (ArchiveInputStream ais = archiveStreamFactory.createArchiveInputStream(source)) {
        extractArchive(localPath, ais);
    } catch (ArchiveException e) {
        LOGGER.info("archiveFactory could not determine archive file type probably tar.gz");
        try (ArchiveInputStream ais = new TarArchiveInputStream(new GzipCompressorInputStream(source))) {
            extractArchive(localPath, ais);
        } catch (IOException e1) {
            throw new ArchiveExtractionException(e1);
        }
    } catch (IOException e) {
        throw new ArchiveExtractionException(e);
    }
}

From source file:com.amazonaws.codepipeline.jenkinsplugin.ExtractionTools.java

/**
 * Extracts the gzip-compressed tar archive {@code source} into {@code destination} by
 * delegating to {@code extractArchive}.
 *
 * @param source the .tar.gz file to read
 * @param destination location passed on to {@code extractArchive}
 * @throws IOException if the file cannot be opened, is not valid gzip data, or extraction fails
 */
private static void extractTarGz(final File source, final File destination) throws IOException {
    // Each layer is its own resource so the file handle is closed even when the gzip
    // constructor rejects the header.
    try (final FileInputStream fileInputStream = new FileInputStream(source);
            final GzipCompressorInputStream gzipInputStream = new GzipCompressorInputStream(fileInputStream);
            final ArchiveInputStream tarGzArchiveInputStream = new TarArchiveInputStream(gzipInputStream)) {
        extractArchive(destination, tarGzArchiveInputStream);
    }
}

From source file:net.orpiske.ssps.common.archive.CompressedArchiveUtils.java

/**
 * Decompress a file/*from w  w  w .  ja v  a 2 s  .  c o  m*/
 * @param source the source file to be uncompressed
 * @param destination the destination directory
 * @return the number of bytes read
 * @throws IOException for lower level I/O errors
 */
public static long gzDecompress(File source, File destination) throws IOException {
    FileOutputStream out;

    prepareDestination(destination);
    out = new FileOutputStream(destination);

    FileInputStream fin = null;
    BufferedInputStream bin = null;
    GzipCompressorInputStream gzIn = null;

    try {
        fin = new FileInputStream(source);
        bin = new BufferedInputStream(fin);
        gzIn = new GzipCompressorInputStream(bin);

        IOUtils.copy(gzIn, out);

        gzIn.close();

        fin.close();
        bin.close();
        out.close();
    } catch (IOException e) {
        IOUtils.closeQuietly(out);

        IOUtils.closeQuietly(fin);
        IOUtils.closeQuietly(bin);
        IOUtils.closeQuietly(gzIn);

        throw e;
    }

    return gzIn.getBytesRead();
}