Example usage for org.apache.commons.compress.archivers.tar TarArchiveEntry getFile

List of usage examples for org.apache.commons.compress.archivers.tar TarArchiveEntry getFile

Introduction

On this page you can find example usage of the org.apache.commons.compress.archivers.tar.TarArchiveEntry.getFile() method.

Prototype

public File getFile() 

Source Link

Document

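Gets this entry's file. This is only useful for entries that were created from a file; for entries read from an archive the result is null.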

Usage

From source file: cgs_lda_multicore.DataModel.DataPreparation.java

/**
 * Reads a dataset from the given file.
 *
 * <p>Only files whose name ends with {@code .tar.gz} are processed: the archive is
 * decompressed and every regular-file entry is read line by line as UTF-8 text.
 * Any other file name is ignored.
 *
 * <p>Entry content is read directly from the tar stream. {@code TarArchiveEntry.getFile()}
 * is deliberately not used: it is only populated for entries created from a file on
 * disk and is {@code null} for entries read out of an archive.
 *
 * @param fileName path of the dataset file to read
 * @return always {@code null} in the code shown here; the per-line processing that
 *         would build the dataset is not part of this snippet
 * @throws Exception declared for backward compatibility; failures raised while reading
 *         are caught, reported on standard output, and turned into a {@code null} result
 */
public static LDADataset readDatasetCORE(String fileName) throws Exception {
    try {
        if (fileName.endsWith(".tar.gz")) {
            // This case read from .tar.gz file.
            // Each layer is its own resource so that a failure while constructing an
            // outer stream still closes the inner ones that were already opened.
            try (FileInputStream fileStream = new FileInputStream(fileName);
                    GZIPInputStream gzipStream = new GZIPInputStream(fileStream);
                    TarArchiveInputStream tAIS = new TarArchiveInputStream(gzipStream)) {
                TarArchiveEntry tarArchiveEntry;

                while ((tarArchiveEntry = tAIS.getNextTarEntry()) != null) {
                    if (!tarArchiveEntry.isFile()) {
                        continue;
                    }

                    // The tar stream reports end-of-stream at the end of the current
                    // entry, so a reader wrapped around it sees exactly this entry's
                    // content. The reader is intentionally NOT closed here: closing it
                    // would close the shared archive stream and abort the iteration.
                    BufferedReader reader = new BufferedReader(new InputStreamReader(tAIS, "UTF-8"));
                    String line;

                    while ((line = reader.readLine()) != null) {
                        // Process line, each line is a json of a document.
                    }
                }
            }
        }
        return null;
    } catch (Exception e) {
        System.out.println("Read Dataset Error: " + e.getMessage());
        e.printStackTrace();
        return null;
    }
}