Example usage for org.apache.commons.io IOUtils readLines

List of usage examples for org.apache.commons.io IOUtils readLines

Introduction

On this page you can find example usages of org.apache.commons.io IOUtils readLines.

Prototype

public static List readLines(Reader input) throws IOException 

Source Link

Document

Get the contents of a Reader as a list of Strings, one entry per line.

Usage

From source file:it.unimi.di.big.mg4j.document.WarcDocumentSequence.java

/**
 * Command-line entry point: parses the options with JSAP, builds a {@code WarcDocumentSequence}
 * over the given files and stores it with {@code BinIO.storeObject} under the name given by the
 * {@code sequence} argument.
 *
 * <p>File names are taken from the greedy {@code basename} arguments; when none are supplied they
 * are read from standard input, one per line. An empty set of files only produces a warning.
 *
 * @param args the command-line arguments
 * @throws Exception nothing is caught here; any failure propagates to the caller
 */
public static void main(String[] args) throws Exception {
    final String programName = WarcDocumentSequence.class.getName();
    final Parameter[] options = new Parameter[] {
            new FlaggedOption("factory", JSAP.CLASS_PARSER, IdentityDocumentFactory.class.getName(),
                    JSAP.NOT_REQUIRED, 'f', "factory", "A document factory with a standard constructor."),
            new FlaggedOption("property", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'p',
                    "property", "A 'key=value' specification, or the name of a property file")
                            .setAllowMultipleDeclarations(true),
            new Switch("gzip", 'z', "gzip", "Expect gzip-ed WARC content (files should end in .warc.gz)."),
            new FlaggedOption("bufferSize", JSAP.INTSIZE_PARSER, DEFAULT_BUFFER_SIZE, JSAP.NOT_REQUIRED, 'b',
                    "buffer-size", "The size of an I/O buffer."),
            new UnflaggedOption("sequence", JSAP.STRING_PARSER, JSAP.REQUIRED,
                    "The filename for the serialized sequence."),
            new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED,
                    JSAP.GREEDY,
                    "A list of basename files that will be indexed. If missing, a list of files will be read from standard input.") };
    final SimpleJSAP jsap = new SimpleJSAP(programName,
            "Saves a serialised Warc document sequence based on a set of file names.", options);

    final JSAPResult jsapResult = jsap.parse(args);
    if (jsap.messagePrinted()) {
        // JSAP has already printed a message for the user; nothing more to do.
        System.exit(1);
    }

    final DocumentFactory factory = PropertyBasedDocumentFactory.getInstance(jsapResult.getClass("factory"),
            jsapResult.getStringArray("property"));
    final boolean isGZipped = jsapResult.getBoolean("gzip");

    // Prefer file names given on the command line; fall back to standard input.
    String[] fileNames = jsapResult.getStringArray("basename");
    if (fileNames.length == 0) {
        fileNames = IOUtils.readLines(System.in).toArray(new String[0]);
    }
    if (fileNames.length == 0) {
        LOGGER.warn("Empty fileset");
    }

    final WarcDocumentSequence sequence = new WarcDocumentSequence(fileNames, factory, isGZipped,
            jsapResult.getInt("bufferSize"));
    BinIO.storeObject(sequence, jsapResult.getString("sequence"));
}

From source file:edu.cornell.mannlib.vitro.webapp.controller.datatools.dumprestore.NQuadLineSplitterTest.java

/**
 * Reads every line of the {@code NQuadLineSplitterTest.nq} resource (resolved relative to this
 * test class) into {@code testData}.
 *
 * @throws IOException if the resource cannot be read
 */
@BeforeClass
public static void readTestData() throws IOException {
    InputStream stream = NQuadLineSplitterTest.class.getResourceAsStream("NQuadLineSplitterTest.nq");
    try {
        testData = IOUtils.readLines(stream);
    } finally {
        // Release the resource stream even when reading fails; closeQuietly tolerates null.
        IOUtils.closeQuietly(stream);
    }
}

From source file:gaffer.example.gettingstarted.util.DataUtils.java

/**
 * Reads all lines from the given stream and always closes the stream afterwards.
 *
 * @param dataStream the stream to read; closed quietly before this method returns
 * @return the lines read, or {@code null} if an {@link IOException} occurred (the stack trace
 *         is printed in that case)
 */
public static List<String> loadData(final InputStream dataStream) {
    try {
        return IOUtils.readLines(dataStream);
    } catch (IOException readFailure) {
        readFailure.printStackTrace();
        // Callers receive null, not an empty list, when reading fails.
        return null;
    } finally {
        IOUtils.closeQuietly(dataStream);
    }
}

From source file:com.intropro.prairie.utils.FileUtils.java

/**
 * Recursively collects the lines of every non-directory entry found under {@code file}.
 *
 * <p>Entries are visited in the order returned by {@link File#list()}, which is not guaranteed
 * to be sorted.
 *
 * @param file the directory to walk
 * @return the lines of all files below {@code file}, sub-directories included
 * @throws IOException if {@code file} cannot be listed (for example, it is not a directory) or
 *                     a file cannot be read
 */
public static List<String> readLineInDirectory(File file) throws IOException {
    String[] children = file.list();
    if (children == null) {
        // File.list() returns null instead of throwing; fail with a clear message rather than an NPE.
        throw new IOException("Cannot list directory: " + file);
    }
    List<String> result = new LinkedList<>();
    for (String child : children) {
        File childFile = new File(file, child);
        if (childFile.isDirectory()) {
            result.addAll(readLineInDirectory(childFile));
        } else {
            // Close each file as soon as it has been read so descriptors are not leaked.
            try (FileInputStream in = new FileInputStream(childFile)) {
                result.addAll(IOUtils.readLines(in));
            }
        }
    }
    return result;
}

From source file:com.linkedin.pinot.perf.ForwardIndexWriterBenchmark.java

/**
 * Benchmark helper: reads a raw multi-value column from {@code rawFile} (one document per line,
 * comma-separated integers), writes it to {@code output.mv.fwd} through a
 * {@code FixedBitMultiValueWriter}, and prints size estimates for several offset encodings.
 *
 * @param rawFile text file with one line of comma-separated integers per document
 * @throws Exception if the file cannot be read, a token is not an integer, or writing fails
 */
public static void convertRawToForwardIndex(File rawFile) throws Exception {
    final List<String> lines;
    final FileReader reader = new FileReader(rawFile);
    try {
        lines = IOUtils.readLines(reader);
    } finally {
        // The original never closed the reader.
        IOUtils.closeQuietly(reader);
    }

    int totalDocs = lines.size();
    int max = Integer.MIN_VALUE;
    int totalNumValues = 0;
    int[][] data = new int[totalDocs][];
    for (int i = 0; i < totalDocs; i++) {
        String[] split = lines.get(i).split(",");
        totalNumValues += split.length;
        data[i] = new int[split.length];
        for (int j = 0; j < split.length; j++) {
            int val = Integer.parseInt(split[j]);
            data[i][j] = val;
            if (val > max) {
                max = val;
            }
        }
    }

    // Bits needed to represent every value in [0, max]. ceil(log2(max)) was one bit short when
    // max is an exact power of two (max = 8 needs 4 bits, not 3) and was degenerate for max <= 1.
    int maxBitsNeeded = Math.max(1, Integer.SIZE - Integer.numberOfLeadingZeros(Math.max(max, 0)));
    int size = 2048;
    // NOTE(review): offsets is never written to, so every bitmap below is built from zeros.
    int[] offsets = new int[size];
    int bitMapSize = 0;
    File outputFile = new File("output.mv.fwd");

    FixedBitMultiValueWriter fixedBitSkipListSCMVWriter = new FixedBitMultiValueWriter(outputFile, totalDocs,
            totalNumValues, maxBitsNeeded);
    try {
        for (int i = 0; i < totalDocs; i++) {
            fixedBitSkipListSCMVWriter.setIntArray(i, data[i]);
            boolean chunkComplete = i % size == size - 1;
            boolean lastDoc = i == totalDocs - 1;
            if (chunkComplete || lastDoc) {
                // A complete chunk uses the whole offsets array; a trailing partial chunk is truncated.
                int[] chunkOffsets = chunkComplete ? offsets : Arrays.copyOf(offsets, i % size);
                MutableRoaringBitmap rr1 = MutableRoaringBitmap.bitmapOf(chunkOffsets);
                ByteArrayOutputStream bos = new ByteArrayOutputStream();
                DataOutputStream dos = new DataOutputStream(bos);
                rr1.serialize(dos);
                dos.close();
                bitMapSize += bos.size();
            }
        }
    } finally {
        // Close the writer even if a document fails to be written.
        fixedBitSkipListSCMVWriter.close();
    }

    System.out.println("Output file size:" + outputFile.length());
    System.out.println("totalNumberOfDoc\t\t\t:" + totalDocs);
    System.out.println("totalNumberOfValues\t\t\t:" + totalNumValues);
    System.out.println("chunk size\t\t\t\t:" + size);
    System.out.println("Num chunks\t\t\t\t:" + totalDocs / size);
    int numChunks = totalDocs / size + 1;
    // long arithmetic: totalNumValues * maxBitsNeeded can exceed Integer.MAX_VALUE.
    long totalBits = (long) totalNumValues * maxBitsNeeded;
    long dataSizeinBytes = (totalBits + 7) / 8;

    System.out.println("Raw data size with fixed bit encoding\t:" + dataSizeinBytes);
    System.out.println("\nPer encoding size");
    System.out.println();
    System.out.println("size (offset + length)\t\t\t:" + ((totalDocs * (4 + 4)) + dataSizeinBytes));
    System.out.println();
    System.out.println("size (offset only)\t\t\t:" + ((totalDocs * (4)) + dataSizeinBytes));
    System.out.println();
    System.out.println("bitMapSize\t\t\t\t:" + bitMapSize);
    System.out.println("size (with bitmap)\t\t\t:" + (bitMapSize + (numChunks * 4) + dataSizeinBytes));

    System.out.println();
    System.out.println("Custom Bitset\t\t\t\t:" + (totalNumValues + 7) / 8);
    System.out.println("size (with custom bitset)\t\t\t:"
            + (((totalNumValues + 7) / 8) + (numChunks * 4) + dataSizeinBytes));
}

From source file:com.github.DataLoader.java

/**
 * Loads all repositories from the class-path resource {@code data/repos.txt}, links each fork to
 * its parent, and associates watchers read from {@code data/data.txt}.
 *
 * <p>Line formats, as parsed here:
 * <ul>
 *   <li>{@code repos.txt}: {@code repo_id:owner/name,created_at[,parent_id]}</li>
 *   <li>{@code data.txt}: {@code user_id:repo_id}</li>
 * </ul>
 *
 * @return the repositories keyed by repository id
 * @throws IOException if a resource is missing or cannot be read
 */
public static Map<String, Repository> loadRepositories() throws IOException {
    final Map<String, Repository> repositories = new HashMap<String, Repository>();
    final Map<String, String> relationships = new HashMap<String, String>();

    // First, discover all the repositories.
    for (final Object o : readResourceLines("data/repos.txt")) {
        final String line = (String) o;

        final String[] mainParts = line.trim().split(":");
        final String repoId = mainParts[0];

        final String[] repoData = mainParts[1].split(",");
        final String createdAt = repoData[1];
        final String parentId = repoData.length == 3 ? repoData[2] : null;

        final String[] fullName = repoData[0].split("/");
        final String owner = fullName[0];
        final String name = fullName[1];

        repositories.put(repoId, new Repository(repoId, owner, name, createdAt));

        if (parentId != null) {
            relationships.put(repoId, parentId);
        }
    }

    // Now that all the repositories have been loaded, establish any parent-child relationships.
    for (final Map.Entry<String, String> pair : relationships.entrySet()) {
        repositories.get(pair.getKey()).setParent(repositories.get(pair.getValue()));
    }

    final Map<String, Watcher> watchers = new HashMap<String, Watcher>();
    for (final Object o : readResourceLines("data/data.txt")) {
        final String line = (String) o;

        final String[] mainParts = line.trim().split(":");
        final String userId = mainParts[0];
        final String repoId = mainParts[1];

        // Reuse one Watcher per user id. The original looked the watcher up but never stored it,
        // so the cache stayed empty and every line produced a fresh Watcher.
        Watcher watcher = watchers.get(userId);
        if (watcher == null) {
            watcher = new Watcher(userId);
            watchers.put(userId, watcher);
        }
        repositories.get(repoId).associate(watcher);
    }

    return repositories;
}

/** Reads all lines of a class-path resource and closes the underlying stream. */
private static java.util.List<?> readResourceLines(final String resourceName) throws IOException {
    final java.io.InputStream stream = DataLoader.class.getClassLoader().getResourceAsStream(resourceName);
    if (stream == null) {
        // getResourceAsStream signals a missing resource with null; report which one.
        throw new IOException("Resource not found on class path: " + resourceName);
    }
    try {
        return IOUtils.readLines(stream);
    } finally {
        IOUtils.closeQuietly(stream);
    }
}

From source file:com.doculibre.constellio.utils.license.ApplyLicenseUtils.java

/**
 * Reads all lines of the given file.
 *
 * @param file the file to read
 * @return the lines of the file, in order
 * @throws IOException if the file cannot be opened or read
 */
@SuppressWarnings("unchecked")
private static List<String> readLines(File file) throws IOException {
    FileInputStream is = new FileInputStream(file);
    try {
        return IOUtils.readLines(is);
    } finally {
        // Close in finally so the stream is released even when readLines throws.
        IOUtils.closeQuietly(is);
    }
}

From source file:com.alexholmes.hadooputils.io.FileUtils.java

/**
 * Read the contents of the supplied file into a list.
 *
 * @param fs a Hadoop file system/*from   w ww  .ja v  a  2 s.  co  m*/
 * @param p  the file path
 * @return array of lines in the file
 * @throws java.io.IOException if something goes wrong
 */
public static List<String> readLines(final FileSystem fs, final Path p) throws IOException {
    InputStream stream = fs.open(p);
    try {
        return IOUtils.readLines(stream);
    } finally {
        stream.close();
    }
}

From source file:dz.jtsgen.processor.helper.OutputHelper.java

/**
 * Returns the lines of a generated source file that contain a match for {@code pattern}.
 *
 * <p>Asserts first that the compilation produced the requested file in
 * {@code StandardLocation.SOURCE_OUTPUT}.
 *
 * @param c           the compilation whose generated output is inspected
 * @param packageName package of the generated file
 * @param fileName    name of the generated file
 * @param pattern     pattern searched for in each line (via {@code Matcher.find()})
 * @return the matching lines, in file order
 * @throws IOException if the generated file cannot be read
 */
public static List<String> findSourceLine(Compilation c, String packageName, String fileName, Pattern pattern)
        throws IOException {
    Assert.assertTrue(c.generatedFile(StandardLocation.SOURCE_OUTPUT, packageName, fileName).isPresent());
    final JavaFileObject generated = c.generatedFile(StandardLocation.SOURCE_OUTPUT, packageName, fileName)
            .get();
    try (Reader source = generated.openReader(false)) {
        final List<String> allLines = IOUtils.readLines(source);
        return allLines.stream()
                .filter(line -> pattern.matcher(line).find())
                .collect(Collectors.toList());
    }
}

From source file:com.splunk.shuttl.testutil.SplunkTestUtils.java

/**
 * Reads the given search-result stream into a list of lines.
 *
 * @param results the stream to read; it is not closed by this method
 * @return the lines read from {@code results}
 * @throws RuntimeException wrapping the {@link IOException} if reading fails
 */
public static List<String> readSearchResults(InputStream results) {
    final List<String> lines;
    try {
        lines = IOUtils.readLines(results);
    } catch (IOException readFailure) {
        // Surface I/O problems as unchecked so callers need no throws clause.
        throw new RuntimeException(readFailure);
    }
    return lines;
}