List of usage examples for org.apache.commons.io.IOUtils#lineIterator
Signature: public static LineIterator lineIterator(Reader reader)
Returns an iterator over the lines in the given Reader; the caller is responsible for closing the Reader (or the LineIterator) when done.
From source file: org.datavec.api.records.reader.impl.LineRecordReader.java
@Override public boolean hasNext() { if (iter != null && iter.hasNext()) { return true; } else {//ww w . jav a 2 s . c o m if (locations != null && !(inputSplit instanceof StringSplit) && splitIndex < locations.length - 1) { splitIndex++; lineIndex = 0; //New split -> reset line count try { close(); iter = IOUtils.lineIterator(new InputStreamReader(locations[splitIndex].toURL().openStream())); onLocationOpen(locations[splitIndex]); } catch (IOException e) { e.printStackTrace(); } return iter.hasNext(); } return false; } }
From source file:org.datavec.api.records.reader.impl.LineRecordReader.java
/**
 * Builds a line iterator for the given location index, based on the type of
 * the configured input split.
 *
 * @param location index into {@code inputSplit.locations()} (ignored for
 *                 StringSplit and InputStreamInputSplit)
 * @return an iterator over the lines of the selected source
 * @throws UnsupportedOperationException if no iterator could be created for
 *                                       the input split
 * @throws RuntimeException wrapping any IOException raised while opening a
 *                          location stream
 */
protected Iterator<String> getIterator(int location) {
    Iterator<String> result = null;
    if (inputSplit instanceof StringSplit) {
        // A StringSplit holds a single in-memory record.
        StringSplit stringSplit = (StringSplit) inputSplit;
        result = Collections.singletonList(stringSplit.getData()).listIterator();
    } else if (inputSplit instanceof InputStreamInputSplit) {
        InputStream stream = ((InputStreamInputSplit) inputSplit).getIs();
        if (stream != null) {
            result = IOUtils.lineIterator(new InputStreamReader(stream));
        }
    } else {
        this.locations = inputSplit.locations();
        if (locations != null && locations.length > 0) {
            InputStream stream;
            try {
                stream = locations[location].toURL().openStream();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            result = IOUtils.lineIterator(new InputStreamReader(stream));
        }
    }
    if (result == null) {
        throw new UnsupportedOperationException("Unknown input split: " + inputSplit);
    }
    return result;
}
From source file:org.datavec.api.records.reader.impl.LineRecordReader.java
@Override public List<Record> loadFromMetaData(List<RecordMetaData> recordMetaDatas) throws IOException { //First: create a sorted list of the RecordMetaData List<Triple<Integer, RecordMetaDataLine, List<Writable>>> list = new ArrayList<>(); Set<URI> uris = new HashSet<>(); Iterator<RecordMetaData> iter = recordMetaDatas.iterator(); int count = 0; while (iter.hasNext()) { RecordMetaData rmd = iter.next(); if (!(rmd instanceof RecordMetaDataLine)) { throw new IllegalArgumentException( "Invalid metadata; expected RecordMetaDataLine instance; got: " + rmd); }//from w ww.jav a 2 s . com list.add(new Triple<>(count++, (RecordMetaDataLine) rmd, (List<Writable>) null)); if (rmd.getURI() != null) uris.add(rmd.getURI()); } List<URI> sortedURIs = null; if (uris.size() > 0) { sortedURIs = new ArrayList<>(uris); Collections.sort(sortedURIs); } //Sort by URI first (if possible - don't always have URIs though, for String split etc), then sort by line number: Collections.sort(list, new Comparator<Triple<Integer, RecordMetaDataLine, List<Writable>>>() { @Override public int compare(Triple<Integer, RecordMetaDataLine, List<Writable>> o1, Triple<Integer, RecordMetaDataLine, List<Writable>> o2) { if (o1.getSecond().getURI() != null) { if (!o1.getSecond().getURI().equals(o2.getSecond().getURI())) { return o1.getSecond().getURI().compareTo(o2.getSecond().getURI()); } } return Integer.compare(o1.getSecond().getLineNumber(), o2.getSecond().getLineNumber()); } }); if (uris.size() > 0 && sortedURIs != null) { //URIs case - possibly with multiple URIs Iterator<Triple<Integer, RecordMetaDataLine, List<Writable>>> metaIter = list.iterator(); //Currently sorted by URI, then line number URI currentURI = sortedURIs.get(0); Iterator<String> currentUriIter = IOUtils .lineIterator(new InputStreamReader(currentURI.toURL().openStream())); int currentURIIdx = 0; //Index of URI int currentLineIdx = 0; //Index of the line for the current URI String line = currentUriIter.next(); while 
(metaIter.hasNext()) { Triple<Integer, RecordMetaDataLine, List<Writable>> t = metaIter.next(); URI thisURI = t.getSecond().getURI(); int nextLineIdx = t.getSecond().getLineNumber(); //First: find the right URI for this record... while (!currentURI.equals(thisURI)) { //Iterate to the next URI currentURIIdx++; if (currentURIIdx >= sortedURIs.size()) { //Should never happen throw new IllegalStateException( "Count not find URI " + thisURI + " in URIs list: " + sortedURIs); } currentURI = sortedURIs.get(currentURIIdx); currentLineIdx = 0; if (currentURI.equals(thisURI)) { //Found the correct URI for this MetaData instance closeIfRequired(currentUriIter); currentUriIter = IOUtils .lineIterator(new InputStreamReader(currentURI.toURL().openStream())); line = currentUriIter.next(); } } //Have the correct URI/iter open -> scan to the required line while (currentLineIdx < nextLineIdx && currentUriIter.hasNext()) { line = currentUriIter.next(); currentLineIdx++; } if (currentLineIdx < nextLineIdx && !currentUriIter.hasNext()) { throw new IllegalStateException("Could not get line " + nextLineIdx + " from URI " + currentURI + ": has only " + currentLineIdx + " lines"); } t.setThird(Collections.<Writable>singletonList(new Text(line))); } } else { //Not URI based: String split, etc Iterator<String> iterator = getIterator(0); Iterator<Triple<Integer, RecordMetaDataLine, List<Writable>>> metaIter = list.iterator(); int currentLineIdx = 0; String line = iterator.next(); while (metaIter.hasNext()) { Triple<Integer, RecordMetaDataLine, List<Writable>> t = metaIter.next(); int nextLineIdx = t.getSecond().getLineNumber(); while (currentLineIdx < nextLineIdx && iterator.hasNext()) { line = iterator.next(); currentLineIdx++; } t.setThird(Collections.<Writable>singletonList(new Text(line))); } closeIfRequired(iterator); } //Now, sort by the original (request) order: Collections.sort(list, new Comparator<Triple<Integer, RecordMetaDataLine, List<Writable>>>() { @Override public int 
compare(Triple<Integer, RecordMetaDataLine, List<Writable>> o1, Triple<Integer, RecordMetaDataLine, List<Writable>> o2) { return Integer.compare(o1.getFirst(), o2.getFirst()); } }); //And return... List<Record> out = new ArrayList<>(); for (Triple<Integer, RecordMetaDataLine, List<Writable>> t : list) { out.add(new org.datavec.api.records.impl.Record(t.getThird(), t.getSecond())); } return out; }
From source file:org.deeplearning4j.models.embeddings.loader.WordVectorSerializer.java
/** * Loads an in memory cache from the given path (sets syn0 and the vocab) * * @param vectorsFile the path of the file to load * @return a Pair holding the lookup table and the vocab cache. * @throws FileNotFoundException if the input file does not exist *///from www . j a v a 2 s .c o m public static Pair<InMemoryLookupTable, VocabCache> loadTxt(File vectorsFile) throws FileNotFoundException, UnsupportedEncodingException { BufferedReader reader = new BufferedReader( new InputStreamReader(new FileInputStream(vectorsFile), "UTF-8")); AbstractCache cache = new AbstractCache<>(); LineIterator iter = IOUtils.lineIterator(reader); String line = null; boolean hasHeader = false; if (iter.hasNext()) { line = iter.nextLine(); // skip header line //look for spaces if (!line.contains(" ")) { log.debug("Skipping first line"); hasHeader = true; } else { // we should check for something that looks like proper word vectors here. i.e: 1 word at the 0 position, and bunch of floats further String[] split = line.split(" "); try { long[] header = new long[split.length]; for (int x = 0; x < split.length; x++) { header[x] = Long.parseLong(split[x]); } if (split.length < 4) hasHeader = true; // now we know, if that's all ints - it's just a header // [0] - number of words // [1] - vectorSize // [2] - number of documents <-- DL4j-only value if (split.length == 3) cache.incrementTotalDocCount(header[2]); printOutProjectedMemoryUse(header[0], (int) header[1], 1); hasHeader = true; try { reader.close(); } catch (Exception ex) { } } catch (Exception e) { // if any conversion exception hits - that'll be considered header hasHeader = false; } } } //reposition buffer to be one line ahead if (hasHeader) { line = ""; iter.close(); reader = new BufferedReader(new FileReader(vectorsFile)); iter = IOUtils.lineIterator(reader); iter.nextLine(); } List<INDArray> arrays = new ArrayList<>(); while (iter.hasNext()) { if (line.isEmpty()) line = iter.nextLine(); String[] split = line.split(" "); String 
word = decodeB64(split[0]); //split[0].replaceAll(whitespaceReplacement, " "); VocabWord word1 = new VocabWord(1.0, word); word1.setIndex(cache.numWords()); cache.addToken(word1); cache.addWordToIndex(word1.getIndex(), word); cache.putVocabWord(word); float[] vector = new float[split.length - 1]; for (int i = 1; i < split.length; i++) { vector[i - 1] = Float.parseFloat(split[i]); } INDArray row = Nd4j.create(vector); arrays.add(row); // workaround for skipped first row line = ""; } INDArray syn = Nd4j.vstack(arrays); InMemoryLookupTable lookupTable = (InMemoryLookupTable) new InMemoryLookupTable.Builder() .vectorLength(arrays.get(0).columns()).useAdaGrad(false).cache(cache).useHierarchicSoftmax(false) .build(); if (Nd4j.ENFORCE_NUMERICAL_STABILITY) Nd4j.clearNans(syn); lookupTable.setSyn0(syn); iter.close(); try { reader.close(); } catch (Exception e) { } return new Pair<>(lookupTable, (VocabCache) cache); }
From source file:org.deeplearning4j.models.glove.CoOccurrences.java
/** * Load from an input stream with the following format: * w1 w2 score//w w w .j a v a 2 s.c o m * @param from the input stream to read from * @return the co occurrences based on the input stream */ public static CoOccurrences load(InputStream from) { CoOccurrences ret = new CoOccurrences(); ret.coOccurrences = new ArrayList<>(); CounterMap<String, String> counter = new CounterMap<>(); Reader inputStream = new InputStreamReader(from); LineIterator iter = IOUtils.lineIterator(inputStream); String line; while ((iter.hasNext())) { line = iter.nextLine(); String[] split = line.split(" "); if (split.length < 3) continue; //no empty keys if (split[0].isEmpty() || split[1].isEmpty()) continue; ret.coOccurrences.add(new Pair<>(split[0], split[1])); counter.incrementCount(split[0], split[1], Double.parseDouble(split[2])); } ret.coOCurreneCounts = counter; return ret; }
From source file:org.deeplearning4j.models.word2vec.wordstore.inmemory.InMemoryLookupCache.java
/** * Load a look up cache from an input stream * delimited by \n/*from ww w . j a v a2 s. com*/ * @param from the input stream to read from * @return the in memory lookup cache */ public static InMemoryLookupCache load(InputStream from) { Reader inputStream = new InputStreamReader(from); LineIterator iter = IOUtils.lineIterator(inputStream); String line; InMemoryLookupCache ret = new InMemoryLookupCache(); int count = 0; while ((iter.hasNext())) { line = iter.nextLine(); if (line.isEmpty()) continue; ret.incrementWordCount(line); VocabWord word = new VocabWord(1.0, line); word.setIndex(count); ret.addToken(word); ret.addWordToIndex(count, line); ret.putVocabWord(line); count++; } return ret; }
From source file:org.eclim.installer.ant.ShutdownTask.java
/** * Executes this task./*from w ww .j a v a 2 s .c o m*/ */ @SuppressWarnings("unchecked") public void execute() throws BuildException { Gson gson = new Gson(); FileReader reader = null; try { File instances = new File(System.getProperty("user.home") + "/.eclim/.eclimd_instances"); int count = 0; if (instances.exists()) { reader = new FileReader(instances); for (Iterator<String> ii = IOUtils.lineIterator(reader); ii.hasNext();) { count++; String line = ii.next(); String name = null; int port = 9091; if (line.startsWith("{")) { Instance instance = gson.fromJson(line, Instance.class); name = instance.workspace + ':' + instance.port; port = instance.port; } else { name = line; port = Integer.parseInt(line.replaceFirst(".*:", "")); } try { log("Shutting down eclimd: " + name); shutdown(port); } catch (Exception e) { log("Unable to shut down eclimd (" + name + "): " + e.getClass().getName() + " - " + e.getMessage()); } } } // if no registered instances found, try shutting down the default port to // account for users on old eclim versions if (count == 0) { try { shutdown(9091); } catch (Exception e) { log("Unable to shut down eclimd (9091): " + e.getClass().getName() + " - " + e.getMessage()); } } } catch (FileNotFoundException fnfe) { log("Unable to locate eclimd instances file."); } finally { IOUtils.closeQuietly(reader); } }
From source file:org.eclipse.smarthome.extensionservice.marketplace.internal.BindingExtensionHandler.java
private Map<String, Long> loadInstalledBindingsMap() { File dataFile = bundleContext.getDataFile(BINDING_FILE); if (dataFile != null && dataFile.exists()) { try (FileReader reader = new FileReader(dataFile)) { LineIterator lineIterator = IOUtils.lineIterator(reader); Map<String, Long> map = new HashMap<>(); while (lineIterator.hasNext()) { String line = lineIterator.nextLine(); String[] parts = line.split(";"); if (parts.length == 2) { try { map.put(parts[0], Long.valueOf(parts[1])); } catch (NumberFormatException e) { logger.debug("Cannot parse '{}' as a number in file {} - ignoring it.", parts[1], dataFile.getName()); }/*from w w w .j a v a 2 s . co m*/ } else { logger.debug("Invalid line in file {} - ignoring it:\n{}", dataFile.getName(), line); } } return map; } catch (IOException e) { logger.debug("File '{}' for installed bindings does not exist.", dataFile.getName()); // ignore and just return an empty map } } return new HashMap<>(); }
From source file:org.eclipse.smarthome.extensionservice.marketplace.internal.BundleExtensionHandler.java
private Map<String, Long> loadInstalledBundlesFile(File dataFile) { try (FileReader reader = new FileReader(dataFile)) { LineIterator lineIterator = IOUtils.lineIterator(reader); Map<String, Long> map = new HashMap<>(); while (lineIterator.hasNext()) { String line = lineIterator.nextLine(); String[] parts = line.split(";"); if (parts.length == 2) { try { map.put(parts[0], Long.valueOf(parts[1])); } catch (NumberFormatException e) { logger.debug("Cannot parse '{}' as a number in file {} - ignoring it.", parts[1], dataFile.getName()); }/*w w w . ja va 2s . c o m*/ } else { logger.debug("Invalid line in file {} - ignoring it:\n{}", dataFile.getName(), line); } } return map; } catch (IOException e) { logger.debug("File '{}' for installed bundles does not exist.", dataFile.getName()); // ignore and just return an empty map } return new HashMap<>(); }
From source file:org.jasig.ssp.web.api.reports.AbstractReportControllerIntegrationTest.java
/**
 * Asserts that the body lines of the CSV report in the given response match
 * the expected lines. Lines before the first line matching
 * {@code firstBodyRowRule} are treated as header content and skipped; when the
 * rule is null, every line is considered part of the body.
 *
 * @param expectedReportBodyLines the expected body lines
 * @param response                the response holding the CSV report
 * @param firstBodyRowRule        predicate identifying the first body row, or
 *                                null to accept all lines
 * @throws UnsupportedEncodingException if the response content cannot be decoded
 */
protected void expectReportBodyLines(List<String> expectedReportBodyLines, MockHttpServletResponse response,
        Predicate<String> firstBodyRowRule) throws UnsupportedEncodingException {
    final String csvReport = response.getContentAsString();
    final List<String> actualBodyLines = new ArrayList<String>();
    final LineIterator lines = IOUtils.lineIterator(new CharSequenceReader(csvReport));
    // Once the first body row is seen, every subsequent line is accumulated.
    boolean inBody = (firstBodyRowRule == null);
    while (lines.hasNext()) {
        String line = lines.next();
        if (inBody || firstBodyRowRule.apply(line)) {
            inBody = true;
            actualBodyLines.add(line);
        }
    }
    assertStringCollectionsEqual(expectedReportBodyLines, actualBodyLines);
}