List of usage examples for org.apache.commons.io.IOUtils#lineIterator
Signature: public static LineIterator lineIterator(Reader reader)
Returns an iterator over the lines in the given Reader; the caller is responsible for closing the Reader (or the LineIterator) when done.
From source file: org.datavec.api.records.reader.impl.LineRecordReader.java
@Override public boolean hasNext() { if (iter != null && iter.hasNext()) { return true; } else {//ww w . jav a 2 s . c o m if (locations != null && !(inputSplit instanceof StringSplit) && splitIndex < locations.length - 1) { splitIndex++; lineIndex = 0; //New split -> reset line count try { close(); iter = IOUtils.lineIterator(new InputStreamReader(locations[splitIndex].toURL().openStream())); onLocationOpen(locations[splitIndex]); } catch (IOException e) { e.printStackTrace(); } return iter.hasNext(); } return false; } }
From source file:org.datavec.api.records.reader.impl.LineRecordReader.java
/**
 * Builds a line iterator for the given location index, based on the type of
 * the configured input split.
 *
 * @param location index into {@code inputSplit.locations()} (ignored for
 *                 StringSplit and InputStreamInputSplit)
 * @return an iterator over the lines of the selected source
 * @throws UnsupportedOperationException if no iterator could be created for
 *                                       the input split
 * @throws RuntimeException wrapping any IOException raised while opening a
 *                          location stream
 */
protected Iterator<String> getIterator(int location) {
    Iterator<String> result = null;
    if (inputSplit instanceof StringSplit) {
        // A StringSplit holds a single in-memory record.
        StringSplit stringSplit = (StringSplit) inputSplit;
        result = Collections.singletonList(stringSplit.getData()).listIterator();
    } else if (inputSplit instanceof InputStreamInputSplit) {
        InputStream stream = ((InputStreamInputSplit) inputSplit).getIs();
        if (stream != null) {
            result = IOUtils.lineIterator(new InputStreamReader(stream));
        }
    } else {
        this.locations = inputSplit.locations();
        if (locations != null && locations.length > 0) {
            InputStream stream;
            try {
                stream = locations[location].toURL().openStream();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            result = IOUtils.lineIterator(new InputStreamReader(stream));
        }
    }
    if (result == null) {
        throw new UnsupportedOperationException("Unknown input split: " + inputSplit);
    }
    return result;
}
From source file:org.datavec.api.records.reader.impl.LineRecordReader.java
@Override public List<Record> loadFromMetaData(List<RecordMetaData> recordMetaDatas) throws IOException { //First: create a sorted list of the RecordMetaData List<Triple<Integer, RecordMetaDataLine, List<Writable>>> list = new ArrayList<>(); Set<URI> uris = new HashSet<>(); Iterator<RecordMetaData> iter = recordMetaDatas.iterator(); int count = 0; while (iter.hasNext()) { RecordMetaData rmd = iter.next(); if (!(rmd instanceof RecordMetaDataLine)) { throw new IllegalArgumentException( "Invalid metadata; expected RecordMetaDataLine instance; got: " + rmd); }//from w ww.jav a 2 s . com list.add(new Triple<>(count++, (RecordMetaDataLine) rmd, (List<Writable>) null)); if (rmd.getURI() != null) uris.add(rmd.getURI()); } List<URI> sortedURIs = null; if (uris.size() > 0) { sortedURIs = new ArrayList<>(uris); Collections.sort(sortedURIs); } //Sort by URI first (if possible - don't always have URIs though, for String split etc), then sort by line number: Collections.sort(list, new Comparator<Triple<Integer, RecordMetaDataLine, List<Writable>>>() { @Override public int compare(Triple<Integer, RecordMetaDataLine, List<Writable>> o1, Triple<Integer, RecordMetaDataLine, List<Writable>> o2) { if (o1.getSecond().getURI() != null) { if (!o1.getSecond().getURI().equals(o2.getSecond().getURI())) { return o1.getSecond().getURI().compareTo(o2.getSecond().getURI()); } } return Integer.compare(o1.getSecond().getLineNumber(), o2.getSecond().getLineNumber()); } }); if (uris.size() > 0 && sortedURIs != null) { //URIs case - possibly with multiple URIs Iterator<Triple<Integer, RecordMetaDataLine, List<Writable>>> metaIter = list.iterator(); //Currently sorted by URI, then line number URI currentURI = sortedURIs.get(0); Iterator<String> currentUriIter = IOUtils .lineIterator(new InputStreamReader(currentURI.toURL().openStream())); int currentURIIdx = 0; //Index of URI int currentLineIdx = 0; //Index of the line for the current URI String line = currentUriIter.next(); while 
(metaIter.hasNext()) { Triple<Integer, RecordMetaDataLine, List<Writable>> t = metaIter.next(); URI thisURI = t.getSecond().getURI(); int nextLineIdx = t.getSecond().getLineNumber(); //First: find the right URI for this record... while (!currentURI.equals(thisURI)) { //Iterate to the next URI currentURIIdx++; if (currentURIIdx >= sortedURIs.size()) { //Should never happen throw new IllegalStateException( "Count not find URI " + thisURI + " in URIs list: " + sortedURIs); } currentURI = sortedURIs.get(currentURIIdx); currentLineIdx = 0; if (currentURI.equals(thisURI)) { //Found the correct URI for this MetaData instance closeIfRequired(currentUriIter); currentUriIter = IOUtils .lineIterator(new InputStreamReader(currentURI.toURL().openStream())); line = currentUriIter.next(); } } //Have the correct URI/iter open -> scan to the required line while (currentLineIdx < nextLineIdx && currentUriIter.hasNext()) { line = currentUriIter.next(); currentLineIdx++; } if (currentLineIdx < nextLineIdx && !currentUriIter.hasNext()) { throw new IllegalStateException("Could not get line " + nextLineIdx + " from URI " + currentURI + ": has only " + currentLineIdx + " lines"); } t.setThird(Collections.<Writable>singletonList(new Text(line))); } } else { //Not URI based: String split, etc Iterator<String> iterator = getIterator(0); Iterator<Triple<Integer, RecordMetaDataLine, List<Writable>>> metaIter = list.iterator(); int currentLineIdx = 0; String line = iterator.next(); while (metaIter.hasNext()) { Triple<Integer, RecordMetaDataLine, List<Writable>> t = metaIter.next(); int nextLineIdx = t.getSecond().getLineNumber(); while (currentLineIdx < nextLineIdx && iterator.hasNext()) { line = iterator.next(); currentLineIdx++; } t.setThird(Collections.<Writable>singletonList(new Text(line))); } closeIfRequired(iterator); } //Now, sort by the original (request) order: Collections.sort(list, new Comparator<Triple<Integer, RecordMetaDataLine, List<Writable>>>() { @Override public int 
compare(Triple<Integer, RecordMetaDataLine, List<Writable>> o1, Triple<Integer, RecordMetaDataLine, List<Writable>> o2) { return Integer.compare(o1.getFirst(), o2.getFirst()); } }); //And return... List<Record> out = new ArrayList<>(); for (Triple<Integer, RecordMetaDataLine, List<Writable>> t : list) { out.add(new org.datavec.api.records.impl.Record(t.getThird(), t.getSecond())); } return out; }
From source file:org.deeplearning4j.models.embeddings.loader.WordVectorSerializer.java
/** * Loads an in memory cache from the given path (sets syn0 and the vocab) * * @param vectorsFile the path of the file to load * @return a Pair holding the lookup table and the vocab cache. * @throws FileNotFoundException if the input file does not exist *///from www . j a v a 2 s .c o m public static Pair<InMemoryLookupTable, VocabCache> loadTxt(File vectorsFile) throws FileNotFoundException, UnsupportedEncodingException { BufferedReader reader = new BufferedReader( new InputStreamReader(new FileInputStream(vectorsFile), "UTF-8")); AbstractCache cache = new AbstractCache<>(); LineIterator iter = IOUtils.lineIterator(reader); String line = null; boolean hasHeader = false; if (iter.hasNext()) { line = iter.nextLine(); // skip header line //look for spaces if (!line.contains(" ")) { log.debug("Skipping first line"); hasHeader = true; } else { // we should check for something that looks like proper word vectors here. i.e: 1 word at the 0 position, and bunch of floats further String[] split = line.split(" "); try { long[] header = new long[split.length]; for (int x = 0; x < split.length; x++) { header[x] = Long.parseLong(split[x]); } if (split.length < 4) hasHeader = true; // now we know, if that's all ints - it's just a header // [0] - number of words // [1] - vectorSize // [2] - number of documents <-- DL4j-only value if (split.length == 3) cache.incrementTotalDocCount(header[2]); printOutProjectedMemoryUse(header[0], (int) header[1], 1); hasHeader = true; try { reader.close(); } catch (Exception ex) { } } catch (Exception e) { // if any conversion exception hits - that'll be considered header hasHeader = false; } } } //reposition buffer to be one line ahead if (hasHeader) { line = ""; iter.close(); reader = new BufferedReader(new FileReader(vectorsFile)); iter = IOUtils.lineIterator(reader); iter.nextLine(); } List<INDArray> arrays = new ArrayList<>(); while (iter.hasNext()) { if (line.isEmpty()) line = iter.nextLine(); String[] split = line.split(" "); String 
word = decodeB64(split[0]); //split[0].replaceAll(whitespaceReplacement, " "); VocabWord word1 = new VocabWord(1.0, word); word1.setIndex(cache.numWords()); cache.addToken(word1); cache.addWordToIndex(word1.getIndex(), word); cache.putVocabWord(word); float[] vector = new float[split.length - 1]; for (int i = 1; i < split.length; i++) { vector[i - 1] = Float.parseFloat(split[i]); } INDArray row = Nd4j.create(vector); arrays.add(row); // workaround for skipped first row line = ""; } INDArray syn = Nd4j.vstack(arrays); InMemoryLookupTable lookupTable = (InMemoryLookupTable) new InMemoryLookupTable.Builder() .vectorLength(arrays.get(0).columns()).useAdaGrad(false).cache(cache).useHierarchicSoftmax(false) .build(); if (Nd4j.ENFORCE_NUMERICAL_STABILITY) Nd4j.clearNans(syn); lookupTable.setSyn0(syn); iter.close(); try { reader.close(); } catch (Exception e) { } return new Pair<>(lookupTable, (VocabCache) cache); }
From source file:org.deeplearning4j.models.glove.CoOccurrences.java
/** * Load from an input stream with the following format: * w1 w2 score//w w w .j a v a 2 s.c o m * @param from the input stream to read from * @return the co occurrences based on the input stream */ public static CoOccurrences load(InputStream from) { CoOccurrences ret = new CoOccurrences(); ret.coOccurrences = new ArrayList<>(); CounterMap<String, String> counter = new CounterMap<>(); Reader inputStream = new InputStreamReader(from); LineIterator iter = IOUtils.lineIterator(inputStream); String line; while ((iter.hasNext())) { line = iter.nextLine(); String[] split = line.split(" "); if (split.length < 3) continue; //no empty keys if (split[0].isEmpty() || split[1].isEmpty()) continue; ret.coOccurrences.add(new Pair<>(split[0], split[1])); counter.incrementCount(split[0], split[1], Double.parseDouble(split[2])); } ret.coOCurreneCounts = counter; return ret; }
From source file:org.deeplearning4j.models.word2vec.wordstore.inmemory.InMemoryLookupCache.java
/** * Load a look up cache from an input stream * delimited by \n/*from ww w . j a v a2 s. com*/ * @param from the input stream to read from * @return the in memory lookup cache */ public static InMemoryLookupCache load(InputStream from) { Reader inputStream = new InputStreamReader(from); LineIterator iter = IOUtils.lineIterator(inputStream); String line; InMemoryLookupCache ret = new InMemoryLookupCache(); int count = 0; while ((iter.hasNext())) { line = iter.nextLine(); if (line.isEmpty()) continue; ret.incrementWordCount(line); VocabWord word = new VocabWord(1.0, line); word.setIndex(count); ret.addToken(word); ret.addWordToIndex(count, line); ret.putVocabWord(line); count++; } return ret; }
From source file:org.eclim.installer.ant.ShutdownTask.java
/** * Executes this task./*from w ww .j a v a 2 s .c o m*/ */ @SuppressWarnings("unchecked") public void execute() throws BuildException { Gson gson = new Gson(); FileReader reader = null; try { File instances = new File(System.getProperty("user.home") + "/.eclim/.eclimd_instances"); int count = 0; if (instances.exists()) { reader = new FileReader(instances); for (Iterator<String> ii = IOUtils.lineIterator(reader); ii.hasNext();) { count++; String line = ii.next(); String name = null; int port = 9091; if (line.startsWith("{")) { Instance instance = gson.fromJson(line, Instance.class); name = instance.workspace + ':' + instance.port; port = instance.port; } else { name = line; port = Integer.parseInt(line.replaceFirst(".*:", "")); } try { log("Shutting down eclimd: " + name); shutdown(port); } catch (Exception e) { log("Unable to shut down eclimd (" + name + "): " + e.getClass().getName() + " - " + e.getMessage()); } } } // if no registered instances found, try shutting down the default port to // account for users on old eclim versions if (count == 0) { try { shutdown(9091); } catch (Exception e) { log("Unable to shut down eclimd (9091): " + e.getClass().getName() + " - " + e.getMessage()); } } } catch (FileNotFoundException fnfe) { log("Unable to locate eclimd instances file."); } finally { IOUtils.closeQuietly(reader); } }
From source file:org.eclipse.smarthome.extensionservice.marketplace.internal.BindingExtensionHandler.java
private Map<String, Long> loadInstalledBindingsMap() { File dataFile = bundleContext.getDataFile(BINDING_FILE); if (dataFile != null && dataFile.exists()) { try (FileReader reader = new FileReader(dataFile)) { LineIterator lineIterator = IOUtils.lineIterator(reader); Map<String, Long> map = new HashMap<>(); while (lineIterator.hasNext()) { String line = lineIterator.nextLine(); String[] parts = line.split(";"); if (parts.length == 2) { try { map.put(parts[0], Long.valueOf(parts[1])); } catch (NumberFormatException e) { logger.debug("Cannot parse '{}' as a number in file {} - ignoring it.", parts[1], dataFile.getName()); }/*from w w w .j a v a 2 s . co m*/ } else { logger.debug("Invalid line in file {} - ignoring it:\n{}", dataFile.getName(), line); } } return map; } catch (IOException e) { logger.debug("File '{}' for installed bindings does not exist.", dataFile.getName()); // ignore and just return an empty map } } return new HashMap<>(); }
From source file:org.eclipse.smarthome.extensionservice.marketplace.internal.BundleExtensionHandler.java
private Map<String, Long> loadInstalledBundlesFile(File dataFile) { try (FileReader reader = new FileReader(dataFile)) { LineIterator lineIterator = IOUtils.lineIterator(reader); Map<String, Long> map = new HashMap<>(); while (lineIterator.hasNext()) { String line = lineIterator.nextLine(); String[] parts = line.split(";"); if (parts.length == 2) { try { map.put(parts[0], Long.valueOf(parts[1])); } catch (NumberFormatException e) { logger.debug("Cannot parse '{}' as a number in file {} - ignoring it.", parts[1], dataFile.getName()); }/*w w w . ja va 2s . c o m*/ } else { logger.debug("Invalid line in file {} - ignoring it:\n{}", dataFile.getName(), line); } } return map; } catch (IOException e) { logger.debug("File '{}' for installed bundles does not exist.", dataFile.getName()); // ignore and just return an empty map } return new HashMap<>(); }
From source file:org.jasig.ssp.web.api.reports.AbstractReportControllerIntegrationTest.java
/**
 * Asserts that the body lines of the CSV report in the given response match
 * the expected lines. Lines before the first line matching
 * {@code firstBodyRowRule} are treated as header content and skipped; when the
 * rule is null, every line is considered part of the body.
 *
 * @param expectedReportBodyLines the expected body lines
 * @param response                the response holding the CSV report
 * @param firstBodyRowRule        predicate identifying the first body row, or
 *                                null to accept all lines
 * @throws UnsupportedEncodingException if the response content cannot be decoded
 */
protected void expectReportBodyLines(List<String> expectedReportBodyLines, MockHttpServletResponse response,
        Predicate<String> firstBodyRowRule) throws UnsupportedEncodingException {
    final String csvReport = response.getContentAsString();
    final List<String> actualBodyLines = new ArrayList<String>();
    final LineIterator lines = IOUtils.lineIterator(new CharSequenceReader(csvReport));
    // Once the first body row is seen, every subsequent line is accumulated.
    boolean inBody = (firstBodyRowRule == null);
    while (lines.hasNext()) {
        String line = lines.next();
        if (inBody || firstBodyRowRule.apply(line)) {
            inBody = true;
            actualBodyLines.add(line);
        }
    }
    assertStringCollectionsEqual(expectedReportBodyLines, actualBodyLines);
}