List of usage examples for org.apache.commons.io.LineIterator#hasNext
public boolean hasNext()
Indicates whether the underlying Reader has more lines.
From source file: com.seniorproject.semanticweb.services.WebServices.java
/**
 * Reads a UTF-8 text file of RDF-style results line by line and rewrites each
 * line: a handful of well-known predicate URIs are removed outright, common
 * namespace URIs are collapsed to their prefix form (owl:, xsd:, rdfs:,
 * foaf:, ...), and any remaining angle brackets are stripped.
 *
 * @param filepath path of the UTF-8 encoded file to rewrite
 * @return the rewritten lines, in file order
 * @throws IOException if the file cannot be opened or read
 */
public ArrayList<String> replaceWithPrefix(String filepath) throws IOException {
    File file = new File(filepath);
    LineIterator it = FileUtils.lineIterator(file, "UTF-8");
    ArrayList<String> results = new ArrayList<>();
    try {
        while (it.hasNext()) {
            String content = it.nextLine();
            content = content.replace("^^<http://www.w3.org/2001/XMLSchema#int>", "");
            // Drop these predicate URIs entirely. The previous code appended "\n"
            // to each of these search strings, which could never match because
            // nextLine() returns lines without their terminators — the removals
            // were dead code and the URIs fell through to the prefix rewrites.
            content = content.replace("<http://xmlns.com/foaf/0.1/page>", "");
            content = content.replace("<http://www.w3.org/2002/07/owl#sameAs>", "");
            content = content.replace("<http://www.w3.org/2000/01/rdf-schema#label>", "");
            content = content.replace("<http://dbpedia.org/property/hasPhotoCollection>", "");
            content = content.replace("<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>", "");
            // Namespace URI -> prefix rewrites. Order matters: the more specific
            // linkedmdb "movie/" and "oddlinker/" namespaces must be rewritten
            // before the generic "resource/" namespace that prefixes them.
            content = content.replace("<http://www.w3.org/2002/07/owl#", "owl:");
            content = content.replace("<http://www.w3.org/2001/XMLSchema#", "xsd:");
            content = content.replace("<http://www.w3.org/2000/01/rdf-schema#", "rdfs:");
            content = content.replace("<http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf:");
            content = content.replace("<http://xmlns.com/foaf/0.1/", "foaf:");
            content = content.replace("<http://data.linkedmdb.org/resource/oddlinker/", "oddlinker:");
            content = content.replace("<file:/C:/d2r-server-0.4/mapping.n3#", "map:");
            content = content.replace("<http://data.linkedmdb.org/resource/movie/", "movie:");
            content = content.replace("<http://data.linkedmdb.org/resource/", "db:");
            content = content.replace("<http://dbpedia.org/property/", "dbpedia:");
            content = content.replace("<http://www.w3.org/2004/02/skos/core#", "skos:");
            content = content.replace("<http://purl.org/dc/terms/", "dc:");
            // Strip brackets left around any URI that had no prefix mapping.
            content = content.replace(">", "");
            content = content.replace("<", "");
            results.add(content);
        }
    } finally {
        // LineIterator.closeQuietly(LineIterator) is deprecated (Commons IO 2.6+);
        // this method already declares IOException, so close directly.
        it.close();
    }
    return results;
}
From source file:net.orzo.lib.Files.java
/** * Obtains an iterator which reads provided file (specified by path) line by * line. Iterator can be accessed by a classic method pair <i>hasNext()</li> * and <i>next()</i>./*from w w w. j av a2 s.com*/ */ public FileIterator<Object> fileReader(final String path, final String encoding) throws IOException { final LineIterator itr = FileUtils.lineIterator(new File(path), encoding); return new FileIterator<Object>() { @Override public boolean hasNext() { return itr.hasNext(); } @Override public Object next() { return itr.nextLine(); // TODO wrapping??? } @Override public void remove() { itr.remove(); } public void close() { itr.close(); } public String getPath() { if (File.separator.equals("/")) { return path; } else { return path.replace(File.separator, "/"); } } }; }
From source file:edu.cornell.med.icb.goby.modes.EmpiricalPMode.java
private int countLines(String inputFilename) throws FileNotFoundException { int lineCount = 0; LineIterator it = new LineIterator(new FileReader(inputFilename)); while (it.hasNext()) { Object next = it.next();//from w ww. j av a2 s .co m lineCount++; } it.close(); return lineCount; }
From source file:com.nts.alphamale.monitor.EventMonitor.java
/**
 * Consumes the line stream produced by {@code adb [-s serial] shell getevent -lt}
 * and turns matched log lines into key / multi-touch tracking events.
 *
 * @see <a href="https://source.android.com/devices/input/getevent.html">Getevent</a>
 * @param li line iterator over the live getevent output
 * @throws Exception propagated from event construction/dispatch
 */
public void eventLogAnalysis(LineIterator li) throws Exception {
    // True while at least one finger is being tracked (between TRACKING_ID
    // assignment and TRACKING_ID release).
    boolean tracking = false;
    // Number of currently active touch slots (incremented on tracking start,
    // decremented on release).
    int multiCount = 0;
    // Position/slot events accumulated for the current gesture.
    List<EventLog> evtLogList = new ArrayList<EventLog>();
    // Per-slot point lists, keyed by the slot's start index.
    Map<Integer, List<Point>> multiSlot = new HashMap<Integer, List<Point>>();
    while (li.hasNext()) {
        String readLine = li.nextLine().trim();
        // 'p' is a field of this class — presumably the getevent line pattern;
        // lines that do not match are silently skipped.
        Matcher m = p.matcher(readLine);
        if (m.find()) {
            EventLog event = new EventLog(m);
            // Hardware key events are dispatched immediately and reset any
            // accumulated touch events.
            if (readLine.contains("EV_KEY")) {
                makeKeyEvent(event);
                evtLogList.clear();
            }
            // TRACKING_ID with a real id => a finger went down; open a new slot.
            // (getevent reports the release id 0xffffffff, which parses to
            // Integer.MAX_VALUE here — TODO confirm the parsing convention.)
            if (event.getAbsLabel().equals("ABS_MT_TRACKING_ID") && event.getAbsValue() != Integer.MAX_VALUE) {
                if (!multiSlot.containsKey(multiCount))
                    multiSlot.put(multiCount, new ArrayList<Point>());
                multiCount++;
                tracking = true;
            }
            // TRACKING_ID == MAX_VALUE => a finger was lifted; when the last
            // finger lifts, flush the accumulated gesture.
            if (event.getAbsLabel().equals("ABS_MT_TRACKING_ID") && event.getAbsValue() == Integer.MAX_VALUE) {
                multiCount--;
                if (multiCount == 0) {
                    tracking = false;
                    if (!evtLogList.isEmpty()) {
                        makeMultiTrackingEvent(multiSlot, evtLogList);
                    }
                }
            }
            // While tracking, record position/slot updates for the gesture.
            if (tracking == true) {
                if (event.getAbsLabel().contains("ABS_MT_POSITION") || event.getAbsLabel().contains("ABS_MT_SLOT"))
                    evtLogList.add(event);
            }
        }
    }
}
From source file:edu.cornell.med.icb.goby.reads.PicardFastaIndexedSequence.java
/**
 * Opens an indexed FASTA file and parses its {@code .fai} index to recover
 * per-contig names, lengths and bases-per-line (the Picard delegate classes
 * do not expose the contig names, so the index text file is re-parsed here).
 *
 * @param filename path to the FASTA file; {@code filename + ".fai"} must exist
 * @throws FileNotFoundException if the FASTA file is not indexed or a file is missing
 */
public PicardFastaIndexedSequence(final String filename) throws FileNotFoundException {
    delegate = new IndexedFastaSequenceFile(new File(filename));
    indexDelegate = new FastaSequenceIndex(new File(filename + ".fai"));
    final int numContigs = indexDelegate.size();
    if (!delegate.isIndexed())
        throw new FileNotFoundException("An fasta idx index must be found for filename " + filename);
    lengths = new int[numContigs];
    names = new String[numContigs];
    basesPerLine = new long[numContigs];
    // NOTE(review): lineIt (and its FileReader) is never closed — file handle
    // leak; consider try/finally. Left unchanged here.
    final LineIterator lineIt = new LineIterator(new FileReader(filename + ".fai"));
    // collect the contig names by parsing the text fai file. For some bizarre reason neither the
    // IndexedFastaSequenceFile class nor the FastaSequenceIndex class expose the contig names, yet
    // contig name is the parameter expected to get data from the sequences!
    int index = 0;
    while (lineIt.hasNext()) {
        final String line = lineIt.nextLine();
        // .fai columns are whitespace-separated: name, length, offset-of-first-base
        // per the fields read below; columns 3+ are ignored.
        final String[] tokens = line.split("[\\s]");
        names[index] = tokens[0];
        namesToIndices.put(tokens[0], index);
        lengths[index] = Integer.parseInt(tokens[1]);
        basesPerLine[index] = Long.parseLong(tokens[2]);
        index++;
    }
}
From source file:de.tudarmstadt.ukp.clarin.webanno.tsv.WebannoTsv1Reader.java
/**
 * Iterates through all lines and collects the available annotations.<br>
 * The first column is the token number; a blank line marks the end of a sentence.<br>
 * The second column is the token.<br>
 * The third column is the lemma annotation.<br>
 * The fourth column is the POS annotation.<br>
 * The fifth column holds Named Entity annotations (multiple annotations separated
 * by the | character).<br>
 * The sixth column is the token number of the dependency-parsing head.<br>
 * The seventh column is the function/type of the dependency relation.<br>
 * The eighth and ninth columns are currently undefined.
 */
private void setAnnotations(InputStream aIs, String aEncoding, StringBuilder text, Map<Integer, String> tokens,
        Map<Integer, String> pos, Map<Integer, String> lemma, Map<Integer, String> namedEntity,
        Map<Integer, String> dependencyFunction, Map<Integer, Integer> dependencyDependent,
        List<Integer> firstTokenInSentence) throws IOException {
    int tokenNumber = 0;          // running, document-global token number
    boolean first = true;         // true until the first data line is seen
    int base = 0;                 // token offset of the current sentence
    LineIterator lineIterator = IOUtils.lineIterator(aIs, aEncoding);
    boolean textFound = false;    // did the file carry an explicit #text= line?
    StringBuffer tmpText = new StringBuffer(); // fallback text rebuilt from tokens
    while (lineIterator.hasNext()) {
        String line = lineIterator.next().trim();
        // Explicit document text line: accumulate and skip column parsing.
        if (line.startsWith("#text=")) {
            text.append(line.substring(6) + "\n");
            textFound = true;
            continue;
        }
        if (line.startsWith("#")) {
            continue; // it is a comment line
        }
        int count = StringUtils.countMatches(line, "\t");
        if (line.isEmpty()) {
            continue; // sentence separator
        }
        if (count != 9) { // not a proper TSV file
            getUimaContext().getLogger().log(Level.INFO, "This is not a valid TSV File");
            throw new IOException(fileName + " This is not a valid TSV File");
        }
        StringTokenizer lineTk = new StringTokenizer(line, "\t");
        if (first) {
            tokenNumber = Integer.parseInt(line.substring(0, line.indexOf("\t")));
            firstTokenInSentence.add(tokenNumber);
            first = false;
        }
        else {
            // Token numbers restart at 1 for each sentence; rebase them onto
            // the global numbering.
            int lineNumber = Integer.parseInt(line.substring(0, line.indexOf("\t")));
            if (lineNumber == 1) {
                base = tokenNumber;
                firstTokenInSentence.add(base);
            }
            tokenNumber = base + Integer.parseInt(line.substring(0, line.indexOf("\t")));
        }
        // Consume the remaining columns of this line in fixed order.
        while (lineTk.hasMoreElements()) {
            lineTk.nextToken(); // token number column, already parsed above
            String token = lineTk.nextToken();
            // for backward compatibility
            tmpText.append(token + " ");
            tokens.put(tokenNumber, token);
            lemma.put(tokenNumber, lineTk.nextToken());
            pos.put(tokenNumber, lineTk.nextToken());
            String ne = lineTk.nextToken();
            lineTk.nextToken(); // make it compatible with prev WebAnno TSV reader
            // "_" and "-" mean "no named entity" => normalise to "O".
            namedEntity.put(tokenNumber, (ne.equals("_") || ne.equals("-")) ? "O" : ne);
            String dependentValue = lineTk.nextToken();
            if (NumberUtils.isDigits(dependentValue)) {
                int dependent = Integer.parseInt(dependentValue);
                // Head 0 (root) stays 0; others are rebased like token numbers.
                dependencyDependent.put(tokenNumber, dependent == 0 ? 0 : base + dependent);
                dependencyFunction.put(tokenNumber, lineTk.nextToken());
            }
            else {
                lineTk.nextToken(); // no dependency — skip the function column
            }
            lineTk.nextToken(); // undefined column 8
            lineTk.nextToken(); // undefined column 9
        }
    }
    // No #text= line present: fall back to the text rebuilt from the tokens.
    if (!textFound) {
        text.append(tmpText);
    }
}
From source file:es.ua.dlsi.lexicalinformation.Corpus.java
/** * Method that retrieves all the lines containing a given surface form in the * corpus.//w w w . j a v a 2 s . c o m * @param word Word to be searched in the corpus * @return Returns the set of lines containing a given surface form in the * corpus. */ public Set<String> GetAllExamples(String word) { Set<String> examples = new LinkedHashSet<String>(); LineIterator corpus_it = null; try { corpus_it = FileUtils.lineIterator(new File(this.path)); } catch (FileNotFoundException ex) { System.err.println("Error while trying to open '" + this.path + "' file."); System.exit(-1); } catch (IOException ex) { System.err.println("Error while reading '" + this.path + "' file."); System.exit(-1); } while (corpus_it.hasNext()) { String line = corpus_it.nextLine(); //If the surface form appears in the sentence... if (line.matches("^" + word + " .*") || line.matches(".* " + word + "$") || line.matches(".* " + word + " .*")) { examples.add(line); } } corpus_it.close(); return examples; }
From source file:es.ua.dlsi.lexicalinformation.Corpus.java
/** * Method that retrieves all the lines in the corpus containing any of the * surface forms produced by a given candidate. * @param c Candidate generating the surface forms to be searched * @param dic Dictionary form which the candidate is extracted * @return Returns all the lines in the corpus containing any of the surface forms * produced by a given candidate//from w w w . jav a2s .com */ public Set<String> GetAllExamplesOfInflections(Candidate c, Dictionary dic) { Set<String> inflectedwordforms = c.GetSurfaceForms(dic); Set<String> examples = new LinkedHashSet<String>(); LineIterator corpus_it = null; try { corpus_it = FileUtils.lineIterator(new File(this.path)); } catch (FileNotFoundException ex) { System.err.println("Error while trying to open '" + this.path + "' file."); System.exit(-1); } catch (IOException ex) { System.err.println("Error while reading '" + this.path + "' file."); System.exit(-1); } while (corpus_it.hasNext()) { String line = corpus_it.nextLine(); for (String word : inflectedwordforms) { //If the surface form appears in the sentence... if (line.matches("^" + word + " .*") || line.matches(".* " + word + "$") || line.matches(".* " + word + " .*")) { examples.add(line); } } } corpus_it.close(); return examples; }
From source file:eu.annocultor.converters.geonames.GeonamesCsvToRdf.java
/**
 * Converts the geonames "allCountries.txt" dump into RDF triples for the
 * selected continent: one record per line, 19 tab-separated fields, written
 * out per-country via write(...).
 *
 * @throws Exception on malformed input records or I/O failures
 */
void features() throws Exception {
    System.out.println("Parsing features");
    // load country-continent match
    countryToContinent.load(
            (new GeonamesCsvToRdf("EU")).getClass().getResourceAsStream("/country-to-continent.properties"));
    createDirsForContinents();
    long counter = 0;
    LineIterator it = FileUtils.lineIterator(new File(root, "allCountries.txt"), "UTF-8");
    try {
        while (it.hasNext()) {
            String text = it.nextLine();
            String[] fields = text.split("\t");
            // geonames dump records always carry 19 columns; anything else is corrupt.
            if (fields.length != 19) {
                throw new Exception("Field names mismatch on " + text);
            }
            // progress
            counter++;
            if (counter % 100000 == 0) {
                System.out.print("*");
            }
            String country = fields[countryCode];
            String continent = countryToContinent.getProperty(country);
            // Only convert records belonging to the configured continent.
            if (continent != null && continent.startsWith(continentToConvert)) {
                String id = fields[geonameid];
                String uri = NS_GEONAMES_INSTANCES + id + "/";
                String featureCodeField = fields[featureClass] + "." + fields[featureCode];
                String populationValue = fields[population];
                if (includeRecordInConversion(featureCodeField, populationValue)) {
                    // A.PCLI = independent political entity, i.e. a country record.
                    boolean isDescriptionOfCountry = featureCodeField.startsWith("A.PCLI");
                    if (!fields[name].isEmpty()) {
                        write(country, new Triple(uri, SKOS.LABEL_PREFERRED, new LiteralValue(fields[name]), null),
                                isDescriptionOfCountry);
                    }
                    // String altLabels[] = fields[alternatenames].split(",");
                    // for (String altLabel : altLabels) {
                    //     write(country, new Triple(uri, SKOS.LABEL_ALT, new LiteralValue(altLabel), null));
                    // }
                    // Alternate labels come from a side table keyed by geoname id;
                    // entries are removed once consumed.
                    Collection<LiteralValue> altLabelCollection = altLabels.getCollection(id);
                    if (altLabelCollection != null) {
                        for (LiteralValue xmlValue : altLabelCollection) {
                            write(country, new Triple(uri, SKOS.LABEL_ALT, xmlValue, null),
                                    isDescriptionOfCountry);
                        }
                        altLabels.remove(id);
                    }
                    // Link records are looked up but no longer emitted (write is
                    // commented out); the collection is still pruned.
                    Collection<String> linkCollection = links.getCollection(id);
                    if (linkCollection != null) {
                        for (String link : linkCollection) {
                            // write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "link"), new LiteralValue(link), null));
                        }
                        linkCollection.remove(fields[geonameid]);
                    }
                    if (fields[population].length() > 1) {
                        write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "population"),
                                new LiteralValue(fields[population]), null), isDescriptionOfCountry);
                    }
                    if (!fields[longitude].isEmpty()) {
                        write(country, new Triple(uri, new Property(NS_WGS_SCHEMA + "long"),
                                new LiteralValue(fields[longitude]), null), isDescriptionOfCountry);
                    }
                    if (!fields[latitude].isEmpty()) {
                        write(country, new Triple(uri, new Property(NS_WGS_SCHEMA + "lat"),
                                new LiteralValue(fields[latitude]), null), isDescriptionOfCountry);
                    }
                    if (!featureCodeField.isEmpty()) {
                        write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "division"),
                                new ResourceValue(NS_GEONAMES_ONTOLOGY + featureCodeField), null),
                                isDescriptionOfCountry);
                    }
                    if (!country.isEmpty()) {
                        write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "country"),
                                new LiteralValue(country), null), isDescriptionOfCountry);
                    }
                    // alt label as country code
                    if (featureCodeField.startsWith("A.PCL")) {
                        write(country, new Triple(uri, SKOS.LABEL_ALT, new LiteralValue(country), null),
                                isDescriptionOfCountry);
                    }
                    // Emit the administrative containment chain for this feature.
                    for (String broaderUri : allParents(uri, country)) {
                        write(country, new Triple(uri, Concepts.DCTEMRS.IS_PART_OF, new ResourceValue(broaderUri),
                                null), isDescriptionOfCountry);
                    }
                    // if (!fields[admin1code].isEmpty()) {
                    //     write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "admin1"), new LiteralValue(fields[admin1code]), null), isDescriptionOfCountry);
                    // }
                    // if (!fields[admin2code].isEmpty()) {
                    //     write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "admin2"), new LiteralValue(fields[admin2code]), null), isDescriptionOfCountry);
                    // }
                    // if (!fields[admin3code].isEmpty()) {
                    //     write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "admin3"), new LiteralValue(fields[admin3code]), null), isDescriptionOfCountry);
                    // }
                    // if (!fields[admin4code].isEmpty()) {
                    //     write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "admin4"), new LiteralValue(fields[admin4code]), null), isDescriptionOfCountry);
                    // }
                }
            }
        }
    } finally {
        LineIterator.closeQuietly(it);
    }
    System.out.println("Finished conversion, flushing and closing output files");
    System.out.flush();
    // Finish the per-country RDF writers that were actually opened.
    for (Object country : countryToContinent.keySet()) {
        SesameWriter bf = files.get(country.toString());
        if (bf != null) {
            bf.endRDF();
        }
    }
    if (allCountries != null) {
        allCountries.endRDF();
    }
}
From source file:net.mindengine.blogix.web.tiles.TilesContainer.java
private TileLine readAllTileLines(File file) throws IOException { LineIterator it = FileUtils.lineIterator(file, "UTF-8"); /**/*from ww w . j ava 2 s. c om*/ * Setting a root tile which will be a container for all tiles */ TileLine rootTileLine = new TileLine(); rootTileLine.indentation = -1; TileLine currentTileLine = rootTileLine; try { while (it.hasNext()) { String line = it.nextLine(); TileLine tileLine = readKeyValue(currentTileLine, line); if (tileLine != null) { currentTileLine = tileLine; } } } finally { LineIterator.closeQuietly(it); } return rootTileLine; }