List of usage examples for org.apache.commons.io LineIterator close
public void close() — closes the underlying Reader quietly.

From source file: com.level3.hiper.dyconn.network.device.Repository.java
public synchronized void load(String fn) throws IOException { fileName = fn;//from w w w.jav a2 s . c o m // entry from devices.txt // OVTR IRNG4838I7001 10.248.253.155 2c 161 0v3rtur31sg LineIterator it = FileUtils.lineIterator(new File(fn), "UTF-8"); deviceInfoMap.clear(); try { while (it.hasNext()) { String line = it.nextLine(); if ("".equals(line)) continue; if (line.startsWith("#")) continue; String[] parts = line.split("\\s+"); String name = parts[1]; deviceInfoMap.put(name, new Info(parts[0], parts[1], parts[2], parts[3], parts[4])); } } finally { it.close(); } }
From source file:edu.cornell.med.icb.goby.modes.EmpiricalPMode.java
private int countLines(String inputFilename) throws FileNotFoundException { int lineCount = 0; LineIterator it = new LineIterator(new FileReader(inputFilename)); while (it.hasNext()) { Object next = it.next();//from w w w.j av a 2 s. c om lineCount++; } it.close(); return lineCount; }
From source file:eu.eexcess.domaindetection.wordnet.XwndReader.java
/**
 * Reads an Extended WordNet Domains file and populates {@code synsetToDomains}.
 * On first use the text file is converted into a compact binary cache
 * ("&lt;file&gt;.cache"); subsequent reads load the cache directly.
 *
 * @param file the Extended WordNet Domains text file (tab-separated: synset, weight)
 * @throws IOException if reading the file or building/reading the cache fails
 */
public void read(File file) throws IOException {
    String domain = FilenameUtils.getBaseName(file.getName());
    File cacheFile = new File(file.getPath() + ".cache");
    if (!cacheFile.exists()) {
        writeCache(file, cacheFile);
    }
    readCache(domain, file, cacheFile);
}

/** Converts the text file into the compact binary cache format. */
private void writeCache(File file, File cacheFile) throws IOException {
    System.out.println("Read in the Extended WordNet Domains file: " + file);
    BinaryOutputStream bos = new BinaryOutputStream(new FileOutputStream(cacheFile));
    try {
        // NOTE(review): FileReader uses the platform default charset — confirm the data is ASCII.
        LineIterator iterator = new LineIterator(new FileReader(file));
        try {
            while (iterator.hasNext()) {
                String line = iterator.nextLine();
                String[] tokens = line.split("\t");
                String synset = tokens[0];
                double weight = Double.parseDouble(tokens[1]);
                // synset ids look like "00001740-n": offset, then POS key
                String[] ssid = synset.split("-");
                int nr = Integer.parseInt(ssid[0]);
                POS pos = POS.getPOSForKey(ssid[1]);
                bos.writeInt(nr);
                bos.writeSmallInt(pos.getId());
                // store the weight as raw float bits to keep the cache compact
                bos.writeInt(Float.floatToIntBits((float) weight));
            }
        } finally {
            // Fix: close the reader even if parsing throws (previously leaked on exception).
            iterator.close();
        }
    } finally {
        // Fix: close the cache stream even on failure (previously leaked on exception).
        bos.close();
    }
}

/** Loads domain assignments from the binary cache into {@code synsetToDomains}. */
private void readCache(String domain, File file, File cacheFile) throws IOException {
    System.out.println("Read in the Extended WordNet Domains cache file: " + file);
    FileInputStream fStream = new FileInputStream(cacheFile);
    BinaryInputStream bis = new BinaryInputStream(fStream);
    try {
        while (bis.available() > 0) {
            int nr = bis.readInt();
            int key = bis.readSmallInt();
            POS pos = POS.getPOSForId(key);
            // reconstruct the textual synset id, e.g. "00001740-n"
            String synset = String.format("%08d-%s", nr, pos.getKey());
            double weight = Float.intBitsToFloat(bis.readInt());
            DomainAssignment assignment = new DomainAssignment(domain, weight);
            Set<DomainAssignment> domains = synsetToDomains.get(synset);
            if (domains == null) {
                domains = new TreeSet<DomainAssignment>();
                synsetToDomains.put(synset, domains);
            }
            domains.add(assignment);
        }
    } finally {
        // Fix: close streams even if reading throws (previously leaked on exception).
        bis.close();
        fStream.close();
    }
}
From source file:de.tudarmstadt.lt.lm.app.GenerateNgrams.java
/**
 * Generates ngrams of cardinality {@code from_cardinality}..{@code to_cardinality}
 * (inclusive) from every *.txt file in {@code src_dir} and writes them, one ngram
 * per line, to the gzipped file "&lt;dirname&gt;.ngrams.txt.gz" inside {@code src_dir}.
 *
 * @param src_dir directory containing the *.txt source files
 * @param prvdr provides sentence splitting and tokenization
 * @param from_cardinality smallest ngram size to generate
 * @param to_cardinality largest ngram size to generate
 * @param overwrite whether to regenerate the ngram file if it already exists
 * @return the ngram file (possibly pre-existing), or {@code null} if the output
 *         file could not be opened for writing
 */
public static File generateNgrams(File src_dir, AbstractStringProvider prvdr, int from_cardinality,
        int to_cardinality, boolean overwrite) {
    final File ngram_file = new File(src_dir, String.format("%s.%s", src_dir.getName(), "ngrams.txt.gz"));
    int n_b = from_cardinality, n_e = to_cardinality;
    if (ngram_file.exists()) {
        LOG.info("Output file already exists: '{}'.", ngram_file.getAbsolutePath());
        if (overwrite) {
            ngram_file.delete();
            LOG.info("Overwriting file: '{}'.", ngram_file.getAbsolutePath());
        } else {
            return ngram_file;
        }
    }
    File[] src_files = src_dir.listFiles(new FileFilter() {
        @Override
        public boolean accept(File f) {
            // only plain *.txt files; never read the output file itself
            return f.isFile() && f.getName().endsWith(".txt") && (!f.equals(ngram_file));
        }
    });
    String[] basenames = new String[src_files.length];
    for (int i = 0; i < basenames.length; i++) {
        basenames[i] = src_files[i].getName();
    }
    LOG.info(String.format("Reading txt files from dir: '%s'; Files: %s.", src_dir.getAbsolutePath(),
            StringUtils.abbreviate(Arrays.toString(basenames), 200)));
    LOG.info(String.format("Writing ngrams to file: '%s'.", ngram_file.getAbsolutePath()));
    PrintWriter pw = null;
    try {
        pw = new PrintWriter(
                new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(ngram_file)), "UTF-8"));
    } catch (IOException e) {
        LOG.error("Could not open writer for file: '{}'.", ngram_file.getAbsolutePath(), e);
        return null;
    }
    long num_ngrams = 0l;
    for (int i = 0; i < src_files.length; i++) {
        File src_file = src_files[i];
        LOG.info("Processing file {} / {} ('{}')", i + 1, src_files.length, src_file.getAbsolutePath());
        long num_ngrams_f = 0l;
        try {
            LineIterator liter = new LineIterator(
                    new BufferedReader(new InputStreamReader(new FileInputStream(src_file), "UTF-8")));
            try {
                int lc = 0;
                while (liter.hasNext()) {
                    if (++lc % 1000 == 0) {
                        LOG.debug("Processing line {} ({})", lc, src_file);
                    }
                    String line = liter.next();
                    for (String sentence : prvdr.splitSentences(line)) {
                        for (int n = n_b; n <= n_e; n++) {
                            List<String>[] ngrams;
                            try {
                                List<String> tokens = prvdr.tokenizeSentence(sentence);
                                if (tokens.isEmpty()) {
                                    continue; // nothing to generate for this sentence
                                }
                                ngrams = AbstractLanguageModel.getNgramSequence(tokens, n);
                            } catch (Exception e) {
                                LOG.warn(
                                        "Could not get ngram of cardinality {} from String '{}' in line '{}' from file '{}'.",
                                        n, StringUtils.abbreviate(line, 100), lc, src_file.getAbsolutePath());
                                continue;
                            }
                            for (List<String> ngram : ngrams) {
                                pw.println(StringUtils.join(ngram, " "));
                            }
                            pw.flush();
                            num_ngrams_f += ngrams.length;
                        }
                    }
                }
            } finally {
                // Fix: the reader was previously closed only on success and leaked
                // whenever an exception escaped the read loop.
                liter.close();
            }
        } catch (Exception e) {
            LOG.warn("Could not read file '{}'.", src_file.getAbsolutePath(), e);
        }
        LOG.debug("Generated {} ngrams from file {}.", num_ngrams_f, src_file);
        num_ngrams += num_ngrams_f;
    }
    // pw is guaranteed non-null here (we returned early if opening it failed),
    // so the original "if (pw != null)" guard was dead code.
    pw.close();
    LOG.info("Generated {} ngrams.", num_ngrams);
    return ngram_file;
}
From source file:eu.eexcess.domaindetection.wordnet.WordnetDomainsReader.java
public void read(File file) throws IOException { System.out.println("Read in the original WordNet Domains file: " + file); LineIterator iterator = new LineIterator(new FileReader(file)); while (iterator.hasNext()) { String line = iterator.nextLine(); String[] tokens = line.split("[\t\\ ]"); String synset = tokens[0]; for (int i = 1; i < tokens.length; i++) { DomainAssignment assignment = new DomainAssignment(tokens[i], 1); Set<DomainAssignment> domains = synsetToDomains.get(synset); if (domains == null) { domains = new TreeSet<DomainAssignment>(); synsetToDomains.put(synset, domains); }//from w ww.jav a 2 s . com domains.add(assignment); } } iterator.close(); }
From source file:net.orzo.lib.Files.java
/** * Obtains an iterator which reads provided file (specified by path) line by * line. Iterator can be accessed by a classic method pair <i>hasNext()</li> * and <i>next()</i>./*from www. j ava 2 s.c om*/ */ public FileIterator<Object> fileReader(final String path, final String encoding) throws IOException { final LineIterator itr = FileUtils.lineIterator(new File(path), encoding); return new FileIterator<Object>() { @Override public boolean hasNext() { return itr.hasNext(); } @Override public Object next() { return itr.nextLine(); // TODO wrapping??? } @Override public void remove() { itr.remove(); } public void close() { itr.close(); } public String getPath() { if (File.separator.equals("/")) { return path; } else { return path.replace(File.separator, "/"); } } }; }
From source file:es.ua.dlsi.lexicalinformation.Corpus.java
/** * Method that retrieves all the lines containing a given surface form in the * corpus./*from ww w . j a v a 2 s. c om*/ * @param word Word to be searched in the corpus * @return Returns the set of lines containing a given surface form in the * corpus. */ public Set<String> GetAllExamples(String word) { Set<String> examples = new LinkedHashSet<String>(); LineIterator corpus_it = null; try { corpus_it = FileUtils.lineIterator(new File(this.path)); } catch (FileNotFoundException ex) { System.err.println("Error while trying to open '" + this.path + "' file."); System.exit(-1); } catch (IOException ex) { System.err.println("Error while reading '" + this.path + "' file."); System.exit(-1); } while (corpus_it.hasNext()) { String line = corpus_it.nextLine(); //If the surface form appears in the sentence... if (line.matches("^" + word + " .*") || line.matches(".* " + word + "$") || line.matches(".* " + word + " .*")) { examples.add(line); } } corpus_it.close(); return examples; }
From source file:net.sf.sprockets.database.sqlite.DbOpenHelper.java
/** * Execute the statements in the resource script on the database. Each statement must end with a * semicolon./*from w ww. j av a 2 s. c om*/ */ private void execScript(SQLiteDatabase db, Resources res, int script) throws IOException { LineIterator lines = IOUtils.lineIterator(res.openRawResource(script), UTF_8); StringBuilder sql = new StringBuilder(2048); // enough capacity for a long statement try { // read each (potentially multi-line) statement and execute them one at a time while (lines.hasNext()) { String line = lines.next().trim(); int length = line.length(); if (length > 0) { sql.append(line).append("\n"); if (line.charAt(length - 1) == ';') { // statement loaded db.execSQL(sql.toString()); sql.setLength(0); // reset builder for a new statement } } } } finally { lines.close(); } }
From source file:es.ua.dlsi.lexicalinformation.Corpus.java
/** * Method that retrieves all the lines in the corpus containing any of the * surface forms produced by a given candidate. * @param c Candidate generating the surface forms to be searched * @param dic Dictionary form which the candidate is extracted * @return Returns all the lines in the corpus containing any of the surface forms * produced by a given candidate//from w w w .j a va 2 s . co m */ public Set<String> GetAllExamplesOfInflections(Candidate c, Dictionary dic) { Set<String> inflectedwordforms = c.GetSurfaceForms(dic); Set<String> examples = new LinkedHashSet<String>(); LineIterator corpus_it = null; try { corpus_it = FileUtils.lineIterator(new File(this.path)); } catch (FileNotFoundException ex) { System.err.println("Error while trying to open '" + this.path + "' file."); System.exit(-1); } catch (IOException ex) { System.err.println("Error while reading '" + this.path + "' file."); System.exit(-1); } while (corpus_it.hasNext()) { String line = corpus_it.nextLine(); for (String word : inflectedwordforms) { //If the surface form appears in the sentence... if (line.matches("^" + word + " .*") || line.matches(".* " + word + "$") || line.matches(".* " + word + " .*")) { examples.add(line); } } } corpus_it.close(); return examples; }
From source file:eu.eexcess.domaindetection.wordnet.WordnetDomainsReader.java
/** * @param wordNetFile/* w w w . j av a2 s. co m*/ * @throws FileNotFoundException */ public void readDefinition(File wordNetFile) throws FileNotFoundException { File file = new File(wordNetFile.getParentFile(), "wn-domains-3.2-tree.csv"); LineIterator iterator = new LineIterator(new FileReader(file)); String[] currentParents = new String[4]; while (iterator.hasNext()) { String line = iterator.nextLine(); String[] tokens = line.split("[,]"); int depth = -1; for (int i = 0; i < tokens.length; i++) { if (!tokens[i].trim().isEmpty()) { depth = i; break; } } String domain = tokens[depth].trim().toLowerCase(Locale.US); if (depth >= 0) { Map<String, Double> parentToWeight = domainToParentDomainToWeight.get(domain); if (parentToWeight == null) { parentToWeight = new LinkedHashMap<String, Double>(); domainToParentDomainToWeight.put(domain, parentToWeight); } for (int i = 0; i < depth; i++) { double weight = 1.0 / ((depth - i + 1) * (depth - i + 1)); String parent = currentParents[i]; parentToWeight.put(parent, weight); } currentParents[depth] = domain; for (int i = depth + 1; i < 4; i++) { currentParents[i] = null; } } else { domainToParentDomainToWeight.put(domain, null); } } iterator.close(); }