List of usage examples for the org.apache.commons.io.LineIterator constructor:
public LineIterator(final Reader reader) throws IllegalArgumentException
Wraps the given Reader and iterates over its lines; throws IllegalArgumentException if the reader is null.
. From source file:de.tudarmstadt.lt.seg.sentence.SentenceSplitterTest.java
@Test public void ruleSplitterLineTest() { ISentenceSplitter sentenceSplitter = new RuleSplitter(); ITokenizer tokenizer = new EmptySpaceTokenizer(); StringWriter s = new StringWriter(); PrintWriter w = new PrintWriter(s); LineIterator liter = new LineIterator(new BufferedReader(new StringReader(TEST_TEXT))); for (long lc = 0; liter.hasNext();) { if (++lc % 1000 == 0) System.err.format("Processing line %d %n", lc); Segmenter.split_and_tokenize(new StringReader(liter.next()), String.format("%s:%d", "TEST_TEXT", lc), sentenceSplitter, tokenizer, 2, 0, false, false, "\n", "\n", "\n", w); }//from w w w . j a va2 s .c o m System.out.println(s.toString()); }
From source file:net.mikaboshi.intra_mart.tools.log_stats.parser.LogFileReader.java
/**
 * Opens the next log file in the queue and resets the per-file line counter.
 * Positions {@code lineIterator} at the first line of {@code logFiles.get(iLogFiles)}.
 *
 * @throws IllegalArgumentException     if the reader cannot be wrapped
 * @throws UnsupportedEncodingException if the configured charset is unknown
 * @throws FileNotFoundException        if the log file does not exist
 */
private void openLogFile() throws IllegalArgumentException, UnsupportedEncodingException, FileNotFoundException {
    final File currentFile = this.logFiles.get(this.iLogFiles);
    this.logParser.setLogFile(currentFile);
    this.lineIterator = new LineIterator(getFileReader(currentFile));
    logger.info("Open log file : " + currentFile.getPath());
    this.iLine = 0;
}
From source file:eu.eexcess.sourceselection.redde.indexer.topterm.TopTermToWNDomain.java
TreeNode<String> inflateDomainTree() throws FileNotFoundException { LineIterator iterator = new LineIterator(new FileReader(wordnetCSVTreeFile)); String[] currentBranch = new String[5]; currentBranch[0] = rootNodeName;/* w ww. j a va2 s . com*/ while (iterator.hasNext()) { // read current node and store its parents String line = iterator.nextLine(); String[] tokensInLine = line.split(tokenDelimiter); int depth = -1; for (int i = 0; i < tokensInLine.length; i++) { tokensInLine[i] = tokensInLine[i].trim(); if (!tokensInLine[i].isEmpty()) { depth = i; currentBranch[1 + depth] = tokensInLine[i]; } } // clear tail for (int tail = depth + 2; tail < currentBranch.length; tail++) { currentBranch[tail] = null; } // reconstruct and append the missing branch according to the // current tree ValueTreeNode<String> branch = null; for (int branchDepth = currentBranch.length; branchDepth > 0; branchDepth--) { String nodeName = currentBranch[branchDepth - 1]; if (nodeName == null) { continue; } Set<TreeNode<String>> result = new HashSet<TreeNode<String>>(); ValueTreeNode.findFirstNode(nodeName, wnDomainTree, result); TreeNode<String> nodeInTree = null; if (result.iterator().hasNext()) { nodeInTree = result.iterator().next(); } // if node tree -> add branch to tree if (nodeInTree != null) { if (branch != null) { nodeInTree.addChild(branch); branch = null; } break; // if node ! tree -> reconstruct the branch until the mount // point is clear } else { ValueTreeNode<String> newParent = new ValueTreeNode<String>(); newParent.setName(nodeName); if (branch != null) { newParent.addChild(branch); } branch = newParent; } } } iterator.close(); return wnDomainTree; }
From source file:de.tudarmstadt.lt.lm.app.GenerateNgrams.java
/**
 * Generates all ngrams of cardinality {@code from_cardinality}..{@code to_cardinality}
 * from every {@code *.txt} file directly inside {@code src_dir} and writes them, one
 * ngram per line (tokens space-joined), to a gzipped file
 * {@code <dirname>.ngrams.txt.gz} inside the same directory.
 *
 * @param src_dir          directory containing the source .txt files
 * @param prvdr            provider used for sentence splitting and tokenization
 * @param from_cardinality smallest ngram size (inclusive)
 * @param to_cardinality   largest ngram size (inclusive)
 * @param overwrite        if true, an existing output file is deleted and regenerated;
 *                         otherwise the existing file is returned untouched
 * @return the output file, or {@code null} if it could not be created or the
 *         directory could not be listed
 */
public static File generateNgrams(File src_dir, AbstractStringProvider prvdr, int from_cardinality,
        int to_cardinality, boolean overwrite) {
    final File ngram_file = new File(src_dir, String.format("%s.%s", src_dir.getName(), "ngrams.txt.gz"));
    int n_b = from_cardinality, n_e = to_cardinality;
    if (ngram_file.exists()) {
        LOG.info("Output file already exists: '{}'.", ngram_file.getAbsolutePath());
        if (overwrite) {
            ngram_file.delete();
            LOG.info("Overwriting file: '{}'.", ngram_file.getAbsolutePath());
        } else
            return ngram_file;
    }
    File[] src_files = src_dir.listFiles(new FileFilter() {
        @Override
        public boolean accept(File f) {
            // take plain .txt files, but never the output file itself
            return f.isFile() && f.getName().endsWith(".txt") && (!f.equals(ngram_file));
        }
    });
    if (src_files == null) {
        // listFiles returns null on I/O error or if src_dir is not a directory
        LOG.error("Could not list files in directory: '{}'.", src_dir.getAbsolutePath());
        return null;
    }
    String[] basenames = new String[src_files.length];
    for (int i = 0; i < basenames.length; i++)
        basenames[i] = src_files[i].getName();
    LOG.info(String.format("Reading txt files from dir: '%s'; Files: %s.", src_dir.getAbsolutePath(),
            StringUtils.abbreviate(Arrays.toString(basenames), 200)));
    LOG.info(String.format("Writing ngrams to file: '%s'.", ngram_file.getAbsolutePath()));
    PrintWriter pw = null;
    try {
        pw = new PrintWriter(
                new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(ngram_file)), "UTF-8"));
    } catch (IOException e) {
        LOG.error("Could not open writer for file: '{}'.", ngram_file.getAbsolutePath(), e);
        return null;
    }
    long num_ngrams = 0L;
    List<String>[] ngrams = null;
    for (int i = 0; i < src_files.length; i++) {
        File src_file = src_files[i];
        LOG.info("Processing file {} / {} ('{}')", i + 1, src_files.length, src_file.getAbsolutePath());
        long num_ngrams_f = 0L;
        LineIterator liter = null;
        try {
            liter = new LineIterator(
                    new BufferedReader(new InputStreamReader(new FileInputStream(src_file), "UTF-8")));
            int lc = 0;
            while (liter.hasNext()) {
                if (++lc % 1000 == 0)
                    LOG.debug("Processing line {} ({})", lc, src_file);
                String line = liter.next();
                for (String sentence : prvdr.splitSentences(line)) {
                    for (int n = n_b; n <= n_e; n++) {
                        ngrams = null;
                        try {
                            List<String> tokens = prvdr.tokenizeSentence(sentence);
                            if (tokens.isEmpty())
                                continue;
                            ngrams = AbstractLanguageModel.getNgramSequence(tokens, n);
                        } catch (Exception e) {
                            // log and carry on with the next cardinality; include the cause
                            LOG.warn(
                                    "Could not get ngram of cardinality {} from String '{}' in line '{}' from file '{}'.",
                                    n, StringUtils.abbreviate(line, 100), lc, src_file.getAbsolutePath(),
                                    e);
                            continue;
                        }
                        for (List<String> ngram : ngrams)
                            pw.println(StringUtils.join(ngram, " "));
                        pw.flush();
                        num_ngrams_f += ngrams.length;
                    }
                }
            }
        } catch (Exception e) {
            LOG.warn("Could not read file '{}'.", src_file.getAbsolutePath(), e);
        } finally {
            // close in finally so the file handle is released even when reading fails
            if (liter != null)
                liter.close();
        }
        LOG.debug("Generated {} ngrams from file {}.", num_ngrams_f, src_file);
        num_ngrams += num_ngrams_f;
    }
    if (pw != null)
        pw.close();
    LOG.info("Generated {} ngrams.", num_ngrams);
    return ngram_file;
}
From source file:au.org.ala.names.search.ALANameIndexer.java
public void init() throws Exception { tnse = new TaxonNameSoundEx(); // init the known homonyms LineIterator lines = new LineIterator( new BufferedReader(new InputStreamReader(this.getClass().getClassLoader() .getResource("au/org/ala/propertystore/known_homonyms.txt").openStream(), "ISO-8859-1"))); LineIterator blines = new LineIterator(new BufferedReader( new InputStreamReader(this.getClass().getClassLoader().getResource("blacklist.txt").openStream()))); try {/*www .jav a 2 s . c o m*/ //load known homonyms while (lines.hasNext()) { String line = lines.nextLine().trim(); knownHomonyms.add(line.toUpperCase()); } //load the blacklist while (blines.hasNext()) { String line = blines.nextLine().trim(); if (!line.startsWith("#") && StringUtils.isNotBlank(line)) blacklist.add(line); } } catch (Exception e) { e.printStackTrace(); } finally { lines.close(); blines.close(); } }
From source file:com.shopzilla.hadoop.testing.hdfs.DFSCluster.java
/**
 * Applies {@code lineProcessor} to every line of every file under {@code path}
 * (non-recursive; see {@code processDataRecursive} for the recursive variant).
 *
 * @param path          HDFS path whose files are read line by line
 * @param lineProcessor callback invoked once per line
 * @throws IOException if path traversal fails
 */
public void processData(final Path path, final Function<String, Void> lineProcessor) throws IOException {
    final Function<Path, Void> pathProcessor = new Function<Path, Void>() {
        @Override
        public Void apply(Path filePath) {
            LineIterator lineIterator = null;
            try {
                final FSDataInputStream in = miniDFSCluster.getFileSystem().open(filePath);
                lineIterator = new LineIterator(new InputStreamReader(in));
                while (lineIterator.hasNext()) {
                    lineProcessor.apply(lineIterator.next());
                }
                return null;
            } catch (final Exception ex) {
                throw new RuntimeException(ex);
            } finally {
                // close in finally: the original leaked the stream when an exception was thrown
                if (lineIterator != null) {
                    lineIterator.close();
                }
            }
        }
    };
    processPaths(path, new Function<Path, Void>() {
        @Override
        public Void apply(Path input) {
            pathProcessor.apply(input);
            return null;
        }
    });
}
From source file:de.tudarmstadt.lt.lm.app.FilterLines.java
/**
 * Sequentially processes every line read from {@code r}, logging progress every
 * 5000 lines. The reader's lifetime is owned by the caller and is not closed here.
 *
 * @param r source of input lines
 */
void runSequential(Reader r) {
    long lineCount = 0;
    LineIterator lines = new LineIterator(r);
    while (lines.hasNext()) {
        if (++lineCount % 5000 == 0)
            LOG.info("processing line {}.", lineCount);
        processLine(lines.next());
    }
}
From source file:com.shopzilla.hadoop.mapreduce.MiniMRClusterContext.java
public void processData(final Path path, final Function<String, Void> lineProcessor) throws IOException { final Function<Path, Void> pathProcessor = new Function<Path, Void>() { @Override// w w w . j ava2s . co m public Void apply(Path path) { try { FSDataInputStream in = miniDFSCluster.getFileSystem().open(path); LineIterator lineIterator = new LineIterator(new InputStreamReader(in)); while (lineIterator.hasNext()) { lineProcessor.apply(lineIterator.next()); } lineIterator.close(); } catch (Exception ex) { throw new RuntimeException(ex); } return null; } }; processPaths(path, new Function<Path, Void>() { @Override public Void apply(Path input) { pathProcessor.apply(input); return null; } }); }
From source file:com.shopzilla.hadoop.testing.hdfs.DFSCluster.java
public void processDataRecursive(final Path path, final Function<String, Void> lineProcessor) throws IOException { final Function<Path, Void> pathProcessor = new Function<Path, Void>() { @Override//from w ww .j a v a 2 s . c o m public Void apply(Path path) { try { final FSDataInputStream in = miniDFSCluster.getFileSystem().open(path); final LineIterator lineIterator = new LineIterator(new InputStreamReader(in)); while (lineIterator.hasNext()) { lineProcessor.apply(lineIterator.next()); } lineIterator.close(); return null; } catch (final Exception ex) { throw new RuntimeException(ex); } } }; processPathsRecursive(path, new Function<Path, Void>() { @Override public Void apply(Path input) { pathProcessor.apply(input); return null; } }); }
From source file:csv.to.sql.parser.mainMenu.java
private void btnParseActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_btnParseActionPerformed // TODO add your handling code here: String filePath = this.selectedFile.getPath(); filePath = filePath.replace(".csv", " "); File resultFile = new File(this.validFilePath(filePath + "csvTo.sql")); BufferedReader br = null;//from w ww.j a v a 2s.co m BufferedWriter bw = null; try { if (resultFile.createNewFile()) { String currLine = null; br = new BufferedReader(new FileReader(this.selectedFile)); bw = new BufferedWriter(new FileWriter(resultFile)); bw.write("INSERT INTO " + this.selectedFile.getName().replace(".csv", "") + " " + this.formatFields(br.readLine()).replace('"', '`') + " VALUES\n"); LineIterator it = new LineIterator(br); boolean lineStatus = it.hasNext(); while (lineStatus) { currLine = it.next(); bw.write(this.formatFields(currLine) + ((lineStatus = it.hasNext()) ? ",\n" : ";")); } this.resultOk = true; } } catch (IOException ex) { this.resultOk = false; System.out.println("Error al crear el archivo: " + ex.getMessage()); } finally { try { if (br != null & bw != null) { br.close(); bw.close(); } } catch (IOException ex) { Logger.getLogger(mainMenu.class.getName()).log(Level.SEVERE, null, ex); } JOptionPane.showMessageDialog(this, "Parse " + (this.resultOk ? "Successful!" : "Error!")); this.selectedFile = null; this.lblFile.setText("No File Selected!"); this.btnOpenFile.setEnabled(true); this.btnParse.setEnabled(false); } }