List of usage examples for org.apache.commons.io LineIterator next
public Object next()
Reader
From source file:com.shopzilla.hadoop.mapreduce.MiniMRClusterContext.java
public void processDataRecursive(final Path path, final Function<String, Void> lineProcessor) throws IOException { final Function<Path, Void> pathProcessor = new Function<Path, Void>() { @Override//ww w . ja v a 2 s . c o m public Void apply(Path path) { try { FSDataInputStream in = miniDFSCluster.getFileSystem().open(path); LineIterator lineIterator = new LineIterator(new InputStreamReader(in)); while (lineIterator.hasNext()) { lineProcessor.apply(lineIterator.next()); } lineIterator.close(); } catch (Exception ex) { throw new RuntimeException(ex); } return null; } }; processPathsRecursive(path, new Function<Path, Void>() { @Override public Void apply(Path input) { pathProcessor.apply(input); return null; } }); }
From source file:fr.gael.dhus.server.http.webapp.search.controller.SearchController.java
/** * Provides the openSearch description file via /search/description API. * * @param res response//from w ww . j a v a 2 s . co m * @throws IOException if file description cannot be accessed */ @PreAuthorize("hasRole('ROLE_SEARCH')") @RequestMapping(value = "/description") public void search(HttpServletResponse res) throws IOException { String url = configurationManager.getServerConfiguration().getExternalUrl(); if (url != null && url.endsWith("/")) { url = url.substring(0, url.length() - 1); } String long_name = configurationManager.getNameConfiguration().getLongName(); String short_name = configurationManager.getNameConfiguration().getShortName(); String contact_mail = configurationManager.getSupportConfiguration().getMail(); InputStream is = ClassLoader.getSystemResourceAsStream(DESCRIPTION_FILE); if (is == null) { throw new IOException("Cannot find \"" + DESCRIPTION_FILE + "\" OpenSearch description file."); } LineIterator li = IOUtils.lineIterator(is, "UTF-8"); try (ServletOutputStream os = res.getOutputStream()) { while (li.hasNext()) { String line = li.next(); // Last line? -> the iterator eats LF if (li.hasNext()) { line = line + "\n"; } line = line.replace("[dhus_server]", url); if (long_name != null) { line = line.replace("[dhus_long_name]", long_name); } if (short_name != null) { line = line.replace("[dhus_short_name]", short_name); } if (contact_mail != null) { line = line.replace("[dhus_contact_mail]", contact_mail); } os.write(line.getBytes()); } } finally { IOUtils.closeQuietly(is); LineIterator.closeQuietly(li); } }
From source file:com.sangupta.httptools.DownloadUrlCommand.java
@Override public void run() { File file = new File(this.urlFile); if (file == null || !file.exists()) { System.out.println("URL file cannot be found."); return;//w w w .j a v a2 s . c om } if (!file.isFile()) { System.out.println("URL file does not represent a valid file."); return; } if (this.numThreads <= 0 || this.numThreads > 50) { System.out.println("Number of assigned threads should be between 1 and 50"); return; } outputDir = new File(this.outputFolder); if (outputDir.exists() && !outputDir.isDirectory()) { System.out.println("Output folder does not represent a valid directory"); return; } if (!outputDir.exists()) { outputDir.mkdirs(); } // try and parse and read all URLs int line = 1; try { LineIterator iterator = FileUtils.lineIterator(file); while (iterator.hasNext()) { ++line; String readURL = iterator.next(); createURLTask(readURL); } } catch (IOException e) { System.out.println("Unable to read URLs from the file at line: " + line); return; } // all set - create number of threads // and start fetching ExecutorService service = Executors.newFixedThreadPool(this.numThreads); final long start = System.currentTimeMillis(); for (Runnable runnable : this.downloadTasks) { service.submit(runnable); } // intialize some variables this.numTasks = this.downloadTasks.size(); this.downloadTasks.clear(); if (this.numTasks > 1000) { this.splitFolders = true; } // shutdown shutdownAndAwaitTermination(service); final long end = System.currentTimeMillis(); // everything done System.out.println(this.downloadTasks.size() + " urls downloaded in " + (end - start) + " millis."); }
From source file:net.sf.sprockets.database.sqlite.DbOpenHelper.java
/** * Execute the statements in the resource script on the database. Each statement must end with a * semicolon.//w ww .ja v a 2 s . c o m */ private void execScript(SQLiteDatabase db, Resources res, int script) throws IOException { LineIterator lines = IOUtils.lineIterator(res.openRawResource(script), UTF_8); StringBuilder sql = new StringBuilder(2048); // enough capacity for a long statement try { // read each (potentially multi-line) statement and execute them one at a time while (lines.hasNext()) { String line = lines.next().trim(); int length = line.length(); if (length > 0) { sql.append(line).append("\n"); if (line.charAt(length - 1) == ';') { // statement loaded db.execSQL(sql.toString()); sql.setLength(0); // reset builder for a new statement } } } } finally { lines.close(); } }
From source file:de.tudarmstadt.lt.lm.app.Ngrams.java
public void run_across_sentences(Reader r, String f) { LineIterator liter = new LineIterator(r); for (long lc = 0; liter.hasNext();) { if (++lc % 1000 == 0) LOG.info("Processing line {}:{}", f, lc); try {// ww w . j av a 2s .c o m String line = liter.next(); if (line.trim().isEmpty()) continue; List<String> sentences = _prvdr.splitSentences(line); if (sentences == null || sentences.isEmpty()) continue; for (String sentence : sentences) { if (sentence == null || sentence.isEmpty()) continue; List<String> tokens = null; try { tokens = _prvdr.tokenizeSentence(sentence); if (tokens == null || tokens.isEmpty()) continue; } catch (Exception e) { LOG.warn("Could not get tokens from from String '{}' in line '{}' from file '{}'.", StringUtils.abbreviate(line, 100), lc, f); continue; } for (String word : tokens) { if (word == null || word.trim().isEmpty()) continue; _ngram.add(word); for (int n = Math.max(_ngram.size() - _order_to, 0); n <= Math .min(_ngram.size() - _order_from, _ngram.size() - 1); n++) _pout.println(StringUtils.join(_ngram.subList(n, _ngram.size()), " ")); _num_ngrams++; } _pout.flush(); } } catch (Exception e) { LOG.warn("Could not process line '{}' in file '{}'.", lc, f); } } }
From source file:de.tudarmstadt.lt.lm.app.Ngrams.java
public void run_within_sentences(Reader r, String f) { LineIterator liter = new LineIterator(r); for (long lc = 0; liter.hasNext();) { if (++lc % 1000 == 0) LOG.info("Processing line {}:{}", f, lc); try {//w w w .j a v a2s . c om String line = liter.next(); if (line.trim().isEmpty()) continue; List<String> sentences = _prvdr.splitSentences(line); if (sentences == null || sentences.isEmpty()) continue; for (String sentence : sentences) { if (sentence == null || sentence.trim().isEmpty()) continue; for (int n = _order_from; n <= _order_to; n++) { List<String>[] ngrams = null; try { List<String> tokens = _prvdr.tokenizeSentence(sentence); if (tokens == null || tokens.isEmpty()) continue; ngrams = _prvdr.getNgramSequence(tokens, n); if (ngrams == null || ngrams.length < 1) continue; } catch (Exception e) { LOG.warn( "Could not get ngram of cardinality {} from String '{}' in line '{}' from file '{}'.", n, StringUtils.abbreviate(line, 100), lc, f); continue; } for (List<String> ngram : ngrams) { if (ngram == null || ngram.isEmpty()) continue; _pout.println(StringUtils.join(ngram, " ")); } _pout.flush(); _num_ngrams += ngrams.length; } } } catch (Exception e) { LOG.warn("Could not process line '{}' in file '{}'.", lc, f); } } }
From source file:com.adobe.acs.tools.tag_maker.impl.TagMakerServlet.java
private InputStream stripLineEnds(InputStream is, String charset, char chartoStrip) throws IOException { log.debug("Stripping [ {} ] from the end of lines.", chartoStrip); final ByteArrayOutputStream baos = new ByteArrayOutputStream(); final PrintStream printStream = new PrintStream(baos); final LineIterator lineIterator = IOUtils.lineIterator(is, charset); while (lineIterator.hasNext()) { String line = StringUtils.stripToNull(lineIterator.next()); if (line != null) { line = StringUtils.stripEnd(line, String.valueOf(chartoStrip)); printStream.println(line);//w ww.ja v a2 s . c om } } return new ByteArrayInputStream(baos.toByteArray()); }
From source file:com.sangupta.murmur.MurmurEnglishTest.java
/** * The main core logic for all testing./*from w ww .j av a 2s . c om*/ * * @param outputFileName * @param function * @throws IOException */ private void testHashes(String outputFileName, StringHashFunction function) throws IOException { LineIterator iterator = FileUtils.lineIterator(new File(BASE_PATH + "/english-wordlist.txt")); LineIterator results = FileUtils.lineIterator(new File(BASE_PATH + "/" + outputFileName)); int matched = 0; int total = 0; while (iterator.hasNext()) { String line = iterator.next(); byte[] bytes = line.getBytes(); String computed = function.getHash(bytes); String actual = results.next(); if (actual.contains(",")) { // result has multiple values String[] act = actual.split(","); String[] com = computed.split(","); if (act.length == com.length) { boolean allMatch = true; for (int index = 0; index < act.length; index++) { allMatch = allMatch & bigMatch(act[index], com[index]); } if (allMatch) { matched++; } } } else { // result has only a single value if (actual.equals(computed)) { matched++; } else { if (bigMatch(actual, computed)) { matched++; } } } total++; } Assert.assertEquals("Total number of hashes did not match", total, matched); }
From source file:de.tudarmstadt.ukp.clarin.webanno.automation.util.AutomationUtil.java
public static void addTabSepTrainDocument(MiraTemplate aTemplate, RepositoryService aRepository, AutomationService aAutomationService) throws IOException, UIMAException, ClassNotFoundException, AutomationException { File miraDir = aAutomationService.getMiraDir(aTemplate.getTrainFeature()); if (!miraDir.exists()) { FileUtils.forceMkdir(miraDir);/*www.java 2 s . c om*/ } AutomationStatus status = aAutomationService.getAutomationStatus(aTemplate); boolean documentChanged = false; for (SourceDocument document : aAutomationService .listTabSepDocuments(aTemplate.getTrainFeature().getProject())) { if (!document.isProcessed()) { documentChanged = true; break; } } if (!documentChanged) { return; } for (SourceDocument sourceDocument : aAutomationService .listTabSepDocuments(aTemplate.getTrainFeature().getProject())) { if (sourceDocument.getFeature() != null) { // This is a target layer train document continue; } File trainFile = new File(miraDir, sourceDocument.getId() + sourceDocument.getProject().getId() + ".train"); BufferedWriter trainOut = new BufferedWriter(new FileWriter(trainFile)); File tabSepFile = new File(aRepository.getDocumentFolder(sourceDocument), sourceDocument.getName()); LineIterator it = IOUtils.lineIterator(new FileReader(tabSepFile)); while (it.hasNext()) { String line = it.next(); if (line.trim().equals("")) { trainOut.append("\n"); } else { StringTokenizer st = new StringTokenizer(line, "\t"); if (st.countTokens() != 2) { trainOut.close(); throw new AutomationException("This is not a valid TAB-SEP document"); } trainOut.append(getMiraLineForTabSep(st.nextToken(), st.nextToken())); } } sourceDocument.setProcessed(false); status.setTrainDocs(status.getTrainDocs() - 1); trainOut.close(); } }
From source file:edu.cornell.med.icb.goby.modes.EmpiricalPMode.java
private int countLines(String inputFilename) throws FileNotFoundException { int lineCount = 0; LineIterator it = new LineIterator(new FileReader(inputFilename)); while (it.hasNext()) { Object next = it.next(); lineCount++;//www . j a v a 2s.c o m } it.close(); return lineCount; }