List of usage examples for org.apache.commons.io IOUtils lineIterator
public static LineIterator lineIterator(InputStream input, String encoding) throws IOException
Returns an Iterator for the lines in an InputStream, using the character encoding specified (or default encoding if null).
From source file:de.tudarmstadt.ukp.argumentation.data.roomfordebate.DataFetcher.java
public static void main(String[] args) throws Exception { File crawledPagesFolder = new File(args[0]); if (!crawledPagesFolder.exists()) { crawledPagesFolder.mkdirs();//from ww w.j a va 2s . c om } File outputFolder = new File(args[1]); if (!outputFolder.exists()) { outputFolder.mkdirs(); } // read links from text file final String urlsResourceName = "roomfordebate-urls.txt"; InputStream urlsStream = DataFetcher.class.getClassLoader().getResourceAsStream(urlsResourceName); if (urlsStream == null) { throw new IOException("Cannot find resource " + urlsResourceName + " on the classpath"); } // read list of urls List<String> urls = new ArrayList<>(); LineIterator iterator = IOUtils.lineIterator(urlsStream, "utf-8"); while (iterator.hasNext()) { // ignore commented url (line starts with #) String line = iterator.nextLine(); if (!line.startsWith("#") && !line.trim().isEmpty()) { urls.add(line.trim()); } } // download all crawlPages(urls, crawledPagesFolder); List<File> files = new ArrayList<>(FileUtils.listFiles(crawledPagesFolder, null, false)); Collections.sort(files, new Comparator<File>() { @Override public int compare(File o1, File o2) { return o1.getName().compareTo(o2.getName()); } }); int idCounter = 0; for (File file : files) { NYTimesCommentsScraper commentsScraper = new NYTimesCommentsScraper(); NYTimesArticleExtractor extractor = new NYTimesArticleExtractor(); String html = FileUtils.readFileToString(file, "utf-8"); idCounter++; File outputFileArticle = new File(outputFolder, String.format("Cx%03d.txt", idCounter)); File outputFileComments = new File(outputFolder, String.format("Dx%03d.txt", idCounter)); try { List<Comment> comments = commentsScraper.extractComments(html); Article article = extractor.extractArticle(html); saveArticleToText(article, outputFileArticle); System.out.println("Saved to " + outputFileArticle); saveCommentsToText(comments, outputFileComments, article); System.out.println("Saved to " + outputFileComments); } catch (IOException ex) { 
System.err.println(file.getName() + "\n" + ex.getMessage()); } } }
From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.statistics.StatisticsTableCreator.java
public static Table<String, String, Long> loadTable(InputStream stream) throws IOException { Table<String, String, Long> result = TreeBasedTable.create(); LineIterator lineIterator = IOUtils.lineIterator(stream, "utf-8"); while (lineIterator.hasNext()) { String line = lineIterator.next(); System.out.println(line); String[] split = line.split("\t"); String language = split[0]; String license = split[1]; Long documents = Long.valueOf(split[2]); Long tokens = Long.valueOf(split[3]); result.put(language, "docs " + license, documents); result.put(language, "tokens " + license, tokens); }//from w ww . j a v a 2 s .co m return result; }
From source file:de.rnd7.kata.reversi.logic.ai.AIMatrix.java
/**
 * Creates a matrix from a UTF-8 text resource by handing every line, together with its
 * zero-based line number, to {@code processLine}.
 *
 * @param name name of the resource, resolved through {@code AIMatrix.class.getResourceAsStream}
 * @return the matrix after all lines of the resource have been processed
 * @throws IOException if the resource cannot be found or cannot be read
 */
public static AIMatrix fromResource(final String name) throws IOException {
    final AIMatrix matrix = new AIMatrix();

    try (InputStream input = AIMatrix.class.getResourceAsStream(name)) {
        // getResourceAsStream signals a missing resource with null; fail with a clear
        // message instead of a NullPointerException from inside the line iterator
        if (input == null) {
            throw new IOException("Cannot find resource " + name + " on the classpath");
        }

        final LineIterator iterator = IOUtils.lineIterator(input, CharEncoding.UTF_8);
        int lineNumber = 0;
        while (iterator.hasNext()) {
            processLine(matrix, lineNumber++, iterator.next());
        }
    }

    return matrix;
}
From source file:com.icantrap.collections.dawg.TrieValidationTest.java
@Before public void before() throws IOException { assumeThat(System.getProperty("RUN_VALIDATION"), is("on")); LineIterator iter = IOUtils.lineIterator(getClass().getResourceAsStream("/TWL06.txt"), null); dawgBuilder = new DawgBuilder(); while (iter.hasNext()) dawgBuilder.add(iter.next());//from w w w. ja v a 2 s . c o m LineIterator.closeQuietly(iter); System.out.println("Uncompressed: " + dawgBuilder.nodeCount() + " nodes"); StopWatch stopWatch = new StopWatch(); stopWatch.start(); dawgBuilder.build(); stopWatch.stop(); System.out.println("Time to compress: " + stopWatch.getTime() + " ms."); System.out.println("Compressed: " + dawgBuilder.nodeCount() + " nodes"); }
From source file:com.icantrap.collections.dawg.DawgValidationTest.java
@Test public void containsAllWords() throws IOException { LineIterator iter = IOUtils.lineIterator(getClass().getResourceAsStream("/TWL06.txt"), null); StopWatch stopWatch = new StopWatch(); stopWatch.start();//from w w w .j a va 2 s . co m while (iter.hasNext()) { String word = iter.next(); assertTrue("Missing word (" + word + ")", dawg.contains(word)); } stopWatch.stop(); System.out.println("Time to query: " + stopWatch.getTime() + " ms."); LineIterator.closeQuietly(iter); }
From source file:com.icantrap.collections.dawg.TrieValidationTest.java
/**
 * Asserts that {@code dawgBuilder} contains every line of {@code /TWL06.txt} and prints the
 * time spent querying.
 *
 * @throws IOException if the line iterator cannot be created
 */
@Test
public void containsAllWords() throws IOException {
    // null encoding: the platform default charset is used
    LineIterator iter = IOUtils.lineIterator(getClass().getResourceAsStream("/TWL06.txt"), null);
    try {
        StopWatch stopWatch = new StopWatch();
        stopWatch.start();

        while (iter.hasNext()) {
            String word = iter.next();
            assertTrue("Missing word (" + word + ")", dawgBuilder.contains(word));
        }

        stopWatch.stop();
        System.out.println("Time to query:  " + stopWatch.getTime() + " ms.");
    } finally {
        // a failed assertion throws, so closing must not depend on reaching the end
        LineIterator.closeQuietly(iter);
    }
}
From source file:net.pms.io.OutputTextLogger.java
public void run() { LineIterator it = null;//from w ww . ja v a2 s.co m try { it = IOUtils.lineIterator(inputStream, "UTF-8"); while (it.hasNext()) { String line = it.nextLine(); logger.debug(line); } } catch (IOException ioe) { logger.debug("Error consuming input stream: {}", ioe.getMessage()); } catch (IllegalStateException ise) { logger.debug("Error reading from closed input stream: {}", ise.getMessage()); } finally { LineIterator.closeQuietly(it); // clean up all associated resources } }
From source file:net.orpiske.sfs.filter.dictionary.spell.DefaultDictionary.java
/**
 * Loads the bundled dictionary {@code /dictionaries/pt/port-big.dic} into {@code hashSet}.
 * <p>
 * Lines starting with {@code #} are skipped, as are lines that
 * {@code DictionaryEntry.fromString} turns into {@code null} and entries whose category is
 * {@code OTHER}. The file is decoded with the platform default charset.
 *
 * @throws DictionaryReadException if an I/O error occurs while reading the dictionary
 */
public DefaultDictionary() {
    final InputStream dictionaryStream = getClass().getResourceAsStream("/dictionaries/pt/port-big.dic");

    try {
        final Iterator<String> lines = IOUtils.lineIterator(dictionaryStream, Charset.defaultCharset());

        while (lines.hasNext()) {
            final String line = lines.next();

            // comment lines are never parsed
            if (!line.startsWith("#")) {
                final DictionaryEntry entry = DictionaryEntry.fromString(line);

                if (entry != null && entry.getCategory() != DictionaryEntry.Category.OTHER) {
                    if (logger.isTraceEnabled()) {
                        logger.trace("Adding entry " + entry.getWord() + " to the cache");
                    }

                    hashSet.add(entry);
                }
            }
        }
    } catch (IOException e) {
        // Not expected for a bundled resource; if it happens anyway there is nothing to
        // recover, so log it and surface it to the caller.
        logger.error("Unhandled I/O exception: " + e.getMessage(), e);
        throw new DictionaryReadException(e);
    } finally {
        IOUtils.closeQuietly(dictionaryStream);
    }
}
From source file:modelinspector.collectors.WordlistMatchCollector.java
public WordlistMatchCollector(String aName, String aLanguage, boolean aCaseSensitive, int aCutoff, String aFile, String aEncoding) {// w w w. j a v a2s . c o m name = aName; baseVocabulary = new HashSet<>(); caseSensitive = aCaseSensitive; language = new Locale(aLanguage); cutoff = aCutoff; try (InputStream is = new FileInputStream(aFile)) { LineIterator i = IOUtils.lineIterator(is, aEncoding); while (i.hasNext()) { String[] fields = i.nextLine().split("\t"); if (fields.length > 1 && aCutoff > 0) { if (Integer.valueOf(fields[1]) < aCutoff) { continue; } } String word = aCaseSensitive ? fields[0] : fields[0].toLowerCase(language); baseVocabulary.add(word); } } catch (IOException e) { throw new RuntimeException(e); } originalBaseVocabularySize = baseVocabulary.size(); }
From source file:gobblin.source.extractor.filebased.SingleFileDownloader.java
/**
 * Opens the given file through the extractor's file system helper and returns an iterator
 * over its lines. The stream is registered with the extractor's closer rather than closed
 * here. When the extractor asks for it, the first line is consumed before the iterator is
 * returned.
 *
 * @param file the file to open
 * @return an iterator over the lines of the file, cast unchecked to {@code Iterator<D>}
 * @throws IOException if the line iterator cannot be created, or wrapping a
 *                     {@code FileBasedHelperException} raised while opening the file
 */
@SuppressWarnings("unchecked")
public Iterator<D> downloadFile(String file) throws IOException {
    log.info("Beginning to download file: " + file);

    try {
        final InputStream registeredStream = this.fileBasedExtractor.getCloser()
                .register(this.fileBasedExtractor.getFsHelper().getFileStream(file));

        final Iterator<D> lines = (Iterator<D>) IOUtils.lineIterator(registeredStream,
                ConfigurationKeys.DEFAULT_CHARSET_ENCODING);

        // drop the first record only when requested and only when there is one
        if (this.fileBasedExtractor.isShouldSkipFirstRecord()) {
            if (lines.hasNext()) {
                lines.next();
            }
        }

        return lines;
    } catch (FileBasedHelperException e) {
        throw new IOException("Exception while downloading file " + file + " with message " + e.getMessage(), e);
    }
}