List of usage examples for org.apache.commons.io IOUtils lineIterator
public static LineIterator lineIterator(InputStream input, String encoding) throws IOException
Returns an Iterator for the lines in an InputStream, using the character encoding specified (or default encoding if null). From source file:de.tudarmstadt.ukp.argumentation.data.roomfordebate.DataFetcher.java
public static void main(String[] args) throws Exception { File crawledPagesFolder = new File(args[0]); if (!crawledPagesFolder.exists()) { crawledPagesFolder.mkdirs();//from ww w.j a va 2s . c om } File outputFolder = new File(args[1]); if (!outputFolder.exists()) { outputFolder.mkdirs(); } // read links from text file final String urlsResourceName = "roomfordebate-urls.txt"; InputStream urlsStream = DataFetcher.class.getClassLoader().getResourceAsStream(urlsResourceName); if (urlsStream == null) { throw new IOException("Cannot find resource " + urlsResourceName + " on the classpath"); } // read list of urls List<String> urls = new ArrayList<>(); LineIterator iterator = IOUtils.lineIterator(urlsStream, "utf-8"); while (iterator.hasNext()) { // ignore commented url (line starts with #) String line = iterator.nextLine(); if (!line.startsWith("#") && !line.trim().isEmpty()) { urls.add(line.trim()); } } // download all crawlPages(urls, crawledPagesFolder); List<File> files = new ArrayList<>(FileUtils.listFiles(crawledPagesFolder, null, false)); Collections.sort(files, new Comparator<File>() { @Override public int compare(File o1, File o2) { return o1.getName().compareTo(o2.getName()); } }); int idCounter = 0; for (File file : files) { NYTimesCommentsScraper commentsScraper = new NYTimesCommentsScraper(); NYTimesArticleExtractor extractor = new NYTimesArticleExtractor(); String html = FileUtils.readFileToString(file, "utf-8"); idCounter++; File outputFileArticle = new File(outputFolder, String.format("Cx%03d.txt", idCounter)); File outputFileComments = new File(outputFolder, String.format("Dx%03d.txt", idCounter)); try { List<Comment> comments = commentsScraper.extractComments(html); Article article = extractor.extractArticle(html); saveArticleToText(article, outputFileArticle); System.out.println("Saved to " + outputFileArticle); saveCommentsToText(comments, outputFileComments, article); System.out.println("Saved to " + outputFileComments); } catch (IOException ex) { 
System.err.println(file.getName() + "\n" + ex.getMessage()); } } }
From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.statistics.StatisticsTableCreator.java
public static Table<String, String, Long> loadTable(InputStream stream) throws IOException { Table<String, String, Long> result = TreeBasedTable.create(); LineIterator lineIterator = IOUtils.lineIterator(stream, "utf-8"); while (lineIterator.hasNext()) { String line = lineIterator.next(); System.out.println(line); String[] split = line.split("\t"); String language = split[0]; String license = split[1]; Long documents = Long.valueOf(split[2]); Long tokens = Long.valueOf(split[3]); result.put(language, "docs " + license, documents); result.put(language, "tokens " + license, tokens); }//from w ww . j a v a 2 s .co m return result; }
From source file:de.rnd7.kata.reversi.logic.ai.AIMatrix.java
/**
 * Creates a matrix from a resource that is looked up via {@code AIMatrix.class} and read
 * line by line as UTF-8.
 *
 * <p>Each line is handed to {@code processLine} together with its zero-based line number.
 *
 * @param name resource name, resolved by {@link Class#getResourceAsStream(String)}
 * @return the matrix populated from the resource
 * @throws IOException if the resource cannot be found or cannot be read
 */
public static AIMatrix fromResource(final String name) throws IOException {
    final AIMatrix matrix = new AIMatrix();

    try (InputStream input = AIMatrix.class.getResourceAsStream(name)) {
        // getResourceAsStream signals a missing resource with null; fail with a clear
        // message instead of an opaque NullPointerException from the line iterator
        if (input == null) {
            throw new IOException("Cannot find resource " + name);
        }

        final LineIterator iterator = IOUtils.lineIterator(input, CharEncoding.UTF_8);

        int lineNumber = 0;
        while (iterator.hasNext()) {
            processLine(matrix, lineNumber++, iterator.next());
        }
    }

    return matrix;
}
From source file:com.icantrap.collections.dawg.TrieValidationTest.java
@Before public void before() throws IOException { assumeThat(System.getProperty("RUN_VALIDATION"), is("on")); LineIterator iter = IOUtils.lineIterator(getClass().getResourceAsStream("/TWL06.txt"), null); dawgBuilder = new DawgBuilder(); while (iter.hasNext()) dawgBuilder.add(iter.next());//from w w w. ja v a 2 s . c o m LineIterator.closeQuietly(iter); System.out.println("Uncompressed: " + dawgBuilder.nodeCount() + " nodes"); StopWatch stopWatch = new StopWatch(); stopWatch.start(); dawgBuilder.build(); stopWatch.stop(); System.out.println("Time to compress: " + stopWatch.getTime() + " ms."); System.out.println("Compressed: " + dawgBuilder.nodeCount() + " nodes"); }
From source file:com.icantrap.collections.dawg.DawgValidationTest.java
@Test public void containsAllWords() throws IOException { LineIterator iter = IOUtils.lineIterator(getClass().getResourceAsStream("/TWL06.txt"), null); StopWatch stopWatch = new StopWatch(); stopWatch.start();//from w w w .j a va 2 s . co m while (iter.hasNext()) { String word = iter.next(); assertTrue("Missing word (" + word + ")", dawg.contains(word)); } stopWatch.stop(); System.out.println("Time to query: " + stopWatch.getTime() + " ms."); LineIterator.closeQuietly(iter); }
From source file:com.icantrap.collections.dawg.TrieValidationTest.java
/**
 * Asserts that {@code dawgBuilder} contains every line of {@code /TWL06.txt} and prints the
 * total query time.
 *
 * @throws IOException declared for the underlying line iterator
 */
@Test
public void containsAllWords() throws IOException {
    LineIterator iter = IOUtils.lineIterator(getClass().getResourceAsStream("/TWL06.txt"), null);
    try {
        StopWatch stopWatch = new StopWatch();
        stopWatch.start();

        while (iter.hasNext()) {
            String word = iter.next();
            assertTrue("Missing word (" + word + ")", dawgBuilder.contains(word));
        }

        stopWatch.stop();
        System.out.println("Time to query:  " + stopWatch.getTime() + " ms.");
    } finally {
        // a failed assertion must not leave the word list stream open
        LineIterator.closeQuietly(iter);
    }
}
From source file:net.pms.io.OutputTextLogger.java
public void run() { LineIterator it = null;//from w ww . ja v a2 s.co m try { it = IOUtils.lineIterator(inputStream, "UTF-8"); while (it.hasNext()) { String line = it.nextLine(); logger.debug(line); } } catch (IOException ioe) { logger.debug("Error consuming input stream: {}", ioe.getMessage()); } catch (IllegalStateException ise) { logger.debug("Error reading from closed input stream: {}", ise.getMessage()); } finally { LineIterator.closeQuietly(it); // clean up all associated resources } }
From source file:net.orpiske.sfs.filter.dictionary.spell.DefaultDictionary.java
/**
 * Loads the dictionary from the classpath resource {@code /dictionaries/pt/port-big.dic},
 * decoding it with the platform default charset.
 *
 * <p>Lines starting with {@code #} are skipped, as are lines for which
 * {@code DictionaryEntry.fromString} returns {@code null} and entries whose category is
 * {@code OTHER}. Every remaining entry is added to {@code hashSet}.
 *
 * @throws DictionaryReadException if reading the resource fails with an I/O error
 */
public DefaultDictionary() {
    final InputStream dictionaryStream = getClass().getResourceAsStream("/dictionaries/pt/port-big.dic");

    try {
        final Iterator<String> lines = IOUtils.lineIterator(dictionaryStream, Charset.defaultCharset());

        while (lines.hasNext()) {
            final String line = lines.next();

            // comment lines never reach the entry parser
            if (!line.startsWith("#")) {
                final DictionaryEntry entry = DictionaryEntry.fromString(line);

                final boolean usable = entry != null
                        && entry.getCategory() != DictionaryEntry.Category.OTHER;

                if (usable) {
                    if (logger.isTraceEnabled()) {
                        logger.trace("Adding entry " + entry.getWord() + " to the cache");
                    }

                    hashSet.add(entry);
                }
            }
        }
    } catch (IOException e) {
        // Not expected for a bundled resource; if it happens anyway there is no way to
        // recover, so log it and fail the construction.
        logger.error("Unhandled I/O exception: " + e.getMessage(), e);
        throw new DictionaryReadException(e);
    } finally {
        IOUtils.closeQuietly(dictionaryStream);
    }
}
From source file:modelinspector.collectors.WordlistMatchCollector.java
public WordlistMatchCollector(String aName, String aLanguage, boolean aCaseSensitive, int aCutoff, String aFile, String aEncoding) {// w w w. j a v a2s . c o m name = aName; baseVocabulary = new HashSet<>(); caseSensitive = aCaseSensitive; language = new Locale(aLanguage); cutoff = aCutoff; try (InputStream is = new FileInputStream(aFile)) { LineIterator i = IOUtils.lineIterator(is, aEncoding); while (i.hasNext()) { String[] fields = i.nextLine().split("\t"); if (fields.length > 1 && aCutoff > 0) { if (Integer.valueOf(fields[1]) < aCutoff) { continue; } } String word = aCaseSensitive ? fields[0] : fields[0].toLowerCase(language); baseVocabulary.add(word); } } catch (IOException e) { throw new RuntimeException(e); } originalBaseVocabularySize = baseVocabulary.size(); }
From source file:gobblin.source.extractor.filebased.SingleFileDownloader.java
/**
 * Opens the given file through the extractor's file-system helper and returns an iterator
 * over its lines.
 *
 * <p>The opened stream is registered with the extractor's closer. When the extractor reports
 * that the first record should be skipped and the file has at least one line, that line is
 * consumed before the iterator is returned.
 *
 * @param file the file to download
 * @return an iterator over the remaining lines of the file
 * @throws IOException if opening the file fails with a {@code FileBasedHelperException}, or
 *                     if creating the line iterator fails
 */
@SuppressWarnings("unchecked")
public Iterator<D> downloadFile(String file) throws IOException {
    log.info("Beginning to download file: " + file);

    try {
        InputStream registeredStream = this.fileBasedExtractor.getCloser()
                .register(this.fileBasedExtractor.getFsHelper().getFileStream(file));

        // unchecked: the line iterator yields Strings, which callers treat as D
        Iterator<D> records = (Iterator<D>) IOUtils.lineIterator(registeredStream,
                ConfigurationKeys.DEFAULT_CHARSET_ENCODING);

        boolean skipFirstRecord = this.fileBasedExtractor.isShouldSkipFirstRecord();
        if (skipFirstRecord && records.hasNext()) {
            records.next();
        }

        return records;
    } catch (FileBasedHelperException e) {
        throw new IOException("Exception while downloading file " + file + " with message " + e.getMessage(), e);
    }
}