List of usage examples for org.apache.commons.io.IOUtils#lineIterator
public static LineIterator lineIterator(InputStream input, String encoding) throws IOException
Returns an iterator over the lines of an InputStream, using the character encoding specified (or the default encoding if null).
From source file: net.pms.io.OutputTextConsumer.java
public void run() { LineIterator it = null;/*from www . j a v a2 s. c o m*/ try { it = IOUtils.lineIterator(inputStream, "UTF-8"); while (it.hasNext()) { String line = it.nextLine(); if (line.length() > 0) { addLine(line); } if (log) { logger.debug(line); } } } catch (IOException ioe) { logger.debug("Error consuming input stream: {}", ioe.getMessage()); } catch (IllegalStateException ise) { logger.debug("Error reading from closed input stream: {}", ise.getMessage()); } finally { LineIterator.closeQuietly(it); // clean up all associated resources } }
From source file:gobblin.source.extractor.filebased.GZIPFileDownloader.java
/**
 * Opens {@code file} as a GZIP-compressed text stream and returns an iterator
 * over its lines. The raw stream is registered with the extractor's closer so
 * it is released when the extractor shuts down; the first record is skipped
 * when the extractor is configured to drop a header row.
 *
 * @param file path of the gzip-compressed file to download
 * @return an iterator over the file's records
 * @throws IOException if the file stream cannot be obtained
 */
@SuppressWarnings("unchecked")
public Iterator<D> downloadFile(String file) throws IOException {
    log.info("Beginning to download gzip compressed file: " + file);
    try {
        InputStream rawStream = this.fileBasedExtractor.getCloser()
                .register(this.fileBasedExtractor.getFsHelper().getFileStream(file));
        Iterator<D> records = (Iterator<D>) IOUtils.lineIterator(new GZIPInputStream(rawStream),
                ConfigurationKeys.DEFAULT_CHARSET_ENCODING);
        // Drop the header row when configured to do so.
        if (this.fileBasedExtractor.isShouldSkipFirstRecord() && records.hasNext()) {
            records.next();
        }
        return records;
    } catch (FileBasedHelperException e) {
        throw new IOException("Exception while downloading file " + file + " with message " + e.getMessage(), e);
    }
}
From source file:modelinspector.collectors.RequireAllCollector.java
public RequireAllCollector(String aName, String aLanguage, File aFile, String aEncoding, boolean aCaseSensitive) { name = aName;//from w w w. j av a 2 s . c o m required = new HashSet<>(); language = new Locale(aLanguage); caseSensitive = aCaseSensitive; try (InputStream is = new FileInputStream(aFile)) { LineIterator i = IOUtils.lineIterator(is, aEncoding); while (i.hasNext()) { String[] fields = i.nextLine().split("\t"); String word = caseSensitive ? fields[0] : fields[0].toLowerCase(); required.add(word); } } catch (IOException e) { throw new RuntimeException(e); } }
From source file:com.romeikat.datamessie.core.base.util.StringUtil.java
/**
 * Collapses a multi-line string onto a single line, joining consecutive lines
 * with a single space.
 *
 * @param s the input string; may be {@code null}
 * @return the joined single-line string, or {@code null} if {@code s} is
 *         {@code null} or an I/O error occurs while reading it
 */
public String removeLineSeparators(final String s) {
    if (s == null) {
        return null;
    }
    InputStream is = null;
    LineIterator it = null;
    try {
        final StringBuilder sb = new StringBuilder();
        is = IOUtils.toInputStream(s, StandardCharsets.UTF_8);
        it = IOUtils.lineIterator(is, StandardCharsets.UTF_8);
        while (it.hasNext()) {
            if (sb.length() > 0) {
                sb.append(" ");
            }
            sb.append(it.nextLine());
        }
        return sb.toString();
    } catch (final IOException e) {
        e.printStackTrace();
        return null;
    } finally {
        // Close unconditionally: the original only closed the stream on the
        // success path, leaking it whenever an IOException was thrown.
        LineIterator.closeQuietly(it);
        IOUtils.closeQuietly(is);
    }
}
From source file:modelinspector.collectors.MostFrequentWordsCollector.java
public MostFrequentWordsCollector(String aLanguage, int aCutoff, boolean aCaseSensitive, String aFile, String aEncoding) {//from ww w. j av a2 s .c om cutoff = aCutoff; language = new Locale(aLanguage); String key = aLanguage + "-" + aCaseSensitive; setShowSample(true); wordList = wordLists.get(key); if (wordList == null) { wordList = new Object2IntOpenHashMap<>(); // The file read is sorted by frequency try (InputStream is = new FileInputStream(aFile)) { LineIterator i = IOUtils.lineIterator(is, aEncoding); int n = 1; while (i.hasNext()) { String[] fields = i.nextLine().split("\t"); String word = aCaseSensitive ? fields[0] : fields[0].toLowerCase(language); // System.out.println(word + " - " + n); // Record the word and its rank - since a word may appear in different // frequencies with different POSes, we need to make sure we don't overwrite // a frequent POS with an infrequent one. We only consider the most frequent // POS for a word. if (!wordList.containsKey(word)) { wordList.put(word, n); } n++; } } catch (IOException e) { throw new RuntimeException(e); } wordList.defaultReturnValue(Integer.MAX_VALUE); wordLists.put(key, wordList); } }
From source file:de.tudarmstadt.ukp.experiments.argumentation.clustering.ClusterCentroidsMain.java
public static TreeMap<Integer, Vector> computeClusterCentroids(String inputVectorsPath, String clusterOutputPath) throws IOException { TreeMap<Integer, Vector> result = new TreeMap<>(); Map<Integer, Integer> counts = new TreeMap<>(); // input for cluto File inputVectors = new File(inputVectorsPath); // resulting clusters File clutoClustersOutput = new File(clusterOutputPath); LineIterator clustersIterator = IOUtils.lineIterator(new FileInputStream(clutoClustersOutput), "utf-8"); LineIterator vectorsIterator = IOUtils.lineIterator(new FileInputStream(inputVectors), "utf-8"); // skip first line (number of clusters and vector size vectorsIterator.next();/*from w ww .jav a 2 s. com*/ while (clustersIterator.hasNext()) { String clusterString = clustersIterator.next(); String vectorString = vectorsIterator.next(); int clusterNumber = Integer.valueOf(clusterString); // now parse the vector DenseVector vector = ClusteringUtils.parseVector(vectorString); // if there is no resulting vector for the particular cluster, add this one if (!result.containsKey(clusterNumber)) { result.put(clusterNumber, vector); } else { // otherwise add this one to the previous one result.put(clusterNumber, result.get(clusterNumber).add(vector)); } // and update counts if (!counts.containsKey(clusterNumber)) { counts.put(clusterNumber, 0); } counts.put(clusterNumber, counts.get(clusterNumber) + 1); } // now compute average for each vector for (Map.Entry<Integer, Vector> entry : result.entrySet()) { // cluster number int clusterNumber = entry.getKey(); // get counts int count = counts.get(clusterNumber); // divide by count of vectors for each cluster (averaging) for (VectorEntry vectorEntry : entry.getValue()) { vectorEntry.set(vectorEntry.get() / (double) count); } } return result; }
From source file:com.adobe.acs.tools.csv.impl.CsvUtil.java
/**
 * Adds a populated terminating field to the ends of CSV entries.
 * If the last entry in a CSV row is empty, the CSV library has difficulty
 * understanding that is the end of the row.
 *
 * @param is the CSV file as an inputstream
 * @param separator the field separator
 * @param charset the charset
 * @return an inputstream that is the same as is, but each line has a populated line termination entry
 * @throws IOException if the stream cannot be read or the charset is unsupported
 */
public static InputStream terminateLines(final InputStream is, final char separator, final String charset)
        throws IOException {
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    // Encode output with the caller-supplied charset. The original used
    // new PrintStream(baos), i.e. the platform default encoding, which
    // corrupts non-ASCII content that was read as `charset`.
    final PrintStream printStream = new PrintStream(baos, false, charset);
    try {
        final LineIterator lineIterator = IOUtils.lineIterator(is, charset);
        while (lineIterator.hasNext()) {
            String line = StringUtils.stripToNull(lineIterator.next());
            if (line != null) {
                line += separator + TERMINATED;
                printStream.println(line);
            }
        }
    } finally {
        // Flushes buffered output before the byte array is snapshotted; the
        // original never flushed or closed the stream.
        printStream.close();
    }
    return new ByteArrayInputStream(baos.toByteArray());
}
From source file:de.tudarmstadt.ukp.clarin.webanno.tcf.WebAnnoSemanticGraphReader.java
/**
 * Reads an entailment-pair file (one pair per line, the two sides separated by
 * a tab-delimited ">" or "X" marker) into the CAS: a Token for every
 * whitespace-separated item, a CoreferenceChain linking the "text" span to the
 * "hypothesis" span, and one Sentence per input line. The document text is
 * rebuilt in parallel so annotation offsets match.
 *
 * @param aJCas the CAS to populate
 * @param aIs the input stream to read
 * @param aEncoding the character encoding of the input stream
 * @throws IOException if the stream cannot be read
 */
public void convertToCas(JCas aJCas, InputStream aIs, String aEncoding) throws IOException {
    StringBuilder text = new StringBuilder();
    LineIterator lineIterator = IOUtils.lineIterator(aIs, aEncoding);
    // Running character offset into the document text being rebuilt.
    int tokenBeginPosition = 0;
    while (lineIterator.hasNext()) {
        String line = lineIterator.next();
        // "\t>\t" separates entailing pairs, "\tX\t" non-entailing ones.
        String[] contents = line.split("\t>\t|\tX\t");
        int sentenceBegin = tokenBeginPosition;
        int chainBegin = tokenBeginPosition;
        int chainEnd = 0;
        // First pass: tokenize the "text" side, one Token per whitespace item.
        StringTokenizer st = new StringTokenizer(contents[0]);
        while (st.hasMoreTokens()) {
            String content = st.nextToken();
            Token outToken = new Token(aJCas, tokenBeginPosition, tokenBeginPosition + content.length());
            outToken.addToIndexes();
            // +1 accounts for the single space appended after the token.
            tokenBeginPosition = outToken.getEnd() + 1;
            chainEnd = tokenBeginPosition;
            text.append(content + " ");
        }
        CoreferenceChain chain = new CoreferenceChain(aJCas);
        CoreferenceLink link = new CoreferenceLink(aJCas, chainBegin, chainEnd - 1);
        link.setReferenceType("text");
        link.addToIndexes();
        chain.setFirst(link);
        // Emit a one-character separator token (">" or "X") mirroring the marker.
        if (line.contains("\t>\t")) {
            link.setReferenceRelation("entails");
            Token outToken = new Token(aJCas, tokenBeginPosition, tokenBeginPosition + 1);
            outToken.addToIndexes();
            tokenBeginPosition = outToken.getEnd() + 1;
            text.append("> ");
        } else {
            link.setReferenceRelation("do not entails");
            Token outToken = new Token(aJCas, tokenBeginPosition, tokenBeginPosition + 1);
            outToken.addToIndexes();
            tokenBeginPosition = outToken.getEnd() + 1;
            text.append("X ");
        }
        chainBegin = tokenBeginPosition;
        // NOTE(review): this re-tokenizes contents[0] (the "text" side) again,
        // yet the resulting span is labeled "hypothesis" below. The hypothesis
        // side would be contents[1] — confirm against upstream whether this is
        // intentional.
        st = new StringTokenizer(contents[0]);
        while (st.hasMoreTokens()) {
            String content = st.nextToken();
            Token outToken = new Token(aJCas, tokenBeginPosition, tokenBeginPosition + content.length());
            outToken.addToIndexes();
            tokenBeginPosition = outToken.getEnd() + 1;
            chainEnd = tokenBeginPosition;
            text.append(content + " ");
        }
        CoreferenceLink nextLink = new CoreferenceLink(aJCas, chainBegin, chainEnd - 1);
        nextLink.setReferenceType("hypothesis");
        nextLink.addToIndexes();
        link.setNext(nextLink);
        chain.addToIndexes();
        text.append("\n");
        // One Sentence annotation covers the whole line (text + marker + hypothesis).
        Sentence outSentence = new Sentence(aJCas);
        outSentence.setBegin(sentenceBegin);
        outSentence.setEnd(tokenBeginPosition);
        outSentence.addToIndexes();
        // Advance past the appended "\n".
        tokenBeginPosition = tokenBeginPosition + 1;
        // NOTE(review): dead store — sentenceBegin is a loop-local that is
        // re-initialized at the top of each iteration.
        sentenceBegin = tokenBeginPosition;
    }
    aJCas.setDocumentText(text.toString());
}
From source file:fr.itinerennes.bundler.tasks.framework.AbstractCountedCsvTask.java
@PostExec public void prependLineCount() throws IOException { LOGGER.debug("Inserting line count at file head: {}", lineCount); final File output = getOutputFile(); final File source = File.createTempFile("itr-", output.getName(), output.getParentFile()); source.delete();//from ww w.ja v a 2s .co m FileUtils.moveFile(output, source); InputStream from = null; BufferedWriter to = null; try { from = new FileInputStream(source); to = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output), CHARSET)); to.write(String.valueOf(lineCount)); to.newLine(); final LineIterator i = IOUtils.lineIterator(from, CHARSET.name()); while (i.hasNext()) { to.write(i.next()); to.newLine(); } } finally { IOUtils.closeQuietly(from); IOUtils.closeQuietly(to); FileUtils.deleteQuietly(source); } }
From source file:com.msopentech.odatajclient.engine.communication.request.batch.ODataBatchResponseManager.java
/**
 * Constructor.
 *
 * @param res OData batch response.
 * @param expectedItems expected batch response items.
 * @throws IllegalStateException if the raw response cannot be read.
 */
public ODataBatchResponseManager(final ODataBatchResponse res, final List<ODataBatchResponseItem> expectedItems) {
    try {
        this.expectedItemsIterator = expectedItems.iterator();
        this.batchLineIterator = new ODataBatchLineIterator(
                IOUtils.lineIterator(res.getRawResponse(), ODataConstants.UTF8));
        // search for boundary
        batchBoundary = ODataBatchUtilities
                .getBoundaryFromHeader(res.getHeader(ODataHeaders.HeaderName.contentType));
        // Fixed log-message typo: "bondary" -> "boundary".
        LOG.debug("Retrieved batch response boundary '{}'", batchBoundary);
    } catch (IOException e) {
        LOG.error("Error parsing batch response", e);
        throw new IllegalStateException(e);
    }
}