Example usage for org.apache.commons.io IOUtils lineIterator

List of usage examples for org.apache.commons.io IOUtils lineIterator

Introduction

On this page you can find example usage for org.apache.commons.io IOUtils lineIterator.

Prototype

public static LineIterator lineIterator(InputStream input, String encoding) throws IOException 

Source Link

Document

Return an Iterator for the lines in an InputStream, using the character encoding specified (or default encoding if null).

Usage

From source file:net.pms.io.OutputTextConsumer.java

/**
 * Consumes the wrapped process output stream line by line: every non-empty
 * line is recorded via {@code addLine}, and every line (empty or not) is
 * echoed to the debug log when logging is enabled.
 */
public void run() {
    LineIterator lines = null;

    try {
        lines = IOUtils.lineIterator(inputStream, "UTF-8");

        while (lines.hasNext()) {
            final String current = lines.nextLine();

            if (!current.isEmpty()) {
                addLine(current);
            }

            if (log) {
                logger.debug(current);
            }
        }
    } catch (IOException ioe) {
        logger.debug("Error consuming input stream: {}", ioe.getMessage());
    } catch (IllegalStateException ise) {
        // Thrown when the underlying stream was closed while iterating.
        logger.debug("Error reading from closed input stream: {}", ise.getMessage());
    } finally {
        // Releases the iterator and the underlying stream.
        LineIterator.closeQuietly(lines);
    }
}

From source file:gobblin.source.extractor.filebased.GZIPFileDownloader.java

/**
 * Opens the given gzip-compressed file and returns an iterator over its
 * decompressed lines, optionally skipping the first record.
 *
 * @param file path of the gzip file to download
 * @return an iterator over the file's records (one per line)
 * @throws IOException if the file stream cannot be opened
 */
@SuppressWarnings("unchecked")
public Iterator<D> downloadFile(String file) throws IOException {

    log.info("Beginning to download gzip compressed file: " + file);

    try {
        // Register the raw stream with the extractor's Closer so it is
        // released when the extractor shuts down.
        final InputStream rawStream = this.fileBasedExtractor.getFsHelper().getFileStream(file);
        final InputStream registered = this.fileBasedExtractor.getCloser().register(rawStream);

        final Iterator<D> lines = (Iterator<D>) IOUtils.lineIterator(new GZIPInputStream(registered),
                ConfigurationKeys.DEFAULT_CHARSET_ENCODING);

        // Optionally drop the header record.
        if (this.fileBasedExtractor.isShouldSkipFirstRecord() && lines.hasNext()) {
            lines.next();
        }

        return lines;
    } catch (FileBasedHelperException e) {
        throw new IOException("Exception while downloading file " + file + " with message " + e.getMessage(),
                e);
    }
}

From source file:modelinspector.collectors.RequireAllCollector.java

/**
 * Loads the set of required words from a tab-separated word-list file
 * (first field of each line is the word).
 *
 * @param aName          name of this collector
 * @param aLanguage      ISO language code used to build the word list's {@link Locale}
 * @param aFile          the word-list file to read
 * @param aEncoding      character encoding of the word-list file
 * @param aCaseSensitive if {@code false}, words are lower-cased before being stored
 */
public RequireAllCollector(String aName, String aLanguage, File aFile, String aEncoding,
        boolean aCaseSensitive) {
    name = aName;
    required = new HashSet<>();
    language = new Locale(aLanguage);
    caseSensitive = aCaseSensitive;

    try (InputStream is = new FileInputStream(aFile)) {
        LineIterator i = IOUtils.lineIterator(is, aEncoding);
        while (i.hasNext()) {
            // Only the first tab-separated field holds the word.
            String[] fields = i.nextLine().split("\t");
            // Lower-case with the word list's own locale (e.g. Turkish dotless i)
            // instead of the JVM default locale — consistent with
            // MostFrequentWordsCollector.
            String word = caseSensitive ? fields[0] : fields[0].toLowerCase(language);
            required.add(word);
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.romeikat.datamessie.core.base.util.StringUtil.java

/**
 * Replaces all line separators ({@code \r\n}, {@code \r}, {@code \n}) in the
 * given string with single spaces.
 *
 * @param s the string to process; may be {@code null}
 * @return the joined single-line string, or {@code null} if {@code s} is {@code null}
 */
public String removeLineSeparators(final String s) {
    if (s == null) {
        return null;
    }

    // Split on the same terminators LineIterator recognises (\r\n, \r, \n) and
    // join with single spaces. This avoids the previous round-trip through an
    // encoded InputStream, which was also left unclosed on the exception path.
    final StringBuilder sb = new StringBuilder();
    for (final String line : s.split("\r\n|[\r\n]")) {
        if (sb.length() > 0) {
            sb.append(" ");
        }
        sb.append(line);
    }
    return sb.toString();
}

From source file:modelinspector.collectors.MostFrequentWordsCollector.java

/**
 * Loads a frequency-ranked word list for the given language, caching the resulting
 * word-to-rank map per (language, case-sensitivity) combination so repeated
 * collectors share a single map.
 *
 * @param aLanguage      ISO language code used to build the {@link Locale}
 * @param aCutoff        rank cutoff stored for later use (use not visible here)
 * @param aCaseSensitive if {@code false}, words are lower-cased with the list's locale
 * @param aFile          path to the word-list file: one entry per line, tab-separated
 *                       fields with the word in the first field, sorted by frequency
 * @param aEncoding      character encoding of the word-list file
 */
public MostFrequentWordsCollector(String aLanguage, int aCutoff, boolean aCaseSensitive, String aFile,
        String aEncoding) {
    cutoff = aCutoff;
    language = new Locale(aLanguage);

    // Cache key: one shared word list per (language, case-sensitivity) pair.
    String key = aLanguage + "-" + aCaseSensitive;

    setShowSample(true);

    wordList = wordLists.get(key);
    if (wordList == null) {
        wordList = new Object2IntOpenHashMap<>();
        // The file read is sorted by frequency, so the line number is the rank.
        try (InputStream is = new FileInputStream(aFile)) {
            LineIterator i = IOUtils.lineIterator(is, aEncoding);
            int n = 1;
            while (i.hasNext()) {
                String[] fields = i.nextLine().split("\t");
                String word = aCaseSensitive ? fields[0] : fields[0].toLowerCase(language);
                // Record the word and its rank - since a word may appear in different
                // frequencies with different POSes, we need to make sure we don't overwrite
                // a frequent POS with an infrequent one. We only consider the most frequent
                // POS for a word.
                if (!wordList.containsKey(word)) {
                    wordList.put(word, n);
                }
                n++;
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        // Unknown words report the worst possible rank.
        wordList.defaultReturnValue(Integer.MAX_VALUE);
        wordLists.put(key, wordList);
    }
}

From source file:de.tudarmstadt.ukp.experiments.argumentation.clustering.ClusterCentroidsMain.java

/**
 * Computes one centroid per cluster by averaging the vectors assigned to it.
 * <p>
 * The cluster-assignment file contains one cluster number per line, aligned
 * line-by-line with the vector file (whose first line is a header and is skipped).
 *
 * @param inputVectorsPath  path to the vectors file; its first line is a header
 *                          (number of clusters and vector size) and is skipped
 * @param clusterOutputPath path to the cluto cluster output, one cluster number
 *                          per vector line
 * @return map from cluster number to that cluster's centroid vector
 * @throws IOException if either file cannot be opened or read
 */
public static TreeMap<Integer, Vector> computeClusterCentroids(String inputVectorsPath,
        String clusterOutputPath) throws IOException {
    TreeMap<Integer, Vector> result = new TreeMap<>();
    Map<Integer, Integer> counts = new TreeMap<>();

    // try-with-resources: previously these two streams were never closed (leak).
    try (FileInputStream clustersStream = new FileInputStream(new File(clusterOutputPath));
            FileInputStream vectorsStream = new FileInputStream(new File(inputVectorsPath))) {

        // resulting clusters
        LineIterator clustersIterator = IOUtils.lineIterator(clustersStream, "utf-8");
        // input for cluto
        LineIterator vectorsIterator = IOUtils.lineIterator(vectorsStream, "utf-8");

        // skip first line (number of clusters and vector size)
        vectorsIterator.next();

        while (clustersIterator.hasNext()) {
            String clusterString = clustersIterator.next();
            String vectorString = vectorsIterator.next();

            // parseInt avoids the needless boxing of Integer.valueOf
            int clusterNumber = Integer.parseInt(clusterString);

            // now parse the vector
            DenseVector vector = ClusteringUtils.parseVector(vectorString);

            // accumulate a per-cluster sum; averaged below
            if (!result.containsKey(clusterNumber)) {
                result.put(clusterNumber, vector);
            } else {
                result.put(clusterNumber, result.get(clusterNumber).add(vector));
            }

            // and update member counts
            if (!counts.containsKey(clusterNumber)) {
                counts.put(clusterNumber, 0);
            }
            counts.put(clusterNumber, counts.get(clusterNumber) + 1);
        }
    }

    // now compute average for each vector
    for (Map.Entry<Integer, Vector> entry : result.entrySet()) {
        // cluster number
        int clusterNumber = entry.getKey();
        // get counts
        int count = counts.get(clusterNumber);

        // divide by count of vectors for each cluster (averaging, in place)
        for (VectorEntry vectorEntry : entry.getValue()) {
            vectorEntry.set(vectorEntry.get() / (double) count);
        }
    }

    return result;
}

From source file:com.adobe.acs.tools.csv.impl.CsvUtil.java

/**
 * Adds a populated terminating field to the ends of CSV entries.
 * If the last entry in a CSV row is empty, the CSV library has difficulty understanding
 * that is the end of the row.
 *
 * @param is        the CSV file as an inputstream
 * @param separator The field separator
 * @param charset   The charset, used for both reading and re-encoding the data
 * @return An inputstream that is the same as is, but each line has a populated line
 *         termination entry
 * @throws IOException if the stream cannot be read or the charset is unsupported
 */
public static InputStream terminateLines(final InputStream is, final char separator, final String charset)
        throws IOException {

    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    // Encode the output with the same charset the input is decoded with; the
    // previous default-charset PrintStream could silently mis-encode non-ASCII data.
    final PrintStream printStream = new PrintStream(baos, false, charset);

    final LineIterator lineIterator = IOUtils.lineIterator(is, charset);

    while (lineIterator.hasNext()) {
        String line = StringUtils.stripToNull(lineIterator.next());

        if (line != null) {
            line += separator + TERMINATED;
            printStream.println(line);
        }
    }

    // Flush the PrintStream's internal buffer before snapshotting the bytes;
    // without this, trailing output could be missing from the returned stream.
    printStream.close();

    return new ByteArrayInputStream(baos.toByteArray());
}

From source file:de.tudarmstadt.ukp.clarin.webanno.tcf.WebAnnoSemanticGraphReader.java

/**
 * Reads a semantic-graph file into the CAS. Each input line holds two parts separated
 * by a tab-delimited marker column: {@code "\t>\t"} (entails) or {@code "\tX\t"} (does
 * not entail). Both parts are tokenized into {@link Token}s, linked as a two-element
 * {@link CoreferenceChain} ("text" link followed by "hypothesis" link), and the whole
 * line is wrapped in one {@link Sentence}. The accumulated, space-joined document text
 * is set on the CAS at the end.
 *
 * @param aJCas the CAS to fill
 * @param aIs the input stream to read
 * @param aEncoding character encoding of the input stream
 * @throws IOException if the input stream cannot be read
 */
public void convertToCas(JCas aJCas, InputStream aIs, String aEncoding) throws IOException

{
    StringBuilder text = new StringBuilder();
    LineIterator lineIterator = IOUtils.lineIterator(aIs, aEncoding);
    int tokenBeginPosition = 0;
    while (lineIterator.hasNext()) {
        String line = lineIterator.next();
        // Split into the parts on either side of the entailment marker column.
        String[] contents = line.split("\t>\t|\tX\t");
        int sentenceBegin = tokenBeginPosition;
        int chainBegin = tokenBeginPosition;
        int chainEnd = 0;
        // Tokenize the first part; offsets advance by token length + 1 (separator space).
        StringTokenizer st = new StringTokenizer(contents[0]);
        while (st.hasMoreTokens()) {
            String content = st.nextToken();
            Token outToken = new Token(aJCas, tokenBeginPosition, tokenBeginPosition + content.length());
            outToken.addToIndexes();
            tokenBeginPosition = outToken.getEnd() + 1;
            chainEnd = tokenBeginPosition;
            text.append(content + " ");
        }

        // The first part becomes the "text" link, head of the coreference chain.
        CoreferenceChain chain = new CoreferenceChain(aJCas);
        CoreferenceLink link = new CoreferenceLink(aJCas, chainBegin, chainEnd - 1);
        link.setReferenceType("text");
        link.addToIndexes();
        chain.setFirst(link);

        // Emit a one-character token for the entailment marker ('>' or 'X').
        if (line.contains("\t>\t")) {
            link.setReferenceRelation("entails");
            Token outToken = new Token(aJCas, tokenBeginPosition, tokenBeginPosition + 1);
            outToken.addToIndexes();
            tokenBeginPosition = outToken.getEnd() + 1;
            text.append("> ");
        } else {
            link.setReferenceRelation("do not entails");
            Token outToken = new Token(aJCas, tokenBeginPosition, tokenBeginPosition + 1);
            outToken.addToIndexes();
            tokenBeginPosition = outToken.getEnd() + 1;
            text.append("X ");
        }

        // NOTE(review): this re-tokenizes contents[0] — the hypothesis side would be
        // contents[1]. Confirm whether this is intentional or a copy-paste slip.
        chainBegin = tokenBeginPosition;
        st = new StringTokenizer(contents[0]);
        while (st.hasMoreTokens()) {
            String content = st.nextToken();
            Token outToken = new Token(aJCas, tokenBeginPosition, tokenBeginPosition + content.length());
            outToken.addToIndexes();
            tokenBeginPosition = outToken.getEnd() + 1;
            chainEnd = tokenBeginPosition;
            text.append(content + " ");

        }
        // The second part becomes the "hypothesis" link, chained after the first.
        CoreferenceLink nextLink = new CoreferenceLink(aJCas, chainBegin, chainEnd - 1);
        nextLink.setReferenceType("hypothesis");
        nextLink.addToIndexes();
        link.setNext(nextLink);
        chain.addToIndexes();
        text.append("\n");

        // One sentence spans the whole line (text + marker + hypothesis).
        Sentence outSentence = new Sentence(aJCas);
        outSentence.setBegin(sentenceBegin);
        outSentence.setEnd(tokenBeginPosition);
        outSentence.addToIndexes();
        tokenBeginPosition = tokenBeginPosition + 1;
        sentenceBegin = tokenBeginPosition;
    }
    aJCas.setDocumentText(text.toString());
}

From source file:fr.itinerennes.bundler.tasks.framework.AbstractCountedCsvTask.java

/**
 * Rewrites the task's output file with the total line count prepended as the first line.
 * <p>
 * The original output is moved aside to a temporary file, then copied back line by line
 * behind the count header; the temporary file is always removed afterwards.
 *
 * @throws IOException if the file cannot be moved, read, or rewritten
 */
@PostExec
public void prependLineCount() throws IOException {

    LOGGER.debug("Inserting line count at file head: {}", lineCount);

    final File output = getOutputFile();
    // Temp file in the same directory so the move is a simple rename.
    final File source = File.createTempFile("itr-", output.getName(), output.getParentFile());
    source.delete();
    FileUtils.moveFile(output, source);

    // try-with-resources instead of the deprecated IOUtils.closeQuietly: a close
    // failure on the writer (possible data loss) is no longer silently swallowed.
    try (InputStream from = new FileInputStream(source);
            BufferedWriter to = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(output), CHARSET))) {
        to.write(String.valueOf(lineCount));
        to.newLine();
        final LineIterator i = IOUtils.lineIterator(from, CHARSET.name());
        while (i.hasNext()) {
            to.write(i.next());
            to.newLine();
        }
    } finally {
        // Best-effort cleanup of the temporary copy.
        FileUtils.deleteQuietly(source);
    }
}

From source file:com.msopentech.odatajclient.engine.communication.request.batch.ODataBatchResponseManager.java

/**
 * Creates a manager that walks the raw batch response line by line, pairing
 * each response item with the expected items in order.
 *
 * @param res OData batch response.
 * @param expectedItems expected batch response items.
 */
public ODataBatchResponseManager(final ODataBatchResponse res,
        final List<ODataBatchResponseItem> expectedItems) {
    try {
        // Wrap the raw response in a line iterator for sequential parsing.
        this.batchLineIterator = new ODataBatchLineIterator(
                IOUtils.lineIterator(res.getRawResponse(), ODataConstants.UTF8));
        this.expectedItemsIterator = expectedItems.iterator();

        // The multipart boundary is taken from the Content-Type response header.
        batchBoundary = ODataBatchUtilities
                .getBoundaryFromHeader(res.getHeader(ODataHeaders.HeaderName.contentType));
        LOG.debug("Retrieved batch response bondary '{}'", batchBoundary);
    } catch (IOException e) {
        LOG.error("Error parsing batch response", e);
        throw new IllegalStateException(e);
    }
}