List of usage examples for org.apache.commons.io IOUtils lineIterator
public static LineIterator lineIterator(InputStream input, String encoding) throws IOException
Returns an Iterator for the lines in an InputStream, using the character encoding specified (or the default encoding if null).

From source file: io.druid.query.aggregation.AggregationTestHelper.java
/**
 * Builds an index from line-oriented input data.
 *
 * @param inputDataStream stream of input rows, one per line; always closed before returning
 * @param parserJson      JSON serialization of the {@link StringInputRowParser} to use
 * @param aggregators     JSON serialization of the aggregator factory list
 * @param outDir          directory the index is written to
 * @param minTimestamp    minimum timestamp accepted into the index
 * @param gran            query granularity for the index
 * @param maxRowCount     maximum number of rows per index segment
 * @throws Exception if JSON deserialization or index creation fails
 */
public void createIndex(InputStream inputDataStream, String parserJson, String aggregators, File outDir,
        long minTimestamp, QueryGranularity gran, int maxRowCount) throws Exception {
    try {
        // Deserialize the row parser and the aggregator specifications from their JSON forms.
        final StringInputRowParser rowParser = mapper.readValue(parserJson, StringInputRowParser.class);
        final List<AggregatorFactory> specs = mapper.readValue(aggregators,
                new TypeReference<List<AggregatorFactory>>() {
                });
        // Iterate the input lazily, line by line; the delegate consumes the iterator.
        final LineIterator lines = IOUtils.lineIterator(inputDataStream, "UTF-8");
        createIndex(lines, rowParser, specs.toArray(new AggregatorFactory[0]), outDir, minTimestamp, gran, true,
                maxRowCount);
    } finally {
        // Always release the caller-supplied stream; close failures are swallowed.
        Closeables.close(inputDataStream, true);
    }
}
From source file:edu.smu.tspell.wordnet.impl.file.SampleSentenceData.java
/** * Reads a data file and returns a map that encapsulates the file's * content.//ww w. ja va 2s . c om * * @param fileName Name of the file from which to read key / value pairs. * @return Map that encapsulates the data read. * @throws IOException An error occurred reading the file. */ private Map createMap(String fileName) throws IOException { int index; String key; String value; // Create a map to hold the results Map keySentences = new HashMap(); // Open the file and start reading it InputStream file = getClass().getResourceAsStream(PropertyNames.databaseDirectory + fileName); LineIterator iterator = IOUtils.lineIterator(file, null); // Loop through all lines in the file while (iterator.hasNext()) { String line = iterator.nextLine(); // Find out where the key ends index = line.indexOf(KEY_TERMINATOR); // Get the key and value key = line.substring(0, index); value = line.substring(index + 1).trim(); // Add them to the cache and read the next line putKeyValuePair(keySentences, key, value); } file.close(); return keySentences; }
From source file:de.tudarmstadt.ukp.clarin.webanno.tsv.WebannoTsv3Reader.java
/** * Iterate through lines and create span annotations accordingly. For * multiple span annotation, based on the position of the annotation in the * line, update only the end position of the annotation *//*from w w w .j ava 2 s .co m*/ private void setAnnotations(JCas aJCas, InputStream aIs, String aEncoding) throws IOException { // getting header information LineIterator lineIterator = IOUtils.lineIterator(aIs, aEncoding); int sentBegin = -1, sentEnd = 0; int prevSentEnd = 0; StringBuilder sentLineSb = new StringBuilder(); String lastSent = ""; while (lineIterator.hasNext()) { String line = lineIterator.next(); if (line.startsWith("#T_")) { setLayerAndFeature(aJCas, line); continue; } if (line.startsWith("#Text=")) { if (sentLineSb.toString().isEmpty()) { sentLineSb.append(line.substring(line.indexOf("=") + 1)); } else { sentLineSb.append(LF + line.substring(line.indexOf("=") + 1)); } lastSent = sentLineSb.toString(); continue; } if (line.startsWith("#FORMAT=")) { continue; } if (line.trim().isEmpty()) { if (!sentLineSb.toString().isEmpty()) { createSentence(aJCas, sentLineSb.toString(), sentBegin, sentEnd, prevSentEnd); prevSentEnd = sentEnd; sentBegin = -1;// reset for next sentence begin sentLineSb = new StringBuilder(); } continue; } line = line.trim(); int count = StringUtils.countMatches(line, "\t"); if (columns != count) { throw new IOException(fileName + " This is not a valid TSV File. 
check this line: " + line); } String regex = "(?<!\\\\)*" + Pattern.quote(TAB); String[] lines = line.split(regex); int begin = Integer.parseInt(lines[1].split("-")[0]); int end = Integer.parseInt(lines[1].split("-")[1]); if (sentBegin == -1) { sentBegin = begin; } sentEnd = end; AnnotationUnit unit = createTokens(aJCas, lines, begin, end); int ind = 3; setAnnosPerTypePerUnit(lines, unit, ind); } // the last sentence if (!lastSent.isEmpty()) { createSentence(aJCas, lastSent, sentBegin, sentEnd, prevSentEnd); } Map<Type, Map<AnnotationUnit, List<AnnotationFS>>> annosPerTypePerUnit = new HashMap<>(); setAnnosPerUnit(aJCas, annosPerTypePerUnit); addAnnotations(aJCas, annosPerTypePerUnit); addChainAnnotations(aJCas); }
From source file:de.tudarmstadt.ukp.dkpro.core.io.penntree.PennTreebankCombinedReader.java
/**
 * Reads the next combined Penn Treebank file into the given CAS: every tree in the file
 * becomes one Sentence annotation plus a constituent structure, and the accumulated
 * surface text becomes the document text.
 */
@Override
public void getNext(JCas aJCas) throws IOException, CollectionException {
    // Advance to the next resource and initialise the CAS with its metadata.
    Resource res = nextFile();
    initCas(aJCas.getCas(), res);
    try {
        posMappingProvider.configure(aJCas.getCas());
        constituentMappingProvider.configure(aJCas.getCas());
    } catch (AnalysisEngineProcessException e) {
        // Surface mapping-configuration failures as I/O errors, preserving the cause.
        throw new IOException(e);
    }
    // Document text is built incrementally as trees are converted.
    StringBuilder text = new StringBuilder();
    try (InputStream is = res.getInputStream()) {
        LineIterator li = IOUtils.lineIterator(is, encoding);
        while (li.hasNext()) {
            // readTree consumes as many lines as one bracketed tree occupies.
            PennTreeNode tree = readTree(li);
            if (removeTraces) {
                doRemoveTraces(tree);
            }
            // Converting the tree appends its tokens to 'text' and returns the root
            // constituent, whose span delimits the sentence.
            Constituent root = converter.convertPennTree(aJCas, text, tree);
            Sentence sentence = new Sentence(aJCas, root.getBegin(), root.getEnd());
            sentence.addToIndexes();
            // One sentence per line in the document text.
            text.append('\n');
        }
    }
    aJCas.setDocumentText(text.toString());
}
From source file:io.cloudslang.lang.tools.build.ArgumentProcessorUtils.java
public static Set<String> loadChangedItems(String filePath) throws IOException { Set<String> changedItems = new HashSet<>(); try (InputStream fileInputStream = new FileInputStream(filePath)) { LineIterator lineIterator = IOUtils.lineIterator(fileInputStream, SlangSource.getCloudSlangCharset()); while (lineIterator.hasNext()) { changedItems.add(lineIterator.next()); }// w w w . java 2 s.c o m } return changedItems; }
From source file:com.cheusov.Jrep.java
/**
 * Runs the configured patterns over one input stream and prints matches grep-style.
 * Behaviour is driven by the static option fields (opt_o, opt_c, opt_L, opt_A, opt_B,
 * opt_m, inverseMatch, outputFilename, wholeContent, colorEscStart) — presumably
 * mirroring the corresponding grep flags; TODO confirm against the option parser.
 */
private static void processFile(InputStream in, String filename) throws IOException {
    Iterator<String> it;
    if (wholeContent) {
        // Whole-file mode: treat the entire content as a single "line".
        String fileContent = IOUtils.toString(in, encoding);
        it = Arrays.asList(fileContent).iterator();
    } else {
        // Normal mode: iterate the stream line by line.
        it = IOUtils.lineIterator(in, encoding);
    }
    int matchCount = 0;
    List<Pair<Integer, Integer>> startend = null;
    int lineNumber = 0;
    int lastMatchedLineNumber = 0;
    // Ring buffer of recent lines used to print "before" context (opt_B).
    Map<Integer, String> lines = new HashMap<Integer, String>();
    while (it.hasNext()) {
        ++lineNumber;
        String line = (String) it.next();
        if (opt_B > 0) {
            // Remember this line and drop the one that fell out of the context window.
            lines.put(lineNumber, line);
            lines.remove(lineNumber - opt_B - 1);
        }
        boolean matched = false;
        boolean nextFile = false;
        // Only collect match offsets when we will actually print (possibly colorized) lines.
        if (!inverseMatch && !outputFilename && !opt_o && !opt_L && !opt_c)
            startend = new ArrayList<Pair<Integer, Integer>>();
        String lineToPrint = null;
        for (JrepPattern pattern : patterns) {
            int pos = 0;
            JrepMatchResult m = pattern.matcher(line);
            int lineLength = line.length();
            // boolean nextLine = false;
            // Scan for every occurrence of the pattern within the line.
            while (pos < lineLength && m.find(pos)) {
                matched = true;
                if (outputFilename || opt_c || opt_L || inverseMatch) {
                    // One hit is all these modes need.
                    break;
                } else if (opt_o) {
                    // Print only the matched portion immediately.
                    printlnWithPrefix(filename, getOutputString(line, m), lineNumber, ':');
                } else if (colorEscStart != null) {
                    // Record the span so it can be highlighted later.
                    startend.add(Pair.of(m.start(), m.end()));
                }
                pos = m.end();
                // Guard against zero-width matches looping forever.
                if (m.start() == m.end())
                    ++pos;
            }
        }
        // -v style inversion of the match result.
        matched ^= inverseMatch;
        if (matched) {
            if (exitStatus == 1)
                exitStatus = 0;
            if (!outputFilename && !opt_o && !opt_L && !opt_c) {
                if (colorEscStart == null || startend == null)
                    lineToPrint = line;
                else
                    lineToPrint = getLineToPrint(line, startend);
            }
            if (outputFilename) {
                // -l style: print the file name once and move on.
                println(filename);
                nextFile = true;
            }
        }
        if (lineToPrint != null) {
            // Flush any buffered "before" context lines first.
            for (int prevLineNumber = lineNumber - opt_B; prevLineNumber < lineNumber; ++prevLineNumber) {
                String prevLine = lines.get(prevLineNumber);
                if (prevLine != null) {
                    lines.remove(prevLineNumber);
                    printlnWithPrefix(filename, prevLine, prevLineNumber, '-');
                }
            }
            lastMatchedLineNumber = lineNumber;
            lines.remove(lineNumber);
            printlnWithPrefix(filename, lineToPrint, lineNumber, ':');
        } else if (lastMatchedLineNumber > 0 && lastMatchedLineNumber + opt_A >= lineNumber) {
            // "After" context: still within opt_A lines of the last match.
            lines.remove(lineNumber);
            printlnWithPrefix(filename, line, lineNumber, '-');
        }
        if (matched) {
            ++matchCount;
            // -m style cap on the number of matches per file.
            if (matchCount == opt_m)
                nextFile = true;
        }
        if (nextFile)
            break;
    }
    // -L style: file name only when nothing matched.
    if (opt_L && matchCount == 0)
        println(filename);
    // -c style: match count per file.
    if (opt_c)
        printlnWithPrefix(filename, "" + matchCount, lineNumber, ':');
}
From source file:de.tudarmstadt.ukp.clarin.webanno.tsv.WebannoTsv1Reader.java
/**
 * Iterate through all lines and get available annotations<br>
 * First column is sentence number and a blank new line marks end of a sentence<br>
 * The Second column is the token <br>
 * The third column is the lemma annotation <br>
 * The fourth column is the POS annotation <br>
 * The fifth column is used for Named Entity annotations (Multiple annotations separeted by |
 * character) <br>
 * The sixth column is the origin token number of dependency parsing <br>
 * The seventh column is the function/type of the dependency parsing <br>
 * eighth and ninth columns are undefined currently
 */
private void setAnnotations(InputStream aIs, String aEncoding, StringBuilder text,
        Map<Integer, String> tokens, Map<Integer, String> pos, Map<Integer, String> lemma,
        Map<Integer, String> namedEntity, Map<Integer, String> dependencyFunction,
        Map<Integer, Integer> dependencyDependent, List<Integer> firstTokenInSentence) throws IOException {
    // Global token counter across sentences; 'base' is the offset of the current sentence.
    int tokenNumber = 0;
    boolean first = true;
    int base = 0;

    LineIterator lineIterator = IOUtils.lineIterator(aIs, aEncoding);
    boolean textFound = false;
    // Fallback text assembled from the tokens, used when no #text= lines are present.
    StringBuffer tmpText = new StringBuffer();
    while (lineIterator.hasNext()) {
        String line = lineIterator.next().trim();
        if (line.startsWith("#text=")) {
            text.append(line.substring(6) + "\n");
            textFound = true;
            continue;
        }
        if (line.startsWith("#")) {
            continue;// it is a comment line
        }
        int count = StringUtils.countMatches(line, "\t");
        if (line.isEmpty()) {
            continue;
        }
        // Every data line must have exactly ten tab-separated columns.
        if (count != 9) {// not a proper TSV file
            getUimaContext().getLogger().log(Level.INFO, "This is not a valid TSV File");
            throw new IOException(fileName + " This is not a valid TSV File");
        }
        StringTokenizer lineTk = new StringTokenizer(line, "\t");

        if (first) {
            tokenNumber = Integer.parseInt(line.substring(0, line.indexOf("\t")));
            firstTokenInSentence.add(tokenNumber);
            first = false;
        } else {
            // Sentence-local token number 1 marks the start of a new sentence;
            // shift 'base' so token numbers stay globally unique.
            int lineNumber = Integer.parseInt(line.substring(0, line.indexOf("\t")));
            if (lineNumber == 1) {
                base = tokenNumber;
                firstTokenInSentence.add(base);
            }
            tokenNumber = base + Integer.parseInt(line.substring(0, line.indexOf("\t")));
        }

        // Consume the remaining columns of this line in fixed order.
        while (lineTk.hasMoreElements()) {
            lineTk.nextToken();
            String token = lineTk.nextToken();

            // for backward compatibility
            tmpText.append(token + " ");

            tokens.put(tokenNumber, token);
            lemma.put(tokenNumber, lineTk.nextToken());
            pos.put(tokenNumber, lineTk.nextToken());
            String ne = lineTk.nextToken();
            lineTk.nextToken();// make it compatible with prev WebAnno TSV reader
            // "_" or "-" mean no named entity -> normalise to the "O" (outside) tag.
            namedEntity.put(tokenNumber, (ne.equals("_") || ne.equals("-")) ? "O" : ne);
            String dependentValue = lineTk.nextToken();
            if (NumberUtils.isDigits(dependentValue)) {
                int dependent = Integer.parseInt(dependentValue);
                // Dependent 0 is the root; others are rebased to global token numbers.
                dependencyDependent.put(tokenNumber, dependent == 0 ? 0 : base + dependent);
                dependencyFunction.put(tokenNumber, lineTk.nextToken());
            } else {
                lineTk.nextToken();
            }
            lineTk.nextToken();
            lineTk.nextToken();
        }
    }
    if (!textFound) {
        text.append(tmpText);
    }
}
From source file:com.anrisoftware.globalpom.initfileparser.InitFileParserImpl.java
private LineIterator createLineIterator() throws InitFileParserException { try {//from w w w . ja v a 2 s .com return IOUtils.lineIterator(stream, charset); } catch (IOException e) { throw log.lineIteratorError(this, e); } }
From source file:com.adobe.acs.tools.tag_maker.impl.TagMakerServlet.java
private InputStream stripLineEnds(InputStream is, String charset, char chartoStrip) throws IOException { log.debug("Stripping [ {} ] from the end of lines.", chartoStrip); final ByteArrayOutputStream baos = new ByteArrayOutputStream(); final PrintStream printStream = new PrintStream(baos); final LineIterator lineIterator = IOUtils.lineIterator(is, charset); while (lineIterator.hasNext()) { String line = StringUtils.stripToNull(lineIterator.next()); if (line != null) { line = StringUtils.stripEnd(line, String.valueOf(chartoStrip)); printStream.println(line);// ww w .j a v a 2 s . c o m } } return new ByteArrayInputStream(baos.toByteArray()); }
From source file:net.sf.logsaw.dialect.pattern.APatternDialect.java
/**
 * Parses a log stream into entries using the configured pattern and hands each entry to
 * the collector. Supports entries spanning several physical lines: the first line of an
 * entry is detected with the first-line pattern, continuation lines are appended, and a
 * line that matches no pattern is appended to the previous entry's message field.
 */
@Override
public void parse(ILogResource log, InputStream input, ILogEntryCollector collector) throws CoreException {
    Assert.isNotNull(log, "log"); //$NON-NLS-1$
    Assert.isNotNull(input, "input"); //$NON-NLS-1$
    Assert.isNotNull(collector, "collector"); //$NON-NLS-1$
    Assert.isTrue(isConfigured(), "Dialect should be configured by now"); //$NON-NLS-1$

    try {
        LogEntry currentEntry = null;
        // Pull optional per-log settings (encoding, locale, timezone) via adapters.
        IHasEncoding enc = (IHasEncoding) log.getAdapter(IHasEncoding.class);
        IHasLocale loc = (IHasLocale) log.getAdapter(IHasLocale.class);
        if (loc != null) {
            // Apply the locale
            getPatternTranslator().applyLocale(loc.getLocale(), rules);
        }
        IHasTimeZone tz = (IHasTimeZone) log.getAdapter(IHasTimeZone.class);
        if (tz != null) {
            // Apply the timezone
            getPatternTranslator().applyTimeZone(tz.getTimeZone(), rules);
        }
        LineIterator iter = IOUtils.lineIterator(input, enc.getEncoding());
        int minLinesPerEntry = getPatternTranslator().getMinLinesPerEntry();
        int lineNo = 0;
        // How many physical lines of the current logical entry are still expected.
        int moreLinesToCome = 0;
        try {
            String line = null;
            while (iter.hasNext()) {
                lineNo++;

                if (minLinesPerEntry == 1) {
                    // Simple case
                    line = iter.nextLine();
                } else {
                    // Multi-line entries: accumulate physical lines into 'line'.
                    String s = iter.nextLine();
                    if (moreLinesToCome == 0) {
                        Matcher m = getInternalPatternFirstLine().matcher(s);
                        if (m.find()) {
                            // First line
                            line = s;
                            moreLinesToCome = minLinesPerEntry - 1;
                            continue;
                        } else {
                            // Some crazy stuff
                            line = s;
                        }
                    } else if (iter.hasNext() && (moreLinesToCome > 1)) {
                        // Some middle line
                        line += IOUtils.LINE_SEPARATOR + s;
                        moreLinesToCome--;
                        continue;
                    } else {
                        // Last line
                        line += IOUtils.LINE_SEPARATOR + s;
                        if (!iter.hasNext()) {
                            line += IOUtils.LINE_SEPARATOR;
                        }
                        moreLinesToCome = 0;
                    }
                }

                // Error handling
                List<IStatus> statuses = null;
                boolean fatal = false; // determines whether to interrupt parsing

                Matcher m = getInternalPatternFull().matcher(line);
                if (m.find()) {
                    // The next line matches, so flush the previous entry and continue
                    if (currentEntry != null) {
                        collector.collect(currentEntry);
                        currentEntry = null;
                    }
                    currentEntry = new LogEntry();
                    // Each capture group maps to one field-extraction rule, in order.
                    for (int i = 0; i < m.groupCount(); i++) {
                        try {
                            getPatternTranslator().extractField(currentEntry, getRules().get(i),
                                    m.group(i + 1));
                        } catch (CoreException e) {
                            // Mark for interruption
                            fatal = fatal || e.getStatus().matches(IStatus.ERROR);

                            // Messages will be displayed later
                            if (statuses == null) {
                                statuses = new ArrayList<IStatus>();
                            }
                            if (e.getStatus().isMultiStatus()) {
                                Collections.addAll(statuses, e.getStatus().getChildren());
                            } else {
                                statuses.add(e.getStatus());
                            }
                        }
                    }

                    // We encountered errors or warnings
                    if (statuses != null && !statuses.isEmpty()) {
                        currentEntry = null; // Stop propagation
                        IStatus status = new MultiStatus(PatternDialectPlugin.PLUGIN_ID, 0,
                                statuses.toArray(new IStatus[statuses.size()]),
                                NLS.bind(Messages.APatternDialect_error_failedToParseLine, lineNo), null);
                        if (fatal) {
                            // Interrupt parsing in case of error
                            throw new CoreException(status);
                        } else {
                            collector.addMessage(status);
                        }
                    }
                } else if (currentEntry != null) {
                    // Append to message
                    String msg = currentEntry.get(getFieldProvider().getMessageField());
                    currentEntry.put(getFieldProvider().getMessageField(),
                            msg + IOUtils.LINE_SEPARATOR + line);
                }

                if (collector.isCanceled()) {
                    // Cancel parsing
                    break;
                }
            }

            if (currentEntry != null) {
                // Collect left over entry
                collector.collect(currentEntry);
            }
        } finally {
            LineIterator.closeQuietly(iter);
        }
    } catch (Exception e) {
        throw new CoreException(new Status(IStatus.ERROR, PatternDialectPlugin.PLUGIN_ID,
                NLS.bind(Messages.APatternDialect_error_failedToParseFile,
                        new Object[] { log.getName(), e.getLocalizedMessage() }),
                e));
    }
}