List of usage examples for org.apache.commons.io IOUtils lineIterator
public static LineIterator lineIterator(InputStream input, String encoding) throws IOException
Returns an Iterator for the lines in an InputStream, using the character encoding specified (or the default encoding if null).

From source file: io.druid.query.aggregation.AggregationTestHelper.java
/**
 * Builds an index from line-oriented input data.
 *
 * @param inputDataStream stream of input rows, one per line; always closed before returning
 * @param parserJson      JSON serialization of the {@link StringInputRowParser} to use
 * @param aggregators     JSON serialization of the aggregator factory list
 * @param outDir          directory the index is written to
 * @param minTimestamp    minimum timestamp accepted into the index
 * @param gran            query granularity for the index
 * @param maxRowCount     maximum number of rows per index segment
 * @throws Exception if JSON deserialization or index creation fails
 */
public void createIndex(InputStream inputDataStream, String parserJson, String aggregators, File outDir,
        long minTimestamp, QueryGranularity gran, int maxRowCount) throws Exception {
    try {
        // Deserialize the row parser and the aggregator specifications from their JSON forms.
        final StringInputRowParser rowParser = mapper.readValue(parserJson, StringInputRowParser.class);
        final List<AggregatorFactory> specs = mapper.readValue(aggregators,
                new TypeReference<List<AggregatorFactory>>() {
                });
        // Iterate the input lazily, line by line; the delegate consumes the iterator.
        final LineIterator lines = IOUtils.lineIterator(inputDataStream, "UTF-8");
        createIndex(lines, rowParser, specs.toArray(new AggregatorFactory[0]), outDir, minTimestamp, gran, true,
                maxRowCount);
    } finally {
        // Always release the caller-supplied stream; close failures are swallowed.
        Closeables.close(inputDataStream, true);
    }
}
From source file:edu.smu.tspell.wordnet.impl.file.SampleSentenceData.java
/** * Reads a data file and returns a map that encapsulates the file's * content.//ww w. ja va 2s . c om * * @param fileName Name of the file from which to read key / value pairs. * @return Map that encapsulates the data read. * @throws IOException An error occurred reading the file. */ private Map createMap(String fileName) throws IOException { int index; String key; String value; // Create a map to hold the results Map keySentences = new HashMap(); // Open the file and start reading it InputStream file = getClass().getResourceAsStream(PropertyNames.databaseDirectory + fileName); LineIterator iterator = IOUtils.lineIterator(file, null); // Loop through all lines in the file while (iterator.hasNext()) { String line = iterator.nextLine(); // Find out where the key ends index = line.indexOf(KEY_TERMINATOR); // Get the key and value key = line.substring(0, index); value = line.substring(index + 1).trim(); // Add them to the cache and read the next line putKeyValuePair(keySentences, key, value); } file.close(); return keySentences; }
From source file:de.tudarmstadt.ukp.clarin.webanno.tsv.WebannoTsv3Reader.java
/** * Iterate through lines and create span annotations accordingly. For * multiple span annotation, based on the position of the annotation in the * line, update only the end position of the annotation *//*from w w w .j ava 2 s .co m*/ private void setAnnotations(JCas aJCas, InputStream aIs, String aEncoding) throws IOException { // getting header information LineIterator lineIterator = IOUtils.lineIterator(aIs, aEncoding); int sentBegin = -1, sentEnd = 0; int prevSentEnd = 0; StringBuilder sentLineSb = new StringBuilder(); String lastSent = ""; while (lineIterator.hasNext()) { String line = lineIterator.next(); if (line.startsWith("#T_")) { setLayerAndFeature(aJCas, line); continue; } if (line.startsWith("#Text=")) { if (sentLineSb.toString().isEmpty()) { sentLineSb.append(line.substring(line.indexOf("=") + 1)); } else { sentLineSb.append(LF + line.substring(line.indexOf("=") + 1)); } lastSent = sentLineSb.toString(); continue; } if (line.startsWith("#FORMAT=")) { continue; } if (line.trim().isEmpty()) { if (!sentLineSb.toString().isEmpty()) { createSentence(aJCas, sentLineSb.toString(), sentBegin, sentEnd, prevSentEnd); prevSentEnd = sentEnd; sentBegin = -1;// reset for next sentence begin sentLineSb = new StringBuilder(); } continue; } line = line.trim(); int count = StringUtils.countMatches(line, "\t"); if (columns != count) { throw new IOException(fileName + " This is not a valid TSV File. 
check this line: " + line); } String regex = "(?<!\\\\)*" + Pattern.quote(TAB); String[] lines = line.split(regex); int begin = Integer.parseInt(lines[1].split("-")[0]); int end = Integer.parseInt(lines[1].split("-")[1]); if (sentBegin == -1) { sentBegin = begin; } sentEnd = end; AnnotationUnit unit = createTokens(aJCas, lines, begin, end); int ind = 3; setAnnosPerTypePerUnit(lines, unit, ind); } // the last sentence if (!lastSent.isEmpty()) { createSentence(aJCas, lastSent, sentBegin, sentEnd, prevSentEnd); } Map<Type, Map<AnnotationUnit, List<AnnotationFS>>> annosPerTypePerUnit = new HashMap<>(); setAnnosPerUnit(aJCas, annosPerTypePerUnit); addAnnotations(aJCas, annosPerTypePerUnit); addChainAnnotations(aJCas); }
From source file:de.tudarmstadt.ukp.dkpro.core.io.penntree.PennTreebankCombinedReader.java
/**
 * Reads the next combined Penn Treebank file into the given CAS: every tree in the file
 * becomes one Sentence annotation plus a constituent structure, and the accumulated
 * surface text becomes the document text.
 */
@Override
public void getNext(JCas aJCas) throws IOException, CollectionException {
    // Advance to the next resource and initialise the CAS with its metadata.
    Resource res = nextFile();
    initCas(aJCas.getCas(), res);
    try {
        posMappingProvider.configure(aJCas.getCas());
        constituentMappingProvider.configure(aJCas.getCas());
    } catch (AnalysisEngineProcessException e) {
        // Surface mapping-configuration failures as I/O errors, preserving the cause.
        throw new IOException(e);
    }
    // Document text is built incrementally as trees are converted.
    StringBuilder text = new StringBuilder();
    try (InputStream is = res.getInputStream()) {
        LineIterator li = IOUtils.lineIterator(is, encoding);
        while (li.hasNext()) {
            // readTree consumes as many lines as one bracketed tree occupies.
            PennTreeNode tree = readTree(li);
            if (removeTraces) {
                doRemoveTraces(tree);
            }
            // Converting the tree appends its tokens to 'text' and returns the root
            // constituent, whose span delimits the sentence.
            Constituent root = converter.convertPennTree(aJCas, text, tree);
            Sentence sentence = new Sentence(aJCas, root.getBegin(), root.getEnd());
            sentence.addToIndexes();
            // One sentence per line in the document text.
            text.append('\n');
        }
    }
    aJCas.setDocumentText(text.toString());
}
From source file:io.cloudslang.lang.tools.build.ArgumentProcessorUtils.java
public static Set<String> loadChangedItems(String filePath) throws IOException { Set<String> changedItems = new HashSet<>(); try (InputStream fileInputStream = new FileInputStream(filePath)) { LineIterator lineIterator = IOUtils.lineIterator(fileInputStream, SlangSource.getCloudSlangCharset()); while (lineIterator.hasNext()) { changedItems.add(lineIterator.next()); }// w w w . java 2 s.c o m } return changedItems; }
From source file:com.cheusov.Jrep.java
/**
 * Runs the configured patterns over one input stream and prints matches grep-style.
 * Behaviour is driven by the static option fields (opt_o, opt_c, opt_L, opt_A, opt_B,
 * opt_m, inverseMatch, outputFilename, wholeContent, colorEscStart) — presumably
 * mirroring the corresponding grep flags; TODO confirm against the option parser.
 */
private static void processFile(InputStream in, String filename) throws IOException {
    Iterator<String> it;
    if (wholeContent) {
        // Whole-file mode: treat the entire content as a single "line".
        String fileContent = IOUtils.toString(in, encoding);
        it = Arrays.asList(fileContent).iterator();
    } else {
        // Normal mode: iterate the stream line by line.
        it = IOUtils.lineIterator(in, encoding);
    }
    int matchCount = 0;
    List<Pair<Integer, Integer>> startend = null;
    int lineNumber = 0;
    int lastMatchedLineNumber = 0;
    // Ring buffer of recent lines used to print "before" context (opt_B).
    Map<Integer, String> lines = new HashMap<Integer, String>();
    while (it.hasNext()) {
        ++lineNumber;
        String line = (String) it.next();
        if (opt_B > 0) {
            // Remember this line and drop the one that fell out of the context window.
            lines.put(lineNumber, line);
            lines.remove(lineNumber - opt_B - 1);
        }
        boolean matched = false;
        boolean nextFile = false;
        // Only collect match offsets when we will actually print (possibly colorized) lines.
        if (!inverseMatch && !outputFilename && !opt_o && !opt_L && !opt_c)
            startend = new ArrayList<Pair<Integer, Integer>>();
        String lineToPrint = null;
        for (JrepPattern pattern : patterns) {
            int pos = 0;
            JrepMatchResult m = pattern.matcher(line);
            int lineLength = line.length();
            // boolean nextLine = false;
            // Scan for every occurrence of the pattern within the line.
            while (pos < lineLength && m.find(pos)) {
                matched = true;
                if (outputFilename || opt_c || opt_L || inverseMatch) {
                    // One hit is all these modes need.
                    break;
                } else if (opt_o) {
                    // Print only the matched portion immediately.
                    printlnWithPrefix(filename, getOutputString(line, m), lineNumber, ':');
                } else if (colorEscStart != null) {
                    // Record the span so it can be highlighted later.
                    startend.add(Pair.of(m.start(), m.end()));
                }
                pos = m.end();
                // Guard against zero-width matches looping forever.
                if (m.start() == m.end())
                    ++pos;
            }
        }
        // -v style inversion of the match result.
        matched ^= inverseMatch;
        if (matched) {
            if (exitStatus == 1)
                exitStatus = 0;
            if (!outputFilename && !opt_o && !opt_L && !opt_c) {
                if (colorEscStart == null || startend == null)
                    lineToPrint = line;
                else
                    lineToPrint = getLineToPrint(line, startend);
            }
            if (outputFilename) {
                // -l style: print the file name once and move on.
                println(filename);
                nextFile = true;
            }
        }
        if (lineToPrint != null) {
            // Flush any buffered "before" context lines first.
            for (int prevLineNumber = lineNumber - opt_B; prevLineNumber < lineNumber; ++prevLineNumber) {
                String prevLine = lines.get(prevLineNumber);
                if (prevLine != null) {
                    lines.remove(prevLineNumber);
                    printlnWithPrefix(filename, prevLine, prevLineNumber, '-');
                }
            }
            lastMatchedLineNumber = lineNumber;
            lines.remove(lineNumber);
            printlnWithPrefix(filename, lineToPrint, lineNumber, ':');
        } else if (lastMatchedLineNumber > 0 && lastMatchedLineNumber + opt_A >= lineNumber) {
            // "After" context: still within opt_A lines of the last match.
            lines.remove(lineNumber);
            printlnWithPrefix(filename, line, lineNumber, '-');
        }
        if (matched) {
            ++matchCount;
            // -m style cap on the number of matches per file.
            if (matchCount == opt_m)
                nextFile = true;
        }
        if (nextFile)
            break;
    }
    // -L style: file name only when nothing matched.
    if (opt_L && matchCount == 0)
        println(filename);
    // -c style: match count per file.
    if (opt_c)
        printlnWithPrefix(filename, "" + matchCount, lineNumber, ':');
}
From source file:de.tudarmstadt.ukp.clarin.webanno.tsv.WebannoTsv1Reader.java
/**
 * Iterate through all lines and get available annotations<br>
 * First column is sentence number and a blank new line marks end of a sentence<br>
 * The Second column is the token <br>
 * The third column is the lemma annotation <br>
 * The fourth column is the POS annotation <br>
 * The fifth column is used for Named Entity annotations (Multiple annotations separeted by |
 * character) <br>
 * The sixth column is the origin token number of dependency parsing <br>
 * The seventh column is the function/type of the dependency parsing <br>
 * eighth and ninth columns are undefined currently
 */
private void setAnnotations(InputStream aIs, String aEncoding, StringBuilder text,
        Map<Integer, String> tokens, Map<Integer, String> pos, Map<Integer, String> lemma,
        Map<Integer, String> namedEntity, Map<Integer, String> dependencyFunction,
        Map<Integer, Integer> dependencyDependent, List<Integer> firstTokenInSentence) throws IOException {
    // Global token counter across sentences; 'base' is the offset of the current sentence.
    int tokenNumber = 0;
    boolean first = true;
    int base = 0;

    LineIterator lineIterator = IOUtils.lineIterator(aIs, aEncoding);
    boolean textFound = false;
    // Fallback text assembled from the tokens, used when no #text= lines are present.
    StringBuffer tmpText = new StringBuffer();
    while (lineIterator.hasNext()) {
        String line = lineIterator.next().trim();
        if (line.startsWith("#text=")) {
            text.append(line.substring(6) + "\n");
            textFound = true;
            continue;
        }
        if (line.startsWith("#")) {
            continue;// it is a comment line
        }
        int count = StringUtils.countMatches(line, "\t");
        if (line.isEmpty()) {
            continue;
        }
        // Every data line must have exactly ten tab-separated columns.
        if (count != 9) {// not a proper TSV file
            getUimaContext().getLogger().log(Level.INFO, "This is not a valid TSV File");
            throw new IOException(fileName + " This is not a valid TSV File");
        }
        StringTokenizer lineTk = new StringTokenizer(line, "\t");

        if (first) {
            tokenNumber = Integer.parseInt(line.substring(0, line.indexOf("\t")));
            firstTokenInSentence.add(tokenNumber);
            first = false;
        } else {
            // Sentence-local token number 1 marks the start of a new sentence;
            // shift 'base' so token numbers stay globally unique.
            int lineNumber = Integer.parseInt(line.substring(0, line.indexOf("\t")));
            if (lineNumber == 1) {
                base = tokenNumber;
                firstTokenInSentence.add(base);
            }
            tokenNumber = base + Integer.parseInt(line.substring(0, line.indexOf("\t")));
        }

        // Consume the remaining columns of this line in fixed order.
        while (lineTk.hasMoreElements()) {
            lineTk.nextToken();
            String token = lineTk.nextToken();

            // for backward compatibility
            tmpText.append(token + " ");

            tokens.put(tokenNumber, token);
            lemma.put(tokenNumber, lineTk.nextToken());
            pos.put(tokenNumber, lineTk.nextToken());
            String ne = lineTk.nextToken();
            lineTk.nextToken();// make it compatible with prev WebAnno TSV reader
            // "_" or "-" mean no named entity -> normalise to the "O" (outside) tag.
            namedEntity.put(tokenNumber, (ne.equals("_") || ne.equals("-")) ? "O" : ne);
            String dependentValue = lineTk.nextToken();
            if (NumberUtils.isDigits(dependentValue)) {
                int dependent = Integer.parseInt(dependentValue);
                // Dependent 0 is the root; others are rebased to global token numbers.
                dependencyDependent.put(tokenNumber, dependent == 0 ? 0 : base + dependent);
                dependencyFunction.put(tokenNumber, lineTk.nextToken());
            } else {
                lineTk.nextToken();
            }
            lineTk.nextToken();
            lineTk.nextToken();
        }
    }
    if (!textFound) {
        text.append(tmpText);
    }
}
From source file:com.anrisoftware.globalpom.initfileparser.InitFileParserImpl.java
private LineIterator createLineIterator() throws InitFileParserException { try {//from w w w . ja v a 2 s .com return IOUtils.lineIterator(stream, charset); } catch (IOException e) { throw log.lineIteratorError(this, e); } }
From source file:com.adobe.acs.tools.tag_maker.impl.TagMakerServlet.java
private InputStream stripLineEnds(InputStream is, String charset, char chartoStrip) throws IOException { log.debug("Stripping [ {} ] from the end of lines.", chartoStrip); final ByteArrayOutputStream baos = new ByteArrayOutputStream(); final PrintStream printStream = new PrintStream(baos); final LineIterator lineIterator = IOUtils.lineIterator(is, charset); while (lineIterator.hasNext()) { String line = StringUtils.stripToNull(lineIterator.next()); if (line != null) { line = StringUtils.stripEnd(line, String.valueOf(chartoStrip)); printStream.println(line);// ww w .j a v a 2 s . c o m } } return new ByteArrayInputStream(baos.toByteArray()); }
From source file:net.sf.logsaw.dialect.pattern.APatternDialect.java
/**
 * Parses a log stream into entries using the configured pattern and hands each entry to
 * the collector. Supports entries spanning several physical lines: the first line of an
 * entry is detected with the first-line pattern, continuation lines are appended, and a
 * line that matches no pattern is appended to the previous entry's message field.
 */
@Override
public void parse(ILogResource log, InputStream input, ILogEntryCollector collector) throws CoreException {
    Assert.isNotNull(log, "log"); //$NON-NLS-1$
    Assert.isNotNull(input, "input"); //$NON-NLS-1$
    Assert.isNotNull(collector, "collector"); //$NON-NLS-1$
    Assert.isTrue(isConfigured(), "Dialect should be configured by now"); //$NON-NLS-1$

    try {
        LogEntry currentEntry = null;
        // Pull optional per-log settings (encoding, locale, timezone) via adapters.
        IHasEncoding enc = (IHasEncoding) log.getAdapter(IHasEncoding.class);
        IHasLocale loc = (IHasLocale) log.getAdapter(IHasLocale.class);
        if (loc != null) {
            // Apply the locale
            getPatternTranslator().applyLocale(loc.getLocale(), rules);
        }
        IHasTimeZone tz = (IHasTimeZone) log.getAdapter(IHasTimeZone.class);
        if (tz != null) {
            // Apply the timezone
            getPatternTranslator().applyTimeZone(tz.getTimeZone(), rules);
        }
        LineIterator iter = IOUtils.lineIterator(input, enc.getEncoding());
        int minLinesPerEntry = getPatternTranslator().getMinLinesPerEntry();
        int lineNo = 0;
        // How many physical lines of the current logical entry are still expected.
        int moreLinesToCome = 0;
        try {
            String line = null;
            while (iter.hasNext()) {
                lineNo++;

                if (minLinesPerEntry == 1) {
                    // Simple case
                    line = iter.nextLine();
                } else {
                    // Multi-line entries: accumulate physical lines into 'line'.
                    String s = iter.nextLine();
                    if (moreLinesToCome == 0) {
                        Matcher m = getInternalPatternFirstLine().matcher(s);
                        if (m.find()) {
                            // First line
                            line = s;
                            moreLinesToCome = minLinesPerEntry - 1;
                            continue;
                        } else {
                            // Some crazy stuff
                            line = s;
                        }
                    } else if (iter.hasNext() && (moreLinesToCome > 1)) {
                        // Some middle line
                        line += IOUtils.LINE_SEPARATOR + s;
                        moreLinesToCome--;
                        continue;
                    } else {
                        // Last line
                        line += IOUtils.LINE_SEPARATOR + s;
                        if (!iter.hasNext()) {
                            line += IOUtils.LINE_SEPARATOR;
                        }
                        moreLinesToCome = 0;
                    }
                }

                // Error handling
                List<IStatus> statuses = null;
                boolean fatal = false; // determines whether to interrupt parsing

                Matcher m = getInternalPatternFull().matcher(line);
                if (m.find()) {
                    // The next line matches, so flush the previous entry and continue
                    if (currentEntry != null) {
                        collector.collect(currentEntry);
                        currentEntry = null;
                    }
                    currentEntry = new LogEntry();
                    // Each capture group maps to one field-extraction rule, in order.
                    for (int i = 0; i < m.groupCount(); i++) {
                        try {
                            getPatternTranslator().extractField(currentEntry, getRules().get(i),
                                    m.group(i + 1));
                        } catch (CoreException e) {
                            // Mark for interruption
                            fatal = fatal || e.getStatus().matches(IStatus.ERROR);

                            // Messages will be displayed later
                            if (statuses == null) {
                                statuses = new ArrayList<IStatus>();
                            }
                            if (e.getStatus().isMultiStatus()) {
                                Collections.addAll(statuses, e.getStatus().getChildren());
                            } else {
                                statuses.add(e.getStatus());
                            }
                        }
                    }

                    // We encountered errors or warnings
                    if (statuses != null && !statuses.isEmpty()) {
                        currentEntry = null; // Stop propagation
                        IStatus status = new MultiStatus(PatternDialectPlugin.PLUGIN_ID, 0,
                                statuses.toArray(new IStatus[statuses.size()]),
                                NLS.bind(Messages.APatternDialect_error_failedToParseLine, lineNo), null);
                        if (fatal) {
                            // Interrupt parsing in case of error
                            throw new CoreException(status);
                        } else {
                            collector.addMessage(status);
                        }
                    }
                } else if (currentEntry != null) {
                    // Append to message
                    String msg = currentEntry.get(getFieldProvider().getMessageField());
                    currentEntry.put(getFieldProvider().getMessageField(),
                            msg + IOUtils.LINE_SEPARATOR + line);
                }

                if (collector.isCanceled()) {
                    // Cancel parsing
                    break;
                }
            }

            if (currentEntry != null) {
                // Collect left over entry
                collector.collect(currentEntry);
            }
        } finally {
            LineIterator.closeQuietly(iter);
        }
    } catch (Exception e) {
        throw new CoreException(new Status(IStatus.ERROR, PatternDialectPlugin.PLUGIN_ID,
                NLS.bind(Messages.APatternDialect_error_failedToParseFile,
                        new Object[] { log.getName(), e.getLocalizedMessage() }),
                e));
    }
}