List of usage examples for org.apache.commons.io LineIterator hasNext
public boolean hasNext()
Indicates whether the Reader
has more lines. From source file:com.ipcglobal.fredimport.process.ProcessReadmeSeriesId.java
/** * Read readme series id./*from ww w . jav a 2 s. c o m*/ * * @return the list * @throws Exception the exception */ public List<SeriesIdItem> readReadmeSeriesId() throws Exception { List<SeriesIdItem> seriesIdItems = new ArrayList<SeriesIdItem>(); boolean isHeaderRows = true; boolean isFooterRows = false; LineIterator it = FileUtils.lineIterator(new File(inputPathFredData + "README_SERIES_ID_SORT.txt"), "UTF-8"); int numLines = 0; try { while (it.hasNext()) { String line = it.nextLine(); numLines++; if (isHeaderRows) { if (line.startsWith("File")) isHeaderRows = false; } else if (isFooterRows) { } else { if (line.length() == 0) { isFooterRows = true; continue; } // Data row // File;Title; Units; Frequency; Seasonal Adjustment; Last Updated // Bypass all (DISCONTINUED SERIES) rows; if (line.indexOf("(DISCONTINUED SERIES)") > -1 || line.indexOf("(DISCONTINUED)") > -1 || line.indexOf("(Discontinued Series)") > -1) continue; String[] fields = splitFields(line); seriesIdItems.add(new SeriesIdItem().setCsvFileName(fields[0]) .setTitle(fields[1].replace("", "")).setUnits(fields[2]).setFrequency(fields[3]) .setSeasonalAdj(fields[4]).setLastUpdated(fields[5])); } if ((numLines % 25000) == 0) log.info("readReadmeSeriesId: read lines: " + numLines); } } finally { LineIterator.closeQuietly(it); } return seriesIdItems; }
From source file:net.sf.logsaw.dialect.pattern.APatternDialect.java
@Override public void parse(ILogResource log, InputStream input, ILogEntryCollector collector) throws CoreException { Assert.isNotNull(log, "log"); //$NON-NLS-1$ Assert.isNotNull(input, "input"); //$NON-NLS-1$ Assert.isNotNull(collector, "collector"); //$NON-NLS-1$ Assert.isTrue(isConfigured(), "Dialect should be configured by now"); //$NON-NLS-1$ try {//from ww w .ja va 2 s .co m LogEntry currentEntry = null; IHasEncoding enc = (IHasEncoding) log.getAdapter(IHasEncoding.class); IHasLocale loc = (IHasLocale) log.getAdapter(IHasLocale.class); if (loc != null) { // Apply the locale getPatternTranslator().applyLocale(loc.getLocale(), rules); } IHasTimeZone tz = (IHasTimeZone) log.getAdapter(IHasTimeZone.class); if (tz != null) { // Apply the timezone getPatternTranslator().applyTimeZone(tz.getTimeZone(), rules); } LineIterator iter = IOUtils.lineIterator(input, enc.getEncoding()); int minLinesPerEntry = getPatternTranslator().getMinLinesPerEntry(); int lineNo = 0; int moreLinesToCome = 0; try { String line = null; while (iter.hasNext()) { lineNo++; if (minLinesPerEntry == 1) { // Simple case line = iter.nextLine(); } else { String s = iter.nextLine(); if (moreLinesToCome == 0) { Matcher m = getInternalPatternFirstLine().matcher(s); if (m.find()) { // First line line = s; moreLinesToCome = minLinesPerEntry - 1; continue; } else { // Some crazy stuff line = s; } } else if (iter.hasNext() && (moreLinesToCome > 1)) { // Some middle line line += IOUtils.LINE_SEPARATOR + s; moreLinesToCome--; continue; } else { // Last line line += IOUtils.LINE_SEPARATOR + s; if (!iter.hasNext()) { line += IOUtils.LINE_SEPARATOR; } moreLinesToCome = 0; } } // Error handling List<IStatus> statuses = null; boolean fatal = false; // determines whether to interrupt parsing Matcher m = getInternalPatternFull().matcher(line); if (m.find()) { // The next line matches, so flush the previous entry and continue if (currentEntry != null) { collector.collect(currentEntry); currentEntry = null; } 
currentEntry = new LogEntry(); for (int i = 0; i < m.groupCount(); i++) { try { getPatternTranslator().extractField(currentEntry, getRules().get(i), m.group(i + 1)); } catch (CoreException e) { // Mark for interruption fatal = fatal || e.getStatus().matches(IStatus.ERROR); // Messages will be displayed later if (statuses == null) { statuses = new ArrayList<IStatus>(); } if (e.getStatus().isMultiStatus()) { Collections.addAll(statuses, e.getStatus().getChildren()); } else { statuses.add(e.getStatus()); } } } // We encountered errors or warnings if (statuses != null && !statuses.isEmpty()) { currentEntry = null; // Stop propagation IStatus status = new MultiStatus(PatternDialectPlugin.PLUGIN_ID, 0, statuses.toArray(new IStatus[statuses.size()]), NLS.bind(Messages.APatternDialect_error_failedToParseLine, lineNo), null); if (fatal) { // Interrupt parsing in case of error throw new CoreException(status); } else { collector.addMessage(status); } } } else if (currentEntry != null) { // Append to message String msg = currentEntry.get(getFieldProvider().getMessageField()); currentEntry.put(getFieldProvider().getMessageField(), msg + IOUtils.LINE_SEPARATOR + line); } if (collector.isCanceled()) { // Cancel parsing break; } } if (currentEntry != null) { // Collect left over entry collector.collect(currentEntry); } } finally { LineIterator.closeQuietly(iter); } } catch (Exception e) { throw new CoreException(new Status(IStatus.ERROR, PatternDialectPlugin.PLUGIN_ID, NLS.bind(Messages.APatternDialect_error_failedToParseFile, new Object[] { log.getName(), e.getLocalizedMessage() }), e)); } }
From source file:com.alexkli.osgi.troubleshoot.impl.TroubleshootServlet.java
private void includeResource(PrintWriter out, String path) { try {/*from ww w.jav a 2 s .c o m*/ if (!path.startsWith("/")) { path = "/" + path; } URL url = getClass().getResource(path); if (url == null) { // not found` return; } InputStream ins = url.openConnection().getInputStream(); LineIterator lineIterator = IOUtils.lineIterator(ins, "UTF-8"); boolean startComment = true; while (lineIterator.hasNext()) { String line = lineIterator.nextLine(); if (startComment) { String trimmed = line.trim(); if (!trimmed.isEmpty() && !trimmed.startsWith("/**") && !trimmed.startsWith("*")) { startComment = false; } } if (!startComment) { out.println(line); } } } catch (IOException e) { e.printStackTrace(); } }
From source file:de.tudarmstadt.ukp.clarin.webanno.tsv.WebannoTsv3Reader.java
/** * Iterate through lines and create span annotations accordingly. For * multiple span annotation, based on the position of the annotation in the * line, update only the end position of the annotation *///from w w w. java 2 s.co m private void setAnnotations(JCas aJCas, InputStream aIs, String aEncoding) throws IOException { // getting header information LineIterator lineIterator = IOUtils.lineIterator(aIs, aEncoding); int sentBegin = -1, sentEnd = 0; int prevSentEnd = 0; StringBuilder sentLineSb = new StringBuilder(); String lastSent = ""; while (lineIterator.hasNext()) { String line = lineIterator.next(); if (line.startsWith("#T_")) { setLayerAndFeature(aJCas, line); continue; } if (line.startsWith("#Text=")) { if (sentLineSb.toString().isEmpty()) { sentLineSb.append(line.substring(line.indexOf("=") + 1)); } else { sentLineSb.append(LF + line.substring(line.indexOf("=") + 1)); } lastSent = sentLineSb.toString(); continue; } if (line.startsWith("#FORMAT=")) { continue; } if (line.trim().isEmpty()) { if (!sentLineSb.toString().isEmpty()) { createSentence(aJCas, sentLineSb.toString(), sentBegin, sentEnd, prevSentEnd); prevSentEnd = sentEnd; sentBegin = -1;// reset for next sentence begin sentLineSb = new StringBuilder(); } continue; } line = line.trim(); int count = StringUtils.countMatches(line, "\t"); if (columns != count) { throw new IOException(fileName + " This is not a valid TSV File. 
check this line: " + line); } String regex = "(?<!\\\\)*" + Pattern.quote(TAB); String[] lines = line.split(regex); int begin = Integer.parseInt(lines[1].split("-")[0]); int end = Integer.parseInt(lines[1].split("-")[1]); if (sentBegin == -1) { sentBegin = begin; } sentEnd = end; AnnotationUnit unit = createTokens(aJCas, lines, begin, end); int ind = 3; setAnnosPerTypePerUnit(lines, unit, ind); } // the last sentence if (!lastSent.isEmpty()) { createSentence(aJCas, lastSent, sentBegin, sentEnd, prevSentEnd); } Map<Type, Map<AnnotationUnit, List<AnnotationFS>>> annosPerTypePerUnit = new HashMap<>(); setAnnosPerUnit(aJCas, annosPerTypePerUnit); addAnnotations(aJCas, annosPerTypePerUnit); addChainAnnotations(aJCas); }
From source file:com.daphne.es.showcase.excel.service.ExcelDataService.java
/** * csv?// w w w . j a v a 2s. c o m * @param user * @param is */ @Async public void importCvs(final User user, final InputStream is) { ExcelDataService proxy = ((ExcelDataService) AopContext.currentProxy()); BufferedInputStream bis = null; try { long beginTime = System.currentTimeMillis(); bis = new BufferedInputStream(is); String encoding = FileCharset.getCharset(bis); LineIterator iterator = IOUtils.lineIterator(bis, encoding); String separator = ","; int totalSize = 0; //? final List<ExcelData> dataList = Lists.newArrayList(); if (iterator.hasNext()) { iterator.nextLine();// } while (iterator.hasNext()) { totalSize++; String line = iterator.nextLine(); String[] dataArray = StringUtils.split(line, separator); ExcelData data = new ExcelData(); data.setId(Long.valueOf(dataArray[0])); data.setContent(dataArray[1]); dataList.add(data); if (totalSize % batchSize == 0) { try { proxy.doBatchSave(dataList); } catch (Exception e) { Long fromId = dataList.get(0).getId(); Long endId = dataList.get(dataList.size() - 1).getId(); log.error("from " + fromId + " to " + endId + ", error", e); } dataList.clear(); } } if (dataList.size() > 0) { proxy.doBatchSave(dataList); } long endTime = System.currentTimeMillis(); Map<String, Object> context = Maps.newHashMap(); context.put("seconds", (endTime - beginTime) / 1000); notificationApi.notify(user.getId(), "excelImportSuccess", context); } catch (Exception e) { log.error("excel import error", e); Map<String, Object> context = Maps.newHashMap(); context.put("error", e.getMessage()); notificationApi.notify(user.getId(), "excelImportError", context); } finally { IOUtils.closeQuietly(bis); } }
From source file:edu.ku.brc.util.HelpIndexer.java
protected void processFile(final File file, final Vector<String> lines) { // System.out.println("processing file: " + file.getName()); LineIterator it; try {//w w w.ja va 2s . c o m it = FileUtils.lineIterator(file, "UTF-8"); } catch (IOException ex) { edu.ku.brc.af.core.UsageTracker.incrHandledUsageCount(); edu.ku.brc.exceptions.ExceptionTracker.getInstance().capture(HelpIndexer.class, ex); System.out.println("error processing file: " + file.getName()); return; } String target = getTarget(file); String title = getFileTitle(file); boolean removeTitleEntry = false; if (title != null) { String tline = "<indexitem text=\"" + title; if (target != null) { tline += "\" target=\"" + target; } tline += "\">"; lines.add(tline); removeTitleEntry = true; } if (target != null) { try { while (it.hasNext()) { String line = it.nextLine(); //System.out.println(line); if (isIndexLine(line)) { System.out.println("indexing " + file.getName() + ": " + line); String indexEntry = processIndexLine(line, target); if (indexEntry != null) { lines.add(" " + indexEntry); removeTitleEntry = false; } } } } finally { LineIterator.closeQuietly(it); } } if (title != null && !removeTitleEntry) { lines.add("</indexitem>"); } if (removeTitleEntry) { lines.remove(lines.size() - 1); } }
From source file:itemsetmining.itemset.ItemsetTree.java
/** * Build the itemset-tree based on an input file containing transactions * * @param input/* w w w. jav a 2 s . c o m*/ * an input file * @return */ public void buildTree(final File inputFile) throws IOException { // record start time startTimestamp = System.currentTimeMillis(); // reset memory usage statistics MemoryLogger.getInstance().reset(); // create an empty root for the tree root = new ItemsetTreeNode(null, 0); // Scan the database to read the transactions int count = 0; final LineIterator it = FileUtils.lineIterator(inputFile, "UTF-8"); while (it.hasNext()) { final String line = it.nextLine(); // if the line is a comment, is empty or is a // kind of metadata if (line.isEmpty() == true || line.charAt(0) == '#' || line.charAt(0) == '%' || line.charAt(0) == '@') { continue; } // add transaction to the tree addTransaction(line); count++; } // close the input file LineIterator.closeQuietly(it); // set the number of transactions noTransactions = count; // check the memory usage MemoryLogger.getInstance().checkMemory(); endTimestamp = System.currentTimeMillis(); }
From source file:edu.cornell.med.icb.goby.modes.EmpiricalPMode.java
private void scan() throws FileNotFoundException { LineIterator iterator = new LineIterator(new FastBufferedReader(new FileReader(inputFilename))); int lineNumber = 0; ObjectArrayList<String> elementIds = new ObjectArrayList<String>(); IntArrayList valuesA = new IntArrayList(); IntArrayList valuesB = new IntArrayList(); IntArrayList covariatesA = new IntArrayList(); IntArrayList covariatesB = new IntArrayList(); counter = new FormatFieldCounter(0, 2, 2, new String[] { "ALL" }); setupOutput();/*w w w .j av a2 s . c om*/ // ignore the header line: iterator.next(); ProgressLogger pg = new ProgressLogger(LOG); pg.displayFreeMemory = true; pg.itemsName = "pairs"; pg.expectedUpdates = countLines(inputFilename) - 1; pg.start("Starting to scan pairs."); while (iterator.hasNext()) { String next = iterator.nextLine(); String[] tokens = next.split("\t"); boolean pastIds = false; boolean pastValues = false; String typeOfPairString = tokens[0]; ObservationWriter.TypeOfPair typeOfPair = ObservationWriter.TypeOfPair.UNDEFINED; for (int i = 0; i < tokens.length; i++) { try { typeOfPair = ObservationWriter.TypeOfPair.valueOf(typeOfPairString); } catch (IllegalArgumentException e) { System.err.println( "First token of every line should be WITHIN_GROUP_PAIR or BETWEEN_GROUP_PAIR. Found " + typeOfPairString + " on line " + lineNumber); System.exit(1); } elementIds.clear(); valuesA.clear(); valuesB.clear(); covariatesA.clear(); covariatesB.clear(); int j; String groupComparison = tokens[1]; elementIds.add(groupComparison); for (j = 2; !"VALUES_A".equals(tokens[j]); j++) { if (j == tokens.length) { break; } elementIds.add(tokens[j]); } if (j == tokens.length) { System.err.println( "Every line must contain the VALUES keyword. 
Keyword not found on line " + lineNumber); System.exit(1); } j++; for (; !"VALUES_B".equals(tokens[j]); j++) { if (j == tokens.length) { break; } valuesA.add(Integer.parseInt(tokens[j])); } j++; for (; !"COVARIATES_A".equals(tokens[j]); j++) { if (j == tokens.length) { break; } valuesB.add(Integer.parseInt(tokens[j])); } if (j == tokens.length) { System.err .println("Every line must contain the COVARIATES_A keyword. Keyword not found on line " + lineNumber); System.exit(1); } j++; for (; !"COVARIATES_B".equals(tokens[j]); j++) { if (j == tokens.length) { break; } covariatesA.add(Integer.parseInt(tokens[j])); } if (j == tokens.length) { System.err .println("Every line must contain the COVARIATES_B keyword. Keyword not found on line " + lineNumber); System.exit(1); } j++; for (; j < tokens.length; j++) { covariatesB.add(Integer.parseInt(tokens[j])); } } lineNumber++; final String groupComparison = elementIds.get(0); process(typeOfPair, groupComparison, elementIds, valuesA, valuesB, covariatesA, covariatesB); pg.lightUpdate(); } pg.done(lineNumber); }
From source file:at.ac.ait.ubicity.fileloader.FileLoader.java
/** * //from ww w . j a v a 2 s . c om * @param _fileInfo A FileInformation object representing usage information on the file we are supposed to load: line count already ingested, last usage time... * @param _keySpace Cassandra key space into which to ingest * @param _host Cassandra host / server * @param _batchSize MutationBatch size * @throws Exception Shouldn't happen, although the Disruptor may throw an Exception under duress */ @SuppressWarnings("unchecked") public final static void load(final FileInformation _fileInfo, final String _keySpace, final String _host, final int _batchSize) throws Exception { if (!cassandraInitialized) { keySpace = AstyanaxInitializer.doInit("Test Cluster", _host, _keySpace); cassandraInitialized = true; } LongTimeStampSorter tsSorter = new LongTimeStampSorter(); Thread tTSSorter = new Thread(tsSorter); tTSSorter.setPriority(Thread.MAX_PRIORITY - 1); tTSSorter.setName("long timestamp sorter "); tTSSorter.start(); //get the log id from the file's URI final String log_id = _fileInfo.getURI().toString(); final MutationBatch batch = keySpace.prepareMutationBatch(); logger.info("got keyspace " + keySpace.getKeyspaceName() + " from Astyanax initializer"); final LineIterator onLines = FileUtils.lineIterator(new File(_fileInfo.getURI())); final ExecutorService exec = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() * 2); ColumnFamily crawl_stats = null; AggregationJob aggregationJob = new AggregationJob(keySpace, crawl_stats); Thread tAggJob = new Thread(aggregationJob); tAggJob.setName("Monitrix loader / aggregation job "); tAggJob.setPriority(Thread.MIN_PRIORITY + 1); tAggJob.start(); logger.info("[FILELOADER] started aggregation job, ring buffer running"); final Disruptor<SingleLogLineAsString> disruptor = new Disruptor(SingleLogLineAsString.EVENT_FACTORY, (int) Math.pow(TWO, 17), exec); SingleLogLineAsStringEventHandler.batch = batch; SingleLogLineAsStringEventHandler.keySpace = keySpace; 
SingleLogLineAsStringEventHandler.batchSize = _batchSize; SingleLogLineAsStringEventHandler.LOG_ID = log_id; SingleLogLineAsStringEventHandler.tsSorter = tsSorter; SingleLogLineAsStringEventHandler.aggregationJob = aggregationJob; //The EventHandler contains the actual logic for ingesting final EventHandler<SingleLogLineAsString> handler = new SingleLogLineAsStringEventHandler(); disruptor.handleEventsWith(handler); //get our Aggregate job in place //we are almost ready to start final RingBuffer<SingleLogLineAsString> rb = disruptor.start(); int _lineCount = 0; long _start, _lapse; _start = System.nanoTime(); int _linesAlreadyProcessed = _fileInfo.getLineCount(); //cycle through the lines already processed while (_lineCount < _linesAlreadyProcessed) { onLines.nextLine(); _lineCount++; } //now get down to the work we actually must do, and fill the ring buffer logger.info("begin proccessing of file " + _fileInfo.getURI() + " @line #" + _lineCount); while (onLines.hasNext()) { final long _seq = rb.next(); final SingleLogLineAsString event = rb.get(_seq); event.setValue(onLines.nextLine()); rb.publish(_seq); _lineCount++; } _lapse = System.nanoTime() - _start; logger.info("ended proccessing of file " + _fileInfo.getURI() + " @line #" + _lineCount); //stop, waiting for last threads still busy to finish their work disruptor.shutdown(); //update the file info, this will land in the cache _fileInfo.setLineCount(_lineCount); _fileInfo.setLastAccess(System.currentTimeMillis()); int _usageCount = _fileInfo.getUsageCount(); _fileInfo.setUsageCount(_usageCount++); //make sure we release resources onLines.close(); logger.info( "handled " + (_lineCount - _linesAlreadyProcessed) + " log lines in " + _lapse + " nanoseconds"); //now go to aggregation step SortedSet<Long> timeStamps = new TreeSet(tsSorter.timeStamps); long _minTs = timeStamps.first(); long _maxTs = timeStamps.last(); logger.info("**** min TimeStamp = " + _minTs); logger.info("**** max TimeStamp = " + _maxTs); 
StatsTableActualizer.update(_fileInfo.getURI().toString(), _minTs, _maxTs, _lineCount); // AggregationJob aggJob = new AggregationJob( keySpace, _host, _batchSize ); // Thread tAgg = new Thread( aggJob ); // tAgg.setName( "aggregation job " ); // tAgg.setPriority( Thread.MAX_PRIORITY - 1 ); // tAgg.start(); }
From source file:au.org.ala.names.search.ALANameIndexer.java
public void init() throws Exception { tnse = new TaxonNameSoundEx(); // init the known homonyms LineIterator lines = new LineIterator( new BufferedReader(new InputStreamReader(this.getClass().getClassLoader() .getResource("au/org/ala/propertystore/known_homonyms.txt").openStream(), "ISO-8859-1"))); LineIterator blines = new LineIterator(new BufferedReader( new InputStreamReader(this.getClass().getClassLoader().getResource("blacklist.txt").openStream()))); try {//w w w . j av a 2 s .co m //load known homonyms while (lines.hasNext()) { String line = lines.nextLine().trim(); knownHomonyms.add(line.toUpperCase()); } //load the blacklist while (blines.hasNext()) { String line = blines.nextLine().trim(); if (!line.startsWith("#") && StringUtils.isNotBlank(line)) blacklist.add(line); } } catch (Exception e) { e.printStackTrace(); } finally { lines.close(); blines.close(); } }