Usage examples for org.apache.commons.io.LineIterator.closeQuietly
public static void closeQuietly(LineIterator iterator)
From source file:org.gbif.harvest.digir.DigirHarvester.java
/** * Iterates over the nameRanges file, executing a search for each range. A * single search retrieves all records for the concept of interest that fall * in the specified range. If the concept were scientific name, for example, * the searches would collectively retrieve all the destination's records * for all scientific names contained in the resource of interest. This * information, collected as raw xml responses, is saved in the output * directory. The files written this way depend on the assumption * that at most one harvester will be targeting a given resource/name range * pair at a time./*from w w w .j ava 2s . co m*/ * * @param resource to query * @param destination of the DiGIR server to issue against * @param outputDirectory where files of interest are located * @param protocol name of harvester * @param maxSearchResponse maximum number of records returned in a response * @param targetCount target count of resource * * @throws HarvesterException thrown if method fails */ public void search(String resource, String destination, String outputDirectory, String protocol, String maxSearchResponse, int targetCount) throws HarvesterException { if (debug) { log.debug(">> search"); } // set the request response writer to the right directory searchWriter.setFileOutputDirectory(outputDirectory); // setup the NameRangeHandler for this search NameRangeHandler nameRangeHandler = new NameRangeHandler(outputDirectory, fileUtils); // get iterator over name ranges LineIterator nameRangeIter = nameRangeHandler.getNameRangeIterator(); // build the parameters required for the template into a map Map<String, String> templateParams = new HashMap<String, String>(); templateParams.put("resource", resource); templateParams.put("destination", destination); // gather the request content namespaces Properties mapping = new Properties(); String mappingFilePath = fileUtils.constructMappingFilePath(BASE_LOCATION, protocol, DigirHarvester.MAPPING_DIRECTORY_NAME, 
DigirHarvester.REQUEST_NAMESPACE_MAPPING_FILENAME); if (debug) { log.debug("Reading mappingFile from [" + mappingFilePath + "]"); } InputStream is = null; try { is = getClass().getResourceAsStream(mappingFilePath); if (is == null) { throw new HarvesterException( "Namespace mapping file not found, aborting search [" + mappingFilePath + "]"); } mapping.load(is); for (Object key : mapping.keySet()) { requestNamespaceMappings.put((String) key, mapping.getProperty((String) key)); } } catch (IOException e) { log.error("Problem loading request namespace mapping file", e); throw new HarvesterException(e.getMessage(), e); } finally { if (is != null) { try { is.close(); } catch (IOException e) { log.error( "An error occurred closing input stream on " + mappingFilePath + ": " + e.getMessage(), e); } } } // set the default DwC request namespace String contentNamespace = DigirHarvester.DEFAULT_CONTENT_NAMESPACE; for (String property : requestNamespaceMappings.keySet()) { if (StringUtils.equalsIgnoreCase("full", property)) { contentNamespace = requestNamespaceMappings.get(property); } } if (debug) { log.debug("The request content namespace that will be used is [" + contentNamespace + "]"); } templateParams.put("contentNamespace", contentNamespace); // determine max search response size int maxResponseSize = -1; try { maxResponseSize = Integer.valueOf(maxSearchResponse); } catch (Exception e) { log.warn("Error reading maxResponseSize [" + maxSearchResponse + "], please check that it has been set properly by the provider", e); } if (maxResponseSize < Constants.MIN_QUERY_TERM_LENGTH || maxResponseSize > MAX_RESPONSE_SIZE) { maxResponseSize = MAX_RESPONSE_SIZE; log.info("Defaulting maximum number of search records returned in a single response [" + String.valueOf(MAX_RESPONSE_SIZE) + "]"); } else { log.info("The maximum number of search records returned in a single response has been set to [" + String.valueOf(MAX_RESPONSE_SIZE) + "]"); } // grab the gbif log message output file File 
gbifLogMessageFile = new File(outputDirectory, Constants.GBIF_LOG_MESSAGE_FILENAME + Constants.TEXT_FILENAME_EXTENSION); // created buffered writer BufferedWriter gbifLogMessageFileBW = null; try { gbifLogMessageFileBW = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(gbifLogMessageFile, true), "UTF8")); } catch (IOException e) { log.error("Couldn't open GBIF 'Harvesting' log event file", e); } // each line in the file is a name range int requestCount = 0; int sameRequestCount = 0; while (nameRangeIter.hasNext()) { // For each line, extract the lower and upper name ranges String line = nameRangeIter.nextLine(); String lower = StringUtils.strip(fileUtils.getDelimitedPart(line, "\t", 0)); String upper = StringUtils.strip(fileUtils.getDelimitedPart(line, "\t", 1)); if (StringUtils.isBlank(lower) || StringUtils.isBlank(upper)) { // something is wrong with that range - log and ignore log.warn("Found invalid name range lower [" + lower + "] and upper [" + upper + "] - ignoring and continuing with next range"); continue; } log.info("Start harvesting range [" + lower + " - " + upper + "]"); // add range parameters to templateParams templateParams.put("lower", lower); templateParams.put("upper", upper); Integer startAt = 0; Integer innerMaxResults = maxResponseSize; boolean finished = false; boolean fireSameSearch = false; Integer endOfErrorRange = null; while (!finished) { templateParams.put("startAt", startAt.toString()); templateParams.put("maxResults", innerMaxResults.toString()); boolean inError = false; try { finished = fireSearch(destination, templateParams, protocol, gbifLogMessageFileBW, requestCount); } catch (SAXParseException saxe) { log.info("SAX Parse exception in parsing search response starting from [" + startAt + "] with maxResults [" + innerMaxResults + "]", saxe); // the chances are good that this error is in only one of the records coming back // so we can try again and start narrowing the size of the response until we find // the one 
causing the problem. It also may have been a transient error, so just // retrying would be enough to fix it. In that case we watch for a startAt beyond // this error range and when that is hit, bump the search back up to maxResponseSize. if (innerMaxResults == 1) { // we've found exactly one record that is in error - log in detail and move on log.warn( "Search record in error - record num [" + startAt + "]" + "in range lower [" + lower + "] upper [" + upper + "] " + "from destination [" + destination + "]", saxe); // assume that most errors are bad chars in a single record, so now set max results back to full endOfErrorRange = null; innerMaxResults = maxResponseSize; startAt++; // start should never exceed targetCount (avoid endless looping - skip range) if (startAt > targetCount) { log.error("Inside retry loop: request parameter startAt exceeded targetCount: " + String.valueOf(targetCount) + " Skipping range lower [" + lower + "] upper [" + upper + "]"); break; } else { continue; } } else { inError = true; endOfErrorRange = startAt + innerMaxResults; } } catch (SAXException e) { // we don't know enough to recover, so write this range out as failed, and move on to next range nameRangeHandler.appendFailedRange(lower, upper); break; } catch (SocketException se) { // the connection was reset, so re-issue the exact same request. // We have already waited 3 minutes to give the provider software the chance to reset in fireSearch() sameRequestCount++; // only re-issue same request on SocketException 5 times if (sameRequestCount <= 4) { log.info("Re-issue same request (" + String.valueOf(sameRequestCount) + ")"); inError = true; fireSameSearch = true; } else { log.info("Exceeded max number of possible re-issue same request"); nameRangeHandler.appendFailedRange(lower, upper); break; } } // was the operation stopped? 
catch (OperationStoppedException e) { throw new HarvesterException(e.getMessage(), e); } // setup for the next pass if (!finished) { if (inError && !fireSameSearch) { innerMaxResults = Math.max(1, innerMaxResults / DIVISOR); } else if (inError && fireSameSearch) { // parameters stay the same } else { requestCount++; if (endOfErrorRange != null && (startAt > endOfErrorRange)) { if (debug) { log.debug("StartAt passed endOfErrorRange, resetting max results"); } innerMaxResults = maxResponseSize; endOfErrorRange = null; } startAt = startAt + innerMaxResults; } } else { // successful completion of range nameRangeHandler.registerSuccessfulRange(lower); } } } LineIterator.closeQuietly(nameRangeIter); // a successful search has been completed, so do name range cleanup (ie delete pendingNameRanges file) nameRangeHandler.close(); // close bufferedWriter on GBIF Log Event Message file if (gbifLogMessageFile != null) { // close bufferedWriter on GBIF Log Event Message file try { gbifLogMessageFileBW.close(); } catch (IOException e) { log.error("Could not close buffered writer for file [" + gbifLogMessageFile.getAbsolutePath() + "]", e); } } log.info("End search"); }
From source file:org.h819.commons.file.MyFileUtils.java
/** * ????????//from ww w .ja v a 2 s. c om */ private static void exampleReadLargeFile() { File theFile = new File(""); LineIterator it = null; try { it = FileUtils.lineIterator(theFile, "UTF-8"); while (it.hasNext()) { String line = it.nextLine(); // do something with line } } catch (IOException e) { e.printStackTrace(); } finally { LineIterator.closeQuietly(it); } }
From source file:org.jdev.emg.sonar.cci.CCIXmlMetricsDecorator.java
@Override public void decorate(Resource resource, DecoratorContext context) { if (!Qualifiers.isFile(resource)) { return;//from w w w .j a v a 2 s . c o m } ProjectFileSystem fileSystem = context.getProject().getFileSystem(); File file = lookup(resource, fileSystem); try { if (readFirstByte(file) != '<') { return; } } catch (IOException e) { throw new SonarException(e); } int numCommentLines; CCICountCommentParser commentCounter = new CCICountCommentParser(); try { numCommentLines = commentCounter.countLinesOfComment(FileUtils.openInputStream(file)); if (numCommentLines == -1) { return; } } catch (IOException e) { throw new SonarException(e); } LineIterator iterator = null; int numLines = 0; int numBlankLines = 0; try { Charset charset = fileSystem.getSourceCharset(); iterator = charset == null ? FileUtils.lineIterator(file) : FileUtils.lineIterator(file, charset.name()); while (iterator.hasNext()) { String line = iterator.nextLine(); numLines++; if (line.trim().isEmpty()) { numBlankLines++; } } } catch (IOException e) { LOG.warn("error reading " + file + " to collect metrics", e); } finally { LineIterator.closeQuietly(iterator); } context.saveMeasure(CoreMetrics.LINES, (double) numLines); // Lines context.saveMeasure(CoreMetrics.COMMENT_LINES, (double) numCommentLines); // Non Commenting Lines of Code context.saveMeasure(CoreMetrics.NCLOC, (double) numLines - numBlankLines - numCommentLines); // Comment Lines }
From source file:org.kalypso.model.wspm.tuhh.schema.simulation.LengthSectionParser.java
private IStatus processLSFile(final String header, final String footer, final LogHelper log) throws FileNotFoundException, IOException { final Collection<IStatus> result = new ArrayList<>(); final WspmWaterBody waterBody = m_calculation.getReach().getWaterBody(); final TuhhStationComparator stationComparator = new TuhhStationComparator(waterBody.isDirectionUpstreams()); LineIterator lineIterator = null;//from ww w .j av a 2 s . c o m ResultLengthSection lsProc = null; try { lineIterator = IOUtils.lineIterator(new FileInputStream(m_lsFile), IWspmTuhhConstants.WSPMTUHH_CODEPAGE); BigDecimal firstStation = null; // station of previous line while (lineIterator.hasNext()) { if (log.checkCanceled()) return Status.CANCEL_STATUS; final String nextLine = lineIterator.nextLine(); /* Introduce space around 'NaN' and '***' values to make it parseable */ if (nextLine.contains("NaN")) //$NON-NLS-1$ log.log(false, Messages.getString("LengthSectionParser.0")); //$NON-NLS-1$ // TODO: handle NaN-values to keep information alive (unfortunally BigDecimal throws a NumberFormatException) final String cleanLine1 = nextLine.replaceAll("-NaN", " null "); //$NON-NLS-1$ //$NON-NLS-2$ final String cleanLine2 = cleanLine1.replaceAll("NaN", " null "); //$NON-NLS-1$ //$NON-NLS-2$ final String cleanLine3 = cleanLine2.replaceAll("-999.999", " null "); //$NON-NLS-1$ //$NON-NLS-2$ final BigDecimal station = NumberUtils.parseQuietDecimal(cleanLine3, 0, 11, IWspmTuhhConstants.STATION_SCALE); final BigDecimal runoff = NumberUtils.parseQuietDecimal(cleanLine3, 17, 27, 3); /* Any lines where station or runoff cannot be parsed are filtered out */ if (Objects.isNull(station, runoff)) continue; /* A new section begins, if the new station is lower than the next station */ final boolean sectionEnd = firstStation == null || stationComparator.compare(firstStation, station) == 0; if (sectionEnd) { if (lsProc != null) { final IStatus processorResult = closeProcessor(lsProc, footer); if (!processorResult.isOK()) 
result.add(processorResult); } log.log(false, Messages.getString( "org.kalypso.model.wspm.tuhh.schema.simulation.LengthSectionParser.2"), runoff); //$NON-NLS-1$ lsProc = new ResultLengthSection(runoff, m_outputDir, m_calculation, m_epsThinning, m_ovwMapURL); lsProc.addLine(header); lsProc.setTitlePattern(m_titlePattern); lsProc.setLsFilePattern(m_lsFilePattern); } /* clean line */ lsProc.addLine(cleanLine3); if (firstStation == null) firstStation = station; } if (lsProc != null) { final IStatus processorResult = closeProcessor(lsProc, footer); if (!processorResult.isOK()) result.add(processorResult); } } finally { LineIterator.closeQuietly(lineIterator); } if (result.isEmpty()) return Status.OK_STATUS; final IStatus[] children = result.toArray(new IStatus[result.size()]); final String msg = String.format(Messages.getString("LengthSectionParser.1")); //$NON-NLS-1$ return new MultiStatus(KalypsoModelWspmTuhhSchemaPlugin.getID(), 0, children, msg, null); }
From source file:org.kalypso.wspwin.core.CalculationBean.java
public static CalculationBean[] readBerFile(final File berFile) throws ParseException, IOException { // if a zustand has no calculations, no .ber file is present. if (!berFile.exists()) return new CalculationBean[0]; final List<CalculationBean> beans = new ArrayList<>(10); LineIterator lineIt = null;/*from ww w. j a v a 2s .c o m*/ try { int count = 0; lineIt = FileUtils.lineIterator(berFile, "CP850"); //$NON-NLS-1$ // ignore first line, we just read all lines lineIt.nextLine(); count++; while (lineIt.hasNext()) { final String line = lineIt.nextLine(); count++; if (line.length() < 60) throw new ParseException(Messages.getString("org.kalypso.wspwin.core.CalculationBean.0") + line, //$NON-NLS-1$ count); final String name = line.substring(0, 60).trim(); final StringTokenizer tokenizer = new StringTokenizer(line.substring(60)); if (tokenizer.countTokens() != 3) throw new ParseException(Messages.getString("org.kalypso.wspwin.core.CalculationBean.1") + line, //$NON-NLS-1$ count); final BigDecimal fromStation = new BigDecimal(tokenizer.nextToken()); final BigDecimal toStation = new BigDecimal(tokenizer.nextToken()); final String fileName = tokenizer.nextToken(); beans.add(new CalculationBean(name, fileName, fromStation, toStation)); } return beans.toArray(new CalculationBean[beans.size()]); } finally { LineIterator.closeQuietly(lineIt); } }
From source file:org.kalypso.wspwin.core.LocalEnergyLossBean.java
/** * Reads a psi file (Energieverluste/Verlustbeiwerte/local energy losses) *///from w w w. jav a 2 s . c o m public static LocalEnergyLossBean[] read(final File lelFile) throws ParseException, IOException { final List<LocalEnergyLossBean> beans = new ArrayList<>(0); LineIterator lineIt = null; try { if (lelFile.exists()) { int count = 0; for (lineIt = FileUtils.lineIterator(lelFile, null); lineIt.hasNext();) { final String nextLine = lineIt.nextLine(); count++; final StringTokenizer tokenizer = new StringTokenizer(nextLine); if (tokenizer.countTokens() % 2 != 0) throw new ParseException( Messages.getString("org.kalypso.wspwin.core.LocalEnergyLossBean.1") + nextLine, //$NON-NLS-1$ count); final int countKinds = tokenizer.countTokens() / 2 - 1; final String key = tokenizer.nextToken(); if (!STATION.equalsIgnoreCase(key)) throw new ParseException(Messages.getString("org.kalypso.wspwin.core.LocalEnergyLossBean.2") //$NON-NLS-1$ + STATION + "': " + nextLine, count); //$NON-NLS-1$ final BigDecimal station = new BigDecimal(tokenizer.nextToken()); // read pairs: kind -> value final Collection<Pair<String, BigDecimal>> entries = new ArrayList<>(); for (int i = 0; i < countKinds; i++) { final String kind = tokenizer.nextToken(); final BigDecimal value = new BigDecimal(tokenizer.nextToken()); entries.add(Pair.of(kind, value)); } final LocalEnergyLossBean lossBean = new LocalEnergyLossBean(station, entries.toArray(new Pair[entries.size()])); beans.add(lossBean); } } return beans.toArray(new LocalEnergyLossBean[beans.size()]); } finally { LineIterator.closeQuietly(lineIt); } }
From source file:org.kalypso.wspwin.core.RunOffEventBean.java
/** Reads a qwt or wsf file */ public static RunOffEventBean[] read(final File qwtFile) throws ParseException, IOException { // the qwt and wsf files may not exist; return empty list of beans if (!qwtFile.exists()) return new RunOffEventBean[] {}; final List<RunOffEventBean> beans = new ArrayList<>(10); LineIterator lineIt = null;/*from w w w.ja v a 2 s . c o m*/ try { int count = 0; for (lineIt = FileUtils.lineIterator(qwtFile, null); lineIt.hasNext();) { final String nextLine = lineIt.nextLine().trim(); count++; if (nextLine.isEmpty()) continue; final StringTokenizer tokenizer = new StringTokenizer(nextLine); if (tokenizer.countTokens() != 2) throw new ParseException( Messages.getString("org.kalypso.wspwin.core.RunOffEventBean.0") + nextLine, count); //$NON-NLS-1$ final String eventName = tokenizer.nextToken(); final RunOffEventBean bean = new RunOffEventBean(eventName); final int eventLength = Integer.parseInt(tokenizer.nextToken()); // read block: station -> value for (int i = 0; i < eventLength; i++) { if (!lineIt.hasNext()) throw new ParseException( Messages.getString("org.kalypso.wspwin.core.RunOffEventBean.1") + eventName, count); //$NON-NLS-1$ final String line = lineIt.nextLine(); count++; final StringTokenizer tz = new StringTokenizer(line); if (tz.countTokens() != 2) throw new ParseException( Messages.getString("org.kalypso.wspwin.core.RunOffEventBean.2") + nextLine, count); //$NON-NLS-1$ final double station = Double.parseDouble(tz.nextToken()); final double value = Double.parseDouble(tz.nextToken()); bean.addEntry(BigDecimal.valueOf(station), BigDecimal.valueOf(value)); } beans.add(bean); } return beans.toArray(new RunOffEventBean[beans.size()]); } finally { LineIterator.closeQuietly(lineIt); } }
From source file:org.klab.com.etl.Movies.java
public void readFile(String file, BufferedWriter bw) { int _n_ = 0;//from w w w . j ava 2 s. c o m int _r_ = 0; LineIterator it = null; String FILE_DATE = movieFileDate(file); try { it = FileUtils.lineIterator(new File(file), "UTF-8"); while (it.hasNext()) { String line = it.nextLine(); int amountOfTabsLine = StringUtils.countMatches(line, SEPARATOR); //This will decide the structure String fixedLine = fixLine(line); // do something with line // System.out.println("N:\t"+amountOfTabsLine+ "\t"+line); if (!fixedLine.isEmpty() && _n_ > 0) { String output = _n_ + "\t" + FILE_DATE + "\t" + amountOfTabsLine + "\t" + fixedLine; bw.write(output); bw.newLine(); _r_++; } _n_++; } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { System.out.println("io > [" + _n_ + "][" + _r_ + "] > out"); LineIterator.closeQuietly(it); } }
From source file:org.klab.com.etl.Movies.java
public void readinHeader(File file) { LineIterator it = null;/*w w w . j av a 2 s .c o m*/ try { it = FileUtils.lineIterator(file, "UTF-8"); if (it.hasNext()) { String line = it.nextLine(); getHeaderStructure(line); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { LineIterator.closeQuietly(it); } }
From source file:org.kuali.kfs.gl.batch.PreScrubberStep.java
/**
 * Builds the executor of this step. It passes the lines of the backup file to the
 * pre scrubber service, which is also given the path of the pre scrubber output file,
 * and generates a report when the service returned report data.
 *
 * @return an executor whose {@code execute()} returns {@code true} on completion and
 *         throws a {@link RuntimeException} if the input file cannot be read
 */
@Override
protected CustomBatchExecutor getCustomBatchExecutor() {
    return new CustomBatchExecutor() {
        public boolean execute() {
            StopWatch timer = new StopWatch();
            timer.start();

            // both files live in the batch file directory and share the same extension
            String directoryPrefix = batchFileDirectoryName + File.separator;
            String backupPath = directoryPrefix + GeneralLedgerConstants.BatchFileSystem.BACKUP_FILE
                    + GeneralLedgerConstants.BatchFileSystem.EXTENSION;
            String preScrubberPath = directoryPrefix + GeneralLedgerConstants.BatchFileSystem.PRE_SCRUBBER_FILE
                    + GeneralLedgerConstants.BatchFileSystem.EXTENSION;

            PreScrubberReportData reportData = null;
            LineIterator originEntries = null;
            try {
                originEntries = FileUtils.lineIterator(new File(backupPath));
                reportData = preScrubberService.preprocessOriginEntries(originEntries, preScrubberPath);
            } catch (IOException e) {
                LOG.error("IO exception occurred during pre scrubbing.", e);
                throw new RuntimeException("IO exception occurred during pre scrubbing.", e);
            } finally {
                LineIterator.closeQuietly(originEntries);
            }

            if (reportData != null) {
                PreScrubberReport report = new PreScrubberReport();
                report.generateReport(reportData, preScrubberReportWriterService);
            }

            timer.stop();
            if (LOG.isDebugEnabled()) {
                double elapsedMinutes = timer.getTotalTimeSeconds() / 60.0;
                LOG.debug("scrubber step of took " + elapsedMinutes + " minutes to complete");
            }
            return true;
        }
    };
}