Usage examples for org.apache.commons.io.LineIterator#nextLine()
Signature: public String nextLine()
Reads the next line from the underlying Reader.
From source file: org.eclipse.smarthome.extensionservice.marketplace.internal.BindingExtensionHandler.java
private Map<String, Long> loadInstalledBindingsMap() { File dataFile = bundleContext.getDataFile(BINDING_FILE); if (dataFile != null && dataFile.exists()) { try (FileReader reader = new FileReader(dataFile)) { LineIterator lineIterator = IOUtils.lineIterator(reader); Map<String, Long> map = new HashMap<>(); while (lineIterator.hasNext()) { String line = lineIterator.nextLine(); String[] parts = line.split(";"); if (parts.length == 2) { try { map.put(parts[0], Long.valueOf(parts[1])); } catch (NumberFormatException e) { logger.debug("Cannot parse '{}' as a number in file {} - ignoring it.", parts[1], dataFile.getName()); }/*from www.jav a 2 s . com*/ } else { logger.debug("Invalid line in file {} - ignoring it:\n{}", dataFile.getName(), line); } } return map; } catch (IOException e) { logger.debug("File '{}' for installed bindings does not exist.", dataFile.getName()); // ignore and just return an empty map } } return new HashMap<>(); }
From source file:org.eclipse.smarthome.extensionservice.marketplace.internal.BundleExtensionHandler.java
private Map<String, Long> loadInstalledBundlesFile(File dataFile) { try (FileReader reader = new FileReader(dataFile)) { LineIterator lineIterator = IOUtils.lineIterator(reader); Map<String, Long> map = new HashMap<>(); while (lineIterator.hasNext()) { String line = lineIterator.nextLine(); String[] parts = line.split(";"); if (parts.length == 2) { try { map.put(parts[0], Long.valueOf(parts[1])); } catch (NumberFormatException e) { logger.debug("Cannot parse '{}' as a number in file {} - ignoring it.", parts[1], dataFile.getName()); }//from w w w .ja v a2 s . c om } else { logger.debug("Invalid line in file {} - ignoring it:\n{}", dataFile.getName(), line); } } return map; } catch (IOException e) { logger.debug("File '{}' for installed bundles does not exist.", dataFile.getName()); // ignore and just return an empty map } return new HashMap<>(); }
From source file:org.eclipse.smila.utils.scriptexecution.LogHelper.java
/**
 * Logs every line read from the given input stream at the specified level.
 *
 * @param log         target log
 * @param inputStream stream whose lines are logged; a {@code null} charset is
 *                    passed through, i.e. the platform default encoding is used
 * @param logLevel    level to log each line at
 * @throws IOException if reading from the stream fails
 */
private static void log(final Log log, final InputStream inputStream, final LogLevel logLevel)
        throws IOException {
    // Walk the stream line by line, delegating each line to the single-line overload.
    for (final LineIterator lines = IOUtils.lineIterator(inputStream, null); lines.hasNext();) {
        log(log, lines.nextLine(), logLevel);
    }
}
From source file:org.ednovo.data.handlers.FileInputProcessor.java
@Override public void handleRow(Object row) throws Exception { File folder = new File(fileInputData.get("file-path")); Collection<File> files = FileUtils.listFiles(folder, new WildcardFileFilter(fileInputData.get("path-pattern")), DirectoryFileFilter.DIRECTORY); StopWatch sw = new StopWatch(); for (final File file : files) { LOG.info("processing file {}", file.getAbsolutePath()); sw.start();//from ww w .j a v a 2s. com long lines = 0; try { LineIterator it = FileUtils.lineIterator(file, "UTF-8"); try { while (it.hasNext()) { final String line = it.nextLine(); // Send the row to the next process handler. getNextRowHandler().processRow(line); lines++; if (lines % 1000 == 0) { LOG.info("file-lines: {} ", lines); } } } finally { LineIterator.closeQuietly(it); } } catch (IOException e) { LOG.error("Error processing file {} ", file.getAbsolutePath(), e); } sw.stop("file:" + file.getAbsolutePath() + ": lines= " + lines + " "); LOG.info(sw.toString(Integer.parseInt(lines + ""))); } }
From source file:org.emergent.bzr4j.intellij.command.ShellCommandServiceTest.java
private List<String> tokenize2(String in) { ArrayList<String> retval = new ArrayList<String>(); LineIterator iter = new LineIterator(new StringReader(in)); while (iter.hasNext()) { retval.add(iter.nextLine()); }// w w w . j a va 2 s .com return retval; }
From source file:org.freeeed.main.DocumentParser.java
/** * This function is specifically Memex crawler. *jl means JSON lines. * Furthermore, each JSON line has the expected fields * * @param fileName input file in *jl format * @param metadata extracted metadata/*from w ww. jav a 2 s. c om*/ */ // TODO make the code more elegant, try-with-exceptions private void extractJlFields(String fileName, DocumentMetadata metadata) { LineIterator it = null; try { it = FileUtils.lineIterator(new File(fileName), "UTF-8"); while (it.hasNext()) { String jsonAsString = it.nextLine(); String htmlText = JsonParser.getJsonField(jsonAsString, "extracted_text"); String text = Jsoup.parse(htmlText).text(); metadata.set(DocumentMetadataKeys.DOCUMENT_TEXT, text); metadata.setContentType("application/jl"); } } catch (IOException e) { LOGGER.error("Problem with JSON line", e); } finally { assert it != null; it.close(); } }
From source file:org.freeeed.main.FileProcessor.java
private void extractJlFields(DiscoveryFile discoveryFile) { LineIterator it = null; try {/*from ww w . j a va 2 s .c om*/ it = FileUtils.lineIterator(discoveryFile.getPath(), "UTF-8"); while (it.hasNext()) { DocumentMetadata metadata = new DocumentMetadata(); String jsonAsString = it.nextLine(); String htmlText = JsonParser.getJsonField(jsonAsString, "extracted_text"); String text = Jsoup.parse(htmlText).text(); // text metadata fields metadata.set(DocumentMetadataKeys.DOCUMENT_TEXT, text); metadata.setContentType("application/jl"); // other necessary metadata fields metadata.setOriginalPath(getOriginalDocumentPath(discoveryFile)); metadata.setHasAttachments(discoveryFile.isHasAttachments()); metadata.setHasParent(discoveryFile.isHasParent()); metadata.setCustodian(Project.getCurrentProject().getCurrentCustodian()); writeMetadata(discoveryFile, metadata); ESIndex.getInstance().addBatchData(metadata); } } catch (Exception e) { LOGGER.error("Problem with JSON line", e); } finally { if (it != null) { it.close(); } } }
From source file:org.gbif.harvest.digir.DigirHarvester.java
/**
 * Iterates over the nameRanges file, executing a search for each range. A
 * single search retrieves all records for the concept of interest that fall
 * in the specified range. If the concept were scientific name, for example,
 * the searches would collectively retrieve all the destination's records
 * for all scientific names contained in the resource of interest. This
 * information, collected as raw xml responses, is saved in the output
 * directory. The files written this way depend on the assumption
 * that at most one harvester will be targeting a given resource/name range
 * pair at a time.
 * <p>
 * Error-recovery strategy: on a SAXParseException the page size is halved
 * repeatedly until the single offending record is isolated and skipped; on a
 * SocketException the identical request is retried up to 5 times; on any
 * other SAXException the whole range is recorded as failed and skipped.
 *
 * @param resource to query
 * @param destination of the DiGIR server to issue against
 * @param outputDirectory where files of interest are located
 * @param protocol name of harvester
 * @param maxSearchResponse maximum number of records returned in a response
 * @param targetCount target count of resource
 *
 * @throws HarvesterException thrown if method fails
 */
public void search(String resource, String destination, String outputDirectory, String protocol,
        String maxSearchResponse, int targetCount) throws HarvesterException {
    if (debug) {
        log.debug(">> search");
    }
    // set the request response writer to the right directory
    searchWriter.setFileOutputDirectory(outputDirectory);
    // setup the NameRangeHandler for this search
    NameRangeHandler nameRangeHandler = new NameRangeHandler(outputDirectory, fileUtils);
    // get iterator over name ranges
    LineIterator nameRangeIter = nameRangeHandler.getNameRangeIterator();
    // build the parameters required for the template into a map
    Map<String, String> templateParams = new HashMap<String, String>();
    templateParams.put("resource", resource);
    templateParams.put("destination", destination);
    // gather the request content namespaces
    Properties mapping = new Properties();
    String mappingFilePath = fileUtils.constructMappingFilePath(BASE_LOCATION, protocol,
            DigirHarvester.MAPPING_DIRECTORY_NAME, DigirHarvester.REQUEST_NAMESPACE_MAPPING_FILENAME);
    if (debug) {
        log.debug("Reading mappingFile from [" + mappingFilePath + "]");
    }
    InputStream is = null;
    try {
        is = getClass().getResourceAsStream(mappingFilePath);
        if (is == null) {
            throw new HarvesterException(
                    "Namespace mapping file not found, aborting search [" + mappingFilePath + "]");
        }
        mapping.load(is);
        // copy the loaded properties into the instance-level namespace map
        for (Object key : mapping.keySet()) {
            requestNamespaceMappings.put((String) key, mapping.getProperty((String) key));
        }
    } catch (IOException e) {
        log.error("Problem loading request namespace mapping file", e);
        throw new HarvesterException(e.getMessage(), e);
    } finally {
        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                log.error(
                        "An error occurred closing input stream on " + mappingFilePath + ": " + e.getMessage(),
                        e);
            }
        }
    }
    // set the default DwC request namespace; a mapping keyed "full" overrides it
    String contentNamespace = DigirHarvester.DEFAULT_CONTENT_NAMESPACE;
    for (String property : requestNamespaceMappings.keySet()) {
        if (StringUtils.equalsIgnoreCase("full", property)) {
            contentNamespace = requestNamespaceMappings.get(property);
        }
    }
    if (debug) {
        log.debug("The request content namespace that will be used is [" + contentNamespace + "]");
    }
    templateParams.put("contentNamespace", contentNamespace);
    // determine max search response size
    int maxResponseSize = -1;
    try {
        // NOTE(review): broad catch - a null/garbage provider value falls through
        // to the default below rather than failing the search
        maxResponseSize = Integer.valueOf(maxSearchResponse);
    } catch (Exception e) {
        log.warn("Error reading maxResponseSize [" + maxSearchResponse
                + "], please check that it has been set properly by the provider", e);
    }
    // NOTE(review): lower bound reuses Constants.MIN_QUERY_TERM_LENGTH - looks
    // like a repurposed constant; confirm it is the intended minimum page size
    if (maxResponseSize < Constants.MIN_QUERY_TERM_LENGTH || maxResponseSize > MAX_RESPONSE_SIZE) {
        maxResponseSize = MAX_RESPONSE_SIZE;
        log.info("Defaulting maximum number of search records returned in a single response ["
                + String.valueOf(MAX_RESPONSE_SIZE) + "]");
    } else {
        // NOTE(review): prints the constant MAX_RESPONSE_SIZE instead of the
        // accepted maxResponseSize - likely a copy/paste bug in the log message
        log.info("The maximum number of search records returned in a single response has been set to ["
                + String.valueOf(MAX_RESPONSE_SIZE) + "]");
    }
    // grab the gbif log message output file
    File gbifLogMessageFile = new File(outputDirectory,
            Constants.GBIF_LOG_MESSAGE_FILENAME + Constants.TEXT_FILENAME_EXTENSION);
    // created buffered writer (append mode; a failure here is logged and the
    // writer stays null)
    BufferedWriter gbifLogMessageFileBW = null;
    try {
        gbifLogMessageFileBW = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(gbifLogMessageFile, true), "UTF8"));
    } catch (IOException e) {
        log.error("Couldn't open GBIF 'Harvesting' log event file", e);
    }
    // each line in the file is a name range
    int requestCount = 0;
    int sameRequestCount = 0;
    while (nameRangeIter.hasNext()) {
        // For each line, extract the lower and upper name ranges (tab-separated)
        String line = nameRangeIter.nextLine();
        String lower = StringUtils.strip(fileUtils.getDelimitedPart(line, "\t", 0));
        String upper = StringUtils.strip(fileUtils.getDelimitedPart(line, "\t", 1));
        if (StringUtils.isBlank(lower) || StringUtils.isBlank(upper)) {
            // something is wrong with that range - log and ignore
            log.warn("Found invalid name range lower [" + lower + "] and upper [" + upper
                    + "] - ignoring and continuing with next range");
            continue;
        }
        log.info("Start harvesting range [" + lower + " - " + upper + "]");
        // add range parameters to templateParams
        templateParams.put("lower", lower);
        templateParams.put("upper", upper);
        // paging state for this range: startAt advances through the range,
        // innerMaxResults shrinks while isolating a bad record
        Integer startAt = 0;
        Integer innerMaxResults = maxResponseSize;
        boolean finished = false;
        boolean fireSameSearch = false;
        Integer endOfErrorRange = null;
        while (!finished) {
            templateParams.put("startAt", startAt.toString());
            templateParams.put("maxResults", innerMaxResults.toString());
            boolean inError = false;
            try {
                finished = fireSearch(destination, templateParams, protocol, gbifLogMessageFileBW,
                        requestCount);
            } catch (SAXParseException saxe) {
                log.info("SAX Parse exception in parsing search response starting from [" + startAt
                        + "] with maxResults [" + innerMaxResults + "]", saxe);
                // the chances are good that this error is in only one of the records coming back
                // so we can try again and start narrowing the size of the response until we find
                // the one causing the problem. It also may have been a transient error, so just
                // retrying would be enough to fix it. In that case we watch for a startAt beyond
                // this error range and when that is hit, bump the search back up to maxResponseSize.
                if (innerMaxResults == 1) {
                    // we've found exactly one record that is in error - log in detail and move on
                    log.warn(
                            "Search record in error - record num [" + startAt + "]" + "in range lower ["
                                    + lower + "] upper [" + upper + "] " + "from destination [" + destination
                                    + "]",
                            saxe);
                    // assume that most errors are bad chars in a single record, so now set max results back to full
                    endOfErrorRange = null;
                    innerMaxResults = maxResponseSize;
                    startAt++;
                    // start should never exceed targetCount (avoid endless looping - skip range)
                    if (startAt > targetCount) {
                        log.error("Inside retry loop: request parameter startAt exceeded targetCount: "
                                + String.valueOf(targetCount) + " Skipping range lower [" + lower
                                + "] upper [" + upper + "]");
                        break;
                    } else {
                        continue;
                    }
                } else {
                    // more than one candidate record: halve the page (below) and remember
                    // where the suspect window ends so the page size can be restored later
                    inError = true;
                    endOfErrorRange = startAt + innerMaxResults;
                }
            } catch (SAXException e) {
                // we don't know enough to recover, so write this range out as failed, and move on to next range
                nameRangeHandler.appendFailedRange(lower, upper);
                break;
            } catch (SocketException se) {
                // the connection was reset, so re-issue the exact same request.
                // We have already waited 3 minutes to give the provider software the chance to reset in fireSearch()
                sameRequestCount++;
                // only re-issue same request on SocketException 5 times
                if (sameRequestCount <= 4) {
                    log.info("Re-issue same request (" + String.valueOf(sameRequestCount) + ")");
                    inError = true;
                    fireSameSearch = true;
                } else {
                    log.info("Exceeded max number of possible re-issue same request");
                    nameRangeHandler.appendFailedRange(lower, upper);
                    break;
                }
            }
            // was the operation stopped?
            catch (OperationStoppedException e) {
                throw new HarvesterException(e.getMessage(), e);
            }
            // setup for the next pass
            if (!finished) {
                if (inError && !fireSameSearch) {
                    // parse error: shrink the page to close in on the bad record
                    innerMaxResults = Math.max(1, innerMaxResults / DIVISOR);
                } else if (inError && fireSameSearch) {
                    // parameters stay the same
                } else {
                    requestCount++;
                    if (endOfErrorRange != null && (startAt > endOfErrorRange)) {
                        if (debug) {
                            log.debug("StartAt passed endOfErrorRange, resetting max results");
                        }
                        innerMaxResults = maxResponseSize;
                        endOfErrorRange = null;
                    }
                    startAt = startAt + innerMaxResults;
                }
            } else {
                // successful completion of range
                nameRangeHandler.registerSuccessfulRange(lower);
            }
        }
    }
    LineIterator.closeQuietly(nameRangeIter);
    // a successful search has been completed, so do name range cleanup (ie delete pendingNameRanges file)
    nameRangeHandler.close();
    // close bufferedWriter on GBIF Log Event Message file
    // NOTE(review): guards on gbifLogMessageFile (never null) rather than
    // gbifLogMessageFileBW, which IS null if the open above failed -> NPE risk
    if (gbifLogMessageFile != null) {
        // close bufferedWriter on GBIF Log Event Message file
        try {
            gbifLogMessageFileBW.close();
        } catch (IOException e) {
            log.error("Could not close buffered writer for file [" + gbifLogMessageFile.getAbsolutePath()
                    + "]", e);
        }
    }
    log.info("End search");
}
From source file:org.gridgain.client.impl.GridClientPropertiesConfigurationSelfTest.java
/**
 * Uncomments client properties.
 *
 * @param url Source to uncomment client properties for.
 * @return Temporary file with uncommented client properties.
 * @throws IOException In case of IO exception.
 */
private File uncommentProperties(URL url) throws IOException {
    InputStream in = url.openStream();
    assertNotNull(in);
    Collection<String> lines = new ArrayList<>();
    try {
        LineIterator it = IOUtils.lineIterator(in, "UTF-8");
        // strip the leading '#' from commented-out client properties
        while (it.hasNext())
            lines.add(it.nextLine().replace("#gg.client.", "gg.client."));
    } finally {
        // Close in a finally block: the original only closed the stream on the
        // happy path, leaking it whenever reading threw.
        GridUtils.closeQuiet(in);
    }
    // suffix needs the dot, otherwise createTempFile produces "...properties"
    File tmp = File.createTempFile(UUID.randomUUID().toString(), ".properties");
    tmp.deleteOnExit();
    FileUtils.writeLines(tmp, lines);
    return tmp;
}
From source file:org.h819.commons.file.MyFileUtils.java
/** * ????????/*from www. j a va 2s . c o m*/ */ private static void exampleReadLargeFile() { File theFile = new File(""); LineIterator it = null; try { it = FileUtils.lineIterator(theFile, "UTF-8"); while (it.hasNext()) { String line = it.nextLine(); // do something with line } } catch (IOException e) { e.printStackTrace(); } finally { LineIterator.closeQuietly(it); } }