List of usage examples for org.apache.commons.io LineIterator hasNext
public boolean hasNext()
Indicates whether the Reader has more lines.
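Before looking at the larger examples below, here is a minimal sketch of the basic hasNext()/nextLine() loop. It assumes Commons IO is on the classpath; the file name data.txt is only a placeholder for any local text file.

import java.io.File;
import java.nio.charset.StandardCharsets;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;

public class LineIteratorHasNextExample {
    public static void main(String[] args) throws Exception {
        // data.txt is a placeholder; point this at any text file you have
        File file = new File("data.txt");
        LineIterator it = FileUtils.lineIterator(file, StandardCharsets.UTF_8.name());
        try {
            // hasNext() reads ahead one line and caches it, so it is safe to
            // call repeatedly before consuming the line with nextLine()
            while (it.hasNext()) {
                String line = it.nextLine();
                System.out.println(line);
            }
        } finally {
            // release the underlying Reader when done
            it.close();
        }
    }
}

The examples that follow use the same pattern, typically wrapping the loop in try/finally and closing the iterator (or calling LineIterator.closeQuietly) when iteration ends.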
From source file:org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector.java
/**
 * Sweep phase of gc candidate deletion.
 * <p>
 * Performs the following steps depending upon the type of the blob store refer
 * {@link org.apache.jackrabbit.oak.plugins.blob.SharedDataStore.Type}:
 *
 * <ul>
 * <li>Shared</li>
 * <li>
 * <ul>
 * <li> Merge all marked references (from the mark phase run independently) available in the data store meta
 * store (from all configured independent repositories).
 * <li> Retrieve all blob ids available.
 * <li> Diffs the 2 sets above to retrieve list of blob ids not used.
 * <li> Deletes only blobs created after
 * (earliest time stamp of the marked references - #maxLastModifiedInterval) from the above set.
 * </ul>
 * </li>
 *
 * <li>Default</li>
 * <li>
 * <ul>
 * <li> Mark phase already run.
 * <li> Retrieve all blob ids available.
 * <li> Diffs the 2 sets above to retrieve list of blob ids not used.
 * <li> Deletes only blobs created after
 * (time stamp of the marked references - #maxLastModifiedInterval).
 * </ul>
 * </li>
 * </ul>
 *
 * @return the number of blobs deleted
 * @throws Exception the exception
 * @param fs the garbage collector file state
 * @param markStart the start time of mark to take as reference for deletion
 */
protected long sweep(GarbageCollectorFileState fs, long markStart) throws Exception {
    long earliestRefAvailTime;
    // Merge all the blob references available from all the reference files in the data store meta store
    // Only go ahead if merge succeeded
    try {
        earliestRefAvailTime = GarbageCollectionType.get(blobStore).mergeAllMarkedReferences(blobStore, fs);
        LOG.debug("Earliest reference available for timestamp [{}]", earliestRefAvailTime);
        earliestRefAvailTime = (earliestRefAvailTime < markStart ? earliestRefAvailTime : markStart);
    } catch (Exception e) {
        return 0;
    }

    // Find all blob references after iterating over the whole repository
    (new BlobIdRetriever(fs)).call();

    // Calculate the references not used
    difference(fs);

    long count = 0;
    long deleted = 0;
    long lastMaxModifiedTime = getLastMaxModifiedTime(earliestRefAvailTime);
    LOG.debug("Starting sweep phase of the garbage collector");
    LOG.debug("Sweeping blobs with modified time > than the configured max deleted time ({}). ",
            timestampToString(lastMaxModifiedTime));

    ConcurrentLinkedQueue<String> exceptionQueue = new ConcurrentLinkedQueue<String>();

    LineIterator iterator = FileUtils.lineIterator(fs.getGcCandidates(), Charsets.UTF_8.name());
    List<String> ids = newArrayList();

    while (iterator.hasNext()) {
        ids.add(iterator.next());

        if (ids.size() >= getBatchCount()) {
            count += ids.size();
            deleted += sweepInternal(ids, exceptionQueue, lastMaxModifiedTime);
            ids = newArrayList();
        }
    }
    if (!ids.isEmpty()) {
        count += ids.size();
        deleted += sweepInternal(ids, exceptionQueue, lastMaxModifiedTime);
    }

    BufferedWriter writer = null;
    try {
        if (!exceptionQueue.isEmpty()) {
            writer = Files.newWriter(fs.getGarbage(), Charsets.UTF_8);
            saveBatchToFile(newArrayList(exceptionQueue), writer);
        }
    } finally {
        LineIterator.closeQuietly(iterator);
        IOUtils.closeQuietly(writer);
    }

    if (!exceptionQueue.isEmpty()) {
        LOG.warn("Unable to delete some blobs entries from the blob store. Details around such blob entries can "
                + "be found in [{}]", fs.getGarbage().getAbsolutePath());
    }
    if (count != deleted) {
        LOG.warn("Deleted only [{}] blobs entries from the [{}] candidates identified. This may happen if blob "
                + "modified time is > " + "than the max deleted time ({})", deleted, count,
                timestampToString(lastMaxModifiedTime));
    }

    // Remove all the merged marked references
    GarbageCollectionType.get(blobStore).removeAllMarkedReferences(blobStore);
    LOG.debug("Ending sweep phase of the garbage collector");
    return deleted;
}
From source file:org.apache.jackrabbit.oak.upgrade.blob.LengthCachingDataStore.java
private static Map<String, Long> loadMappingData(File mappingFile) throws FileNotFoundException {
    Map<String, Long> mapping = new HashMap<String, Long>();
    log.info("Reading mapping data from {}", mappingFile.getAbsolutePath());
    LineIterator itr = new LineIterator(Files.newReader(mappingFile, Charsets.UTF_8));
    try {
        while (itr.hasNext()) {
            String line = itr.nextLine();
            int indexOfBar = line.indexOf(SEPARATOR);
            checkState(indexOfBar > 0, "Malformed entry found [%s]", line);
            String length = line.substring(0, indexOfBar);
            String id = line.substring(indexOfBar + 1);
            mapping.put(id.trim(), Long.valueOf(length));
        }
        log.info("Total {} mapping entries found", mapping.size());
    } finally {
        itr.close();
    }
    return mapping;
}
From source file:org.apache.marmotta.loader.rio.GeonamesParser.java
/**
 * Parses the data from the supplied InputStream, using the supplied baseURI
 * to resolve any relative URI references.
 *
 * @param in      The InputStream from which to read the data.
 * @param baseURI The URI associated with the data in the InputStream.
 * @throws java.io.IOException                 If an I/O error occurred while data was read from the InputStream.
 * @throws org.openrdf.rio.RDFParseException   If the parser has found an unrecoverable parse error.
 * @throws org.openrdf.rio.RDFHandlerException If the configured statement handler has encountered an
 *                                             unrecoverable error.
 */
@Override
public void parse(InputStream in, String baseURI) throws IOException, RDFParseException, RDFHandlerException {
    LineIterator it = IOUtils.lineIterator(in, RDFFormat.RDFXML.getCharset());
    try {
        while (it.hasNext()) {
            lineNumber++;
            String line = it.nextLine();
            if (lineNumber % 2 == 0) {
                // only even line numbers contain triples here
                StringReader buffer = new StringReader(line);
                lineParser.parse(buffer, baseURI);
            }
        }
    } finally {
        it.close();
    }
}
From source file:org.apache.marmotta.loader.rio.GeonamesParser.java
/**
 * Parses the data from the supplied Reader, using the supplied baseURI to
 * resolve any relative URI references.
 *
 * @param reader  The Reader from which to read the data.
 * @param baseURI The URI associated with the data in the InputStream.
 * @throws java.io.IOException                 If an I/O error occurred while data was read from the InputStream.
 * @throws org.openrdf.rio.RDFParseException   If the parser has found an unrecoverable parse error.
 * @throws org.openrdf.rio.RDFHandlerException If the configured statement handler has encountered an
 *                                             unrecoverable error.
 */
@Override
public void parse(Reader reader, String baseURI) throws IOException, RDFParseException, RDFHandlerException {
    LineIterator it = IOUtils.lineIterator(reader);
    try {
        while (it.hasNext()) {
            lineNumber++;
            String line = it.nextLine();
            if (lineNumber % 2 == 1) {
                // only odd line numbers contain triples
                StringReader buffer = new StringReader(line);
                lineParser.parse(buffer, baseURI);
            }
        }
    } finally {
        it.close();
    }
}
From source file:org.apache.marmotta.platform.core.services.prefix.PrefixCC.java
@Override
public String getNamespace(final String prefix) {
    HttpGet get = new HttpGet(URI + prefix + ".file.txt");
    HttpRequestUtil.setUserAgentString(get, USER_AGENT);
    get.setHeader(ACCEPT, "text/plain");
    try {
        return httpClientService.execute(get, new ResponseHandler<String>() {
            @Override
            public String handleResponse(HttpResponse response) throws ClientProtocolException, IOException {
                if (200 == response.getStatusLine().getStatusCode()) {
                    HttpEntity entity = response.getEntity();
                    final LineIterator it = IOUtils.lineIterator(entity.getContent(), Charset.defaultCharset());
                    try {
                        while (it.hasNext()) {
                            final String l = it.next();
                            if (l.startsWith(prefix + "\t")) {
                                return l.substring(prefix.length() + 1);
                            }
                        }
                    } finally {
                        it.close();
                    }
                }
                log.error("Error: prefix '" + prefix + "' not found at prefix.cc");
                return null;
            }
        });
    } catch (Exception e) {
        log.error("Error retrieving prefix '" + prefix + "' from prefix.cc: " + e.getMessage());
        return null;
    }
}
From source file:org.apache.marmotta.platform.core.services.prefix.PrefixCC.java
@Override
public String getPrefix(final String namespace) {
    try {
        HttpGet get = new HttpGet(URI + "reverse?format=txt&uri=" + URLEncoder.encode(namespace, "utf-8"));
        HttpRequestUtil.setUserAgentString(get, USER_AGENT);
        get.setHeader(ACCEPT, "text/plain");
        return httpClientService.execute(get, new ResponseHandler<String>() {
            @Override
            public String handleResponse(HttpResponse response) throws ClientProtocolException, IOException {
                if (200 == response.getStatusLine().getStatusCode()) {
                    HttpEntity entity = response.getEntity();
                    final LineIterator it = IOUtils.lineIterator(entity.getContent(), Charset.defaultCharset());
                    try {
                        while (it.hasNext()) {
                            final String l = it.next();
                            if (l.endsWith("\t" + namespace)) {
                                return l.substring(0, l.indexOf("\t"));
                            }
                        }
                    } finally {
                        it.close();
                    }
                }
                log.error("Error: reverse namespace lookup for '" + namespace + "' not found at prefix.cc");
                return null;
            }
        });
    } catch (Exception e) {
        log.error("Error trying to retrieve prefix.cc reverse lookup for namespace '" + namespace + "': "
                + e.getMessage());
        return null;
    }
}
From source file:org.apache.nifi.processors.csv.ExtractCSVHeader.java
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }

    final AtomicBoolean lineFound = new AtomicBoolean(false);
    final Map<String, String> attrs = new HashMap<>();
    final AtomicInteger headerLength = new AtomicInteger(0);

    session.read(original, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            // TODO expose the charset property?
            LineIterator iterator = IOUtils.lineIterator(inputStream, UTF_8);
            if (iterator.hasNext()) {
                lineFound.set(true);
                final String header = iterator.nextLine();

                final String format = context.getProperty(PROP_FORMAT).getValue();
                final String delimiter = context.getProperty(PROP_DELIMITER)
                        .evaluateAttributeExpressions(original).getValue();
                final String prefix = context.getProperty(PROP_SCHEMA_ATTR_PREFIX)
                        .evaluateAttributeExpressions(original).getValue();

                attrs.put(prefix + ATTR_HEADER_ORIGINAL, header);

                // TODO validate delimiter in the callback first
                final CSVFormat csvFormat = buildFormat(format, delimiter,
                        true, // we assume first line is the header
                        null); // no custom header

                final CSVParser parser = csvFormat.parse(new StringReader(header));
                final Map<String, Integer> headers = parser.getHeaderMap();
                final int columnCount = headers.size();
                attrs.put(prefix + ATTR_HEADER_COLUMN_COUNT, String.valueOf(columnCount));
                for (Map.Entry<String, Integer> h : headers.entrySet()) {
                    // CSV columns are 1-based in Excel
                    attrs.put(prefix + (h.getValue() + 1), h.getKey());
                }

                // strip the header and send to the 'content' relationship
                if (StringUtils.isNotBlank(header)) {
                    int hLength = header.length();
                    // move past the new line if there are more lines
                    if (original.getSize() > hLength + 1) {
                        hLength++;
                    }
                    headerLength.set(hLength);
                }
            }
        }
    });

    if (lineFound.get()) {
        FlowFile ff = session.putAllAttributes(original, attrs);
        int offset = headerLength.get();
        if (offset > 0) {
            FlowFile contentOnly = session.clone(ff, offset, original.getSize() - offset);
            session.transfer(contentOnly, REL_CONTENT);
        }
        session.transfer(ff, REL_ORIGINAL);
    } else {
        session.transfer(original, REL_FAILURE);
    }
}
From source file:org.apache.nifi.processors.csv.ParseCSVRecord.java
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }

    final AtomicBoolean lineFound = new AtomicBoolean(false);
    final Map<String, String> outputAttrs = new HashMap<>();

    session.read(original, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            final String fromAttribute = context.getProperty(PROP_RECORD_FROM_ATTRIBUTE).getValue();

            String unparsedRecord;
            // data source is the attribute
            if (StringUtils.isNotBlank(fromAttribute)) {
                unparsedRecord = original.getAttribute(fromAttribute);
                if (StringUtils.isBlank(unparsedRecord)) {
                    // will be routed to failure at the end of the method implementation
                    return;
                }
            } else {
                // data source is the content
                // TODO expose the charset property?
                LineIterator iterator = IOUtils.lineIterator(inputStream, UTF_8);
                if (!iterator.hasNext()) {
                    return;
                }
                unparsedRecord = iterator.next();
            }

            lineFound.set(true);

            final String format = context.getProperty(PROP_FORMAT).getValue();
            final String delimiter = context.getProperty(PROP_DELIMITER).evaluateAttributeExpressions(original)
                    .getValue();
            final String schemaPrefix = context.getProperty(PROP_SCHEMA_ATTR_PREFIX)
                    .evaluateAttributeExpressions(original).getValue();
            final String valuePrefix = context.getProperty(PROP_VALUE_ATTR_PREFIX)
                    .evaluateAttributeExpressions(original).getValue();
            final boolean trimValues = context.getProperty(PROP_TRIM_VALUES).asBoolean();

            final CSVFormat csvFormat = buildFormat(format, delimiter,
                    false, // this is a payload, not header anymore
                    null); // no custom header
            final CSVParser parser = csvFormat.parse(new StringReader(unparsedRecord));
            List<CSVRecord> records = parser.getRecords();
            if (records.size() > 1) {
                // TODO revisit for NiFi's native micro-batching
                throw new ProcessException("Multi-line entries not supported");
            }

            CSVRecord record = records.get(0);

            Map<String, String> originalAttrs = original.getAttributes();
            // filter delimited schema attributes only
            Map<String, String> schemaAttrs = new HashMap<>();
            for (String key : originalAttrs.keySet()) {
                if (key.startsWith(schemaPrefix)) {
                    schemaAttrs.put(key, originalAttrs.get(key));
                }
            }

            // put key/value pairs into attributes
            for (int i = 0; i < record.size(); i++) {
                String columnName = schemaAttrs.get(schemaPrefix + (i + 1)); // 1-based column numbering
                if (columnName == null) {
                    // 1-based column index
                    columnName = String.valueOf(i + 1);
                }
                // TODO indexed schemaless parsing vs auto-schema vs user-provided schema
                String columnValue = record.get(i);
                if (trimValues) {
                    columnValue = columnValue.trim();
                }
                String attrName = (StringUtils.isBlank(valuePrefix) ? "delimited.column." : valuePrefix) + columnName;
                outputAttrs.put(attrName, columnValue);
            }
        }
    });

    if (lineFound.get()) {
        FlowFile ff = session.putAllAttributes(original, outputAttrs);
        session.transfer(ff, REL_SUCCESS);
    } else {
        session.transfer(original, REL_FAILURE);
    }
}
From source file:org.apache.olingo.client.core.communication.request.batch.ODataBatchUtilities.java
/**
 * Reads headers from the batch starting from the given position.
 * <p>
 * Retrieved headers will be added to the map given by target parameter.
 *
 * @param iterator batch iterator.
 * @param target destination of the retrieved headers.
 */
public static void readHeaders(final ODataBatchLineIterator iterator,
        final Map<String, Collection<String>> target) {
    try {
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        readBatchPart(new ODataBatchController(iterator, null), baos, true);

        final LineIterator headers = IOUtils.lineIterator(new ByteArrayInputStream(baos.toByteArray()),
                Constants.UTF8);
        while (headers.hasNext()) {
            final String line = headers.nextLine().trim();
            if (StringUtils.isNotBlank(line)) {
                addHeaderLine(line, target);
            }
        }
    } catch (Exception e) {
        LOG.error("Error retrieving headers", e);
        throw new IllegalStateException(e);
    }
}
From source file:org.apache.sling.distribution.queue.impl.simple.SimpleDistributionQueueProvider.java
public void enableQueueProcessing(@Nonnull DistributionQueueProcessor queueProcessor, String... queueNames) {
    if (checkpoint) {
        // recover from checkpoints
        log.debug("recovering from checkpoints if needed");
        for (final String queueName : queueNames) {
            log.debug("recovering for queue {}", queueName);
            DistributionQueue queue = getQueue(queueName);
            FilenameFilter filenameFilter = new FilenameFilter() {
                @Override
                public boolean accept(File file, String name) {
                    return name.equals(queueName + "-checkpoint");
                }
            };
            for (File qf : checkpointDirectory.listFiles(filenameFilter)) {
                log.info("recovering from checkpoint {}", qf);
                try {
                    LineIterator lineIterator = IOUtils.lineIterator(new FileReader(qf));
                    while (lineIterator.hasNext()) {
                        String s = lineIterator.nextLine();
                        String[] split = s.split(" ");
                        String id = split[0];
                        String infoString = split[1];
                        Map<String, Object> info = new HashMap<String, Object>();
                        JSONTokener jsonTokener = new JSONTokener(infoString);
                        JSONObject jsonObject = new JSONObject(jsonTokener);
                        Iterator<String> keys = jsonObject.keys();
                        while (keys.hasNext()) {
                            String key = keys.next();
                            JSONArray v = jsonObject.optJSONArray(key);
                            if (v != null) {
                                String[] a = new String[v.length()];
                                for (int i = 0; i < a.length; i++) {
                                    a[i] = v.getString(i);
                                }
                                info.put(key, a);
                            } else {
                                info.put(key, jsonObject.getString(key));
                            }
                        }
                        queue.add(new DistributionQueueItem(id, info));
                    }
                    log.info("recovered {} items from queue {}", queue.getStatus().getItemsCount(), queueName);
                } catch (FileNotFoundException e) {
                    log.warn("could not read checkpoint file {}", qf.getAbsolutePath());
                } catch (JSONException e) {
                    log.warn("could not parse info from checkpoint file {}", qf.getAbsolutePath());
                }
            }
        }

        // enable checkpointing
        for (String queueName : queueNames) {
            ScheduleOptions options = scheduler.NOW(-1, 15).canRunConcurrently(false)
                    .name(getJobName(queueName + "-checkpoint"));
            scheduler.schedule(new SimpleDistributionQueueCheckpoint(getQueue(queueName), checkpointDirectory),
                    options);
        }
    }

    // enable processing
    for (String queueName : queueNames) {
        ScheduleOptions options = scheduler.NOW(-1, 1).canRunConcurrently(false).name(getJobName(queueName));
        scheduler.schedule(new SimpleDistributionQueueProcessor(getQueue(queueName), queueProcessor), options);
    }
}