Example usage for org.apache.commons.io LineIterator hasNext

Introduction

On this page you can find usage examples of org.apache.commons.io.LineIterator#hasNext.
Prototype

public boolean hasNext() 

Document

Indicates whether the Reader has more lines.
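
A minimal, self-contained sketch of the typical hasNext()/nextLine() loop (assuming commons-io is on the classpath; example.txt is a hypothetical placeholder file):

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;

public class LineIteratorHasNextExample {
    public static void main(String[] args) throws IOException {
        File file = new File("example.txt"); // hypothetical input file
        LineIterator it = FileUtils.lineIterator(file, StandardCharsets.UTF_8.name());
        try {
            while (it.hasNext()) {
                String line = it.nextLine();
                System.out.println(line);
            }
        } finally {
            // the iterator keeps the underlying Reader open until it is closed
            LineIterator.closeQuietly(it);
        }
    }
}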

Usage

From source file:org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector.java

/**
 * Sweep phase of gc candidate deletion.
 * <p>
 * Performs the following steps depending upon the type of the blob store refer
 * {@link org.apache.jackrabbit.oak.plugins.blob.SharedDataStore.Type}:
 *
 * <ul>
 *     <li>Shared</li>
 *     <li>
 *     <ul>
 *      <li> Merge all marked references (from the mark phase run independently) available in the data store meta
 *          store (from all configured independent repositories).
 *      <li> Retrieve all blob ids available.
 *      <li> Diffs the 2 sets above to retrieve list of blob ids not used.
 *      <li> Deletes only blobs created after
 *          (earliest time stamp of the marked references - #maxLastModifiedInterval) from the above set.
 *     </ul>
 *     </li>
 *
 *     <li>Default</li>
 *     <li>
 *     <ul>
 *      <li> Mark phase already run.
 *      <li> Retrieve all blob ids available.
 *      <li> Diffs the 2 sets above to retrieve list of blob ids not used.
 *      <li> Deletes only blobs created after
 *          (time stamp of the marked references - #maxLastModifiedInterval).
 *     </ul>
 *     </li>
 * </ul>
 *
 * @return the number of blobs deleted
 * @throws Exception the exception
 * @param fs the garbage collector file state
 * @param markStart the start time of mark to take as reference for deletion
 */
protected long sweep(GarbageCollectorFileState fs, long markStart) throws Exception {
    long earliestRefAvailTime;
    // Merge all the blob references available from all the reference files in the data store meta store
    // Only go ahead if merge succeeded
    try {
        earliestRefAvailTime = GarbageCollectionType.get(blobStore).mergeAllMarkedReferences(blobStore, fs);
        LOG.debug("Earliest reference available for timestamp [{}]", earliestRefAvailTime);
        earliestRefAvailTime = (earliestRefAvailTime < markStart ? earliestRefAvailTime : markStart);
    } catch (Exception e) {
        return 0;
    }

    // Find all blob references after iterating over the whole repository
    (new BlobIdRetriever(fs)).call();

    // Calculate the references not used
    difference(fs);
    long count = 0;
    long deleted = 0;

    long lastMaxModifiedTime = getLastMaxModifiedTime(earliestRefAvailTime);
    LOG.debug("Starting sweep phase of the garbage collector");
    LOG.debug("Sweeping blobs with modified time > than the configured max deleted time ({}). ",
            timestampToString(lastMaxModifiedTime));

    ConcurrentLinkedQueue<String> exceptionQueue = new ConcurrentLinkedQueue<String>();

    LineIterator iterator = FileUtils.lineIterator(fs.getGcCandidates(), Charsets.UTF_8.name());
    List<String> ids = newArrayList();

    while (iterator.hasNext()) {
        ids.add(iterator.next());

        if (ids.size() >= getBatchCount()) {
            count += ids.size();
            deleted += sweepInternal(ids, exceptionQueue, lastMaxModifiedTime);
            ids = newArrayList();
        }
    }
    if (!ids.isEmpty()) {
        count += ids.size();
        deleted += sweepInternal(ids, exceptionQueue, lastMaxModifiedTime);
    }

    BufferedWriter writer = null;
    try {
        if (!exceptionQueue.isEmpty()) {
            writer = Files.newWriter(fs.getGarbage(), Charsets.UTF_8);
            saveBatchToFile(newArrayList(exceptionQueue), writer);
        }
    } finally {
        LineIterator.closeQuietly(iterator);
        IOUtils.closeQuietly(writer);
    }

    if (!exceptionQueue.isEmpty()) {
        LOG.warn(
                "Unable to delete some blobs entries from the blob store. Details around such blob entries can "
                        + "be found in [{}]",
                fs.getGarbage().getAbsolutePath());
    }
    if (count != deleted) {
        LOG.warn(
                "Deleted only [{}] blobs entries from the [{}] candidates identified. This may happen if blob "
                        + "modified time is > " + "than the max deleted time ({})",
                deleted, count, timestampToString(lastMaxModifiedTime));
    }

    // Remove all the merged marked references
    GarbageCollectionType.get(blobStore).removeAllMarkedReferences(blobStore);
    LOG.debug("Ending sweep phase of the garbage collector");
    return deleted;
}
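
The method above drains the candidates file in fixed-size batches rather than loading it into memory. A reduced sketch of that batching pattern follows; the batchSize parameter and the processBatch callback are hypothetical stand-ins, not part of the Jackrabbit Oak API:

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;

public final class BatchedLineProcessor {

    /** Feeds the file's lines to processBatch in groups of batchSize, flushing any remainder. */
    public static void processInBatches(File file, int batchSize, Consumer<List<String>> processBatch)
            throws IOException {
        LineIterator iterator = FileUtils.lineIterator(file, "UTF-8");
        List<String> batch = new ArrayList<>();
        try {
            while (iterator.hasNext()) {
                batch.add(iterator.nextLine());
                if (batch.size() >= batchSize) {
                    processBatch.accept(batch); // e.g. delete the ids collected so far
                    batch = new ArrayList<>();
                }
            }
            if (!batch.isEmpty()) {
                processBatch.accept(batch); // flush the final partial batch
            }
        } finally {
            LineIterator.closeQuietly(iterator);
        }
    }
}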

From source file:org.apache.jackrabbit.oak.upgrade.blob.LengthCachingDataStore.java

private static Map<String, Long> loadMappingData(File mappingFile) throws FileNotFoundException {
    Map<String, Long> mapping = new HashMap<String, Long>();
    log.info("Reading mapping data from {}", mappingFile.getAbsolutePath());
    LineIterator itr = new LineIterator(Files.newReader(mappingFile, Charsets.UTF_8));
    try {
        while (itr.hasNext()) {
            String line = itr.nextLine();
            int indexOfBar = line.indexOf(SEPARATOR);
            checkState(indexOfBar > 0, "Malformed entry found [%s]", line);
            String length = line.substring(0, indexOfBar);
            String id = line.substring(indexOfBar + 1);
            mapping.put(id.trim(), Long.valueOf(length));
        }
        log.info("Total {} mapping entries found", mapping.size());
    } finally {
        itr.close();
    }
    return mapping;
}

From source file:org.apache.marmotta.loader.rio.GeonamesParser.java

/**
 * Parses the data from the supplied InputStream, using the supplied baseURI
 * to resolve any relative URI references.
 *
 * @param in      The InputStream from which to read the data.
 * @param baseURI The URI associated with the data in the InputStream.
 * @throws java.io.IOException                 If an I/O error occurred while data was read from the InputStream.
 * @throws org.openrdf.rio.RDFParseException   If the parser has found an unrecoverable parse error.
 * @throws org.openrdf.rio.RDFHandlerException If the configured statement handler has encountered an
 *                                             unrecoverable error.
 */
@Override
public void parse(InputStream in, String baseURI) throws IOException, RDFParseException, RDFHandlerException {
    LineIterator it = IOUtils.lineIterator(in, RDFFormat.RDFXML.getCharset());
    try {
        while (it.hasNext()) {
            lineNumber++;

            String line = it.nextLine();
            if (lineNumber % 2 == 0) {
                // only every second line contains the RDF/XML payload
                StringReader buffer = new StringReader(line);
                lineParser.parse(buffer, baseURI);
            }
        }
    } finally {
        it.close();
    }
}

From source file:org.apache.marmotta.loader.rio.GeonamesParser.java

/**
 * Parses the data from the supplied Reader, using the supplied baseURI to
 * resolve any relative URI references.
 *
 * @param reader  The Reader from which to read the data.
 * @param baseURI The URI associated with the data in the InputStream.
 * @throws java.io.IOException                 If an I/O error occurred while data was read from the InputStream.
 * @throws org.openrdf.rio.RDFParseException   If the parser has found an unrecoverable parse error.
 * @throws org.openrdf.rio.RDFHandlerException If the configured statement handler has encountered an
 *                                             unrecoverable error.
 */
@Override
public void parse(Reader reader, String baseURI) throws IOException, RDFParseException, RDFHandlerException {
    LineIterator it = IOUtils.lineIterator(reader);
    try {
        while (it.hasNext()) {
            lineNumber++;

            String line = it.nextLine();
            if (lineNumber % 2 == 1) {
                // only odd line numbers contain triples
                StringReader buffer = new StringReader(line);
                lineParser.parse(buffer, baseURI);
            }
        }
    } finally {
        it.close();
    }
}

From source file:org.apache.marmotta.platform.core.services.prefix.PrefixCC.java

@Override
public String getNamespace(final String prefix) {
    HttpGet get = new HttpGet(URI + prefix + ".file.txt");
    HttpRequestUtil.setUserAgentString(get, USER_AGENT);
    get.setHeader(ACCEPT, "text/plain");
    try {
        return httpClientService.execute(get, new ResponseHandler<String>() {

            @Override
            public String handleResponse(HttpResponse response) throws ClientProtocolException, IOException {
                if (200 == response.getStatusLine().getStatusCode()) {
                    HttpEntity entity = response.getEntity();

                    final LineIterator it = IOUtils.lineIterator(entity.getContent(), Charset.defaultCharset());
                    try {
                        while (it.hasNext()) {
                            final String l = it.next();
                            if (l.startsWith(prefix + "\t")) {
                                return l.substring(prefix.length() + 1);
                            }
                        }
                    } finally {
                        it.close();
                    }
                }
                log.error("Error: prefix '" + prefix + "' not found at prefix.cc");
                return null;
            }
        });
    } catch (Exception e) {
        log.error("Error retrieving prefix '" + prefix + "' from prefix.cc: " + e.getMessage());
        return null;
    }
}

From source file:org.apache.marmotta.platform.core.services.prefix.PrefixCC.java

@Override
public String getPrefix(final String namespace) {
    try {
        HttpGet get = new HttpGet(URI + "reverse?format=txt&uri=" + URLEncoder.encode(namespace, "utf-8"));
        HttpRequestUtil.setUserAgentString(get, USER_AGENT);
        get.setHeader(ACCEPT, "text/plain");

        return httpClientService.execute(get, new ResponseHandler<String>() {

            @Override
            public String handleResponse(HttpResponse response) throws ClientProtocolException, IOException {
                if (200 == response.getStatusLine().getStatusCode()) {
                    HttpEntity entity = response.getEntity();

                    final LineIterator it = IOUtils.lineIterator(entity.getContent(), Charset.defaultCharset());
                    try {
                        while (it.hasNext()) {
                            final String l = it.next();
                            if (l.endsWith("\t" + namespace)) {
                                return l.substring(0, l.indexOf("\t"));
                            }
                        }
                    } finally {
                        it.close();
                    }
                }
                log.error("Error: reverse namespace lookup for '" + namespace + "' not found at prefix.cc");
                return null;
            }
        });
    } catch (Exception e) {
        log.error("Error trying to retrieve prefic.cc reverse lookup for namespace '" + namespace + "': "
                + e.getMessage());
        return null;
    }
}

From source file:org.apache.nifi.processors.csv.ExtractCSVHeader.java

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }

    final AtomicBoolean lineFound = new AtomicBoolean(false);
    final Map<String, String> attrs = new HashMap<>();

    final AtomicInteger headerLength = new AtomicInteger(0);

    session.read(original, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            // TODO expose the charset property?
            LineIterator iterator = IOUtils.lineIterator(inputStream, UTF_8);
            if (iterator.hasNext()) {
                lineFound.set(true);
                final String header = iterator.nextLine();

                final String format = context.getProperty(PROP_FORMAT).getValue();
                final String delimiter = context.getProperty(PROP_DELIMITER)
                        .evaluateAttributeExpressions(original).getValue();
                final String prefix = context.getProperty(PROP_SCHEMA_ATTR_PREFIX)
                        .evaluateAttributeExpressions(original).getValue();

                attrs.put(prefix + ATTR_HEADER_ORIGINAL, header);
                // TODO validate delimiter in the callback first
                final CSVFormat csvFormat = buildFormat(format, delimiter, true, // we assume first line is the header
                        null); // no custom header
                final CSVParser parser = csvFormat.parse(new StringReader(header));
                final Map<String, Integer> headers = parser.getHeaderMap();
                final int columnCount = headers.size();
                attrs.put(prefix + ATTR_HEADER_COLUMN_COUNT, String.valueOf(columnCount));
                for (Map.Entry<String, Integer> h : headers.entrySet()) {
                    // CSV columns are 1-based in Excel
                    attrs.put(prefix + (h.getValue() + 1), h.getKey());
                }

                // strip the header and send to the 'content' relationship
                if (StringUtils.isNotBlank(header)) {
                    int hLength = header.length();
                    // move past the new line if there are more lines
                    if (original.getSize() > hLength + 1) {
                        hLength++;
                    }
                    headerLength.set(hLength);
                }
            }
        }
    });

    if (lineFound.get()) {
        FlowFile ff = session.putAllAttributes(original, attrs);

        int offset = headerLength.get();
        if (offset > 0) {
            FlowFile contentOnly = session.clone(ff, offset, original.getSize() - offset);
            session.transfer(contentOnly, REL_CONTENT);
        }

        session.transfer(ff, REL_ORIGINAL);
    } else {
        session.transfer(original, REL_FAILURE);
    }
}
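
The processor above only needs the first line of the incoming stream, so a single hasNext() check guards the read. A reduced sketch of that header-peek pattern (the UTF-8 charset and the caller owning the stream are assumptions):

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;

public final class HeaderPeek {

    /** Returns the first line of the stream, or null if the stream has no lines. */
    public static String readHeaderLine(InputStream in) throws IOException {
        LineIterator iterator = IOUtils.lineIterator(in, StandardCharsets.UTF_8);
        // hasNext() returns false on an empty stream instead of throwing
        if (iterator.hasNext()) {
            return iterator.nextLine();
        }
        return null; // the caller retains ownership of the stream and closes it
    }
}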

From source file:org.apache.nifi.processors.csv.ParseCSVRecord.java

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }

    final AtomicBoolean lineFound = new AtomicBoolean(false);
    final Map<String, String> outputAttrs = new HashMap<>();

    session.read(original, new InputStreamCallback() {
        @Override
        public void process(InputStream inputStream) throws IOException {
            final String fromAttribute = context.getProperty(PROP_RECORD_FROM_ATTRIBUTE).getValue();

            String unparsedRecord;
            // data source is the attribute
            if (StringUtils.isNotBlank(fromAttribute)) {
                unparsedRecord = original.getAttribute(fromAttribute);
                if (StringUtils.isBlank(unparsedRecord)) {
                    // will be routed to failure at the end of the method implementation
                    return;
                }
            } else {
                // data source is the content
                // TODO expose the charset property?
                LineIterator iterator = IOUtils.lineIterator(inputStream, UTF_8);
                if (!iterator.hasNext()) {
                    return;
                }
                unparsedRecord = iterator.next();
            }

            lineFound.set(true);
            final String format = context.getProperty(PROP_FORMAT).getValue();
            final String delimiter = context.getProperty(PROP_DELIMITER).evaluateAttributeExpressions(original)
                    .getValue();
            final String schemaPrefix = context.getProperty(PROP_SCHEMA_ATTR_PREFIX)
                    .evaluateAttributeExpressions(original).getValue();
            final String valuePrefix = context.getProperty(PROP_VALUE_ATTR_PREFIX)
                    .evaluateAttributeExpressions(original).getValue();
            final boolean trimValues = context.getProperty(PROP_TRIM_VALUES).asBoolean();

            final CSVFormat csvFormat = buildFormat(format, delimiter, false, // this is a payload, not header anymore
                    null); // no custom header

            final CSVParser parser = csvFormat.parse(new StringReader(unparsedRecord));
            List<CSVRecord> records = parser.getRecords();
            if (records.size() > 1) {
                // TODO revisit for NiFi's native micro-batching
                throw new ProcessException("Multi-line entries not supported");
            }

            CSVRecord record = records.get(0);

            Map<String, String> originalAttrs = original.getAttributes();
            // filter delimited schema attributes only
            Map<String, String> schemaAttrs = new HashMap<>();
            for (String key : originalAttrs.keySet()) {
                if (key.startsWith(schemaPrefix)) {
                    schemaAttrs.put(key, originalAttrs.get(key));
                }
            }

            // put key/value pairs into attributes
            for (int i = 0; i < record.size(); i++) {
                String columnName = schemaAttrs.get(schemaPrefix + (i + 1)); // 1-based column numbering
                if (columnName == null) {
                    // 1-based column index
                    columnName = String.valueOf(i + 1);
                }
                // TODO indexed schemaless parsing vs auto-schema vs user-provided schema
                String columnValue = record.get(i);
                if (trimValues) {
                    columnValue = columnValue.trim();
                }
                String attrName = (StringUtils.isBlank(valuePrefix) ? "delimited.column." : valuePrefix)
                        + columnName;
                outputAttrs.put(attrName, columnValue);
            }
        }
    });

    if (lineFound.get()) {
        FlowFile ff = session.putAllAttributes(original, outputAttrs);
        session.transfer(ff, REL_SUCCESS);
    } else {
        session.transfer(original, REL_FAILURE);
    }
}

From source file:org.apache.olingo.client.core.communication.request.batch.ODataBatchUtilities.java

/**
 * Reads headers from the batch starting from the given position.
 * <p>
 * Retrieved headers will be added to the map given by target parameter.
 *
 * @param iterator batch iterator.
 * @param target destination of the retrieved headers.
 */
public static void readHeaders(final ODataBatchLineIterator iterator,
        final Map<String, Collection<String>> target) {

    try {
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        readBatchPart(new ODataBatchController(iterator, null), baos, true);

        final LineIterator headers = IOUtils.lineIterator(new ByteArrayInputStream(baos.toByteArray()),
                Constants.UTF8);
        while (headers.hasNext()) {
            final String line = headers.nextLine().trim();
            if (StringUtils.isNotBlank(line)) {
                addHeaderLine(line, target);
            }
        }
    } catch (Exception e) {
        LOG.error("Error retrieving headers", e);
        throw new IllegalStateException(e);
    }
}

From source file:org.apache.sling.distribution.queue.impl.simple.SimpleDistributionQueueProvider.java

public void enableQueueProcessing(@Nonnull DistributionQueueProcessor queueProcessor, String... queueNames) {

    if (checkpoint) {
        // recover from checkpoints
        log.debug("recovering from checkpoints if needed");
        for (final String queueName : queueNames) {
            log.debug("recovering for queue {}", queueName);
            DistributionQueue queue = getQueue(queueName);
            FilenameFilter filenameFilter = new FilenameFilter() {
                @Override
                public boolean accept(File file, String name) {
                    return name.equals(queueName + "-checkpoint");
                }
            };
            for (File qf : checkpointDirectory.listFiles(filenameFilter)) {
                log.info("recovering from checkpoint {}", qf);
                try {
                    LineIterator lineIterator = IOUtils.lineIterator(new FileReader(qf));
                    while (lineIterator.hasNext()) {
                        String s = lineIterator.nextLine();
                        String[] split = s.split(" ");
                        String id = split[0];
                        String infoString = split[1];
                        Map<String, Object> info = new HashMap<String, Object>();
                        JSONTokener jsonTokener = new JSONTokener(infoString);
                        JSONObject jsonObject = new JSONObject(jsonTokener);
                        Iterator<String> keys = jsonObject.keys();
                        while (keys.hasNext()) {
                            String key = keys.next();
                            JSONArray v = jsonObject.optJSONArray(key);
                            if (v != null) {
                                String[] a = new String[v.length()];
                                for (int i = 0; i < a.length; i++) {
                                    a[i] = v.getString(i);
                                }
                                info.put(key, a);
                            } else {
                                info.put(key, jsonObject.getString(key));
                            }
                        }
                        queue.add(new DistributionQueueItem(id, info));
                    }
                    log.info("recovered {} items from queue {}", queue.getStatus().getItemsCount(), queueName);
                } catch (FileNotFoundException e) {
                    log.warn("could not read checkpoint file {}", qf.getAbsolutePath());
                } catch (JSONException e) {
                    log.warn("could not parse info from checkpoint file {}", qf.getAbsolutePath());
                }
            }
        }

        // enable checkpointing
        for (String queueName : queueNames) {
            ScheduleOptions options = scheduler.NOW(-1, 15).canRunConcurrently(false)
                    .name(getJobName(queueName + "-checkpoint"));
            scheduler.schedule(new SimpleDistributionQueueCheckpoint(getQueue(queueName), checkpointDirectory),
                    options);
        }
    }

    // enable processing
    for (String queueName : queueNames) {
        ScheduleOptions options = scheduler.NOW(-1, 1).canRunConcurrently(false).name(getJobName(queueName));
        scheduler.schedule(new SimpleDistributionQueueProcessor(getQueue(queueName), queueProcessor), options);
    }

}