org.apache.nifi.provenance.lucene.DocsReader.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.nifi.provenance.lucene.DocsReader.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.provenance.lucene;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.SearchableFields;
import org.apache.nifi.provenance.StandardProvenanceEventRecord;
import org.apache.nifi.provenance.authorization.EventAuthorizer;
import org.apache.nifi.provenance.serialization.RecordReader;
import org.apache.nifi.provenance.serialization.RecordReaders;
import org.apache.nifi.provenance.toc.TocReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Turns Lucene search results into {@link ProvenanceEventRecord}s by reading the referenced
 * events back out of the Provenance Log Files that each Lucene {@link Document} points at.
 */
public class DocsReader {
    private static final Logger logger = LoggerFactory.getLogger(DocsReader.class);

    /**
     * Loads the Lucene Documents for the given search hits and reads the events they refer to.
     * At most {@code maxResults} hits are loaded, walking from the last considered hit back to the first.
     *
     * @param topDocs the search hits whose documents should be loaded
     * @param authorizer only events for which {@code isAuthorized} returns true are returned
     * @param indexReader the reader used to load each hit's Document
     * @param allProvenanceLogFiles the known Provenance Log Files, used to locate each event's storage file
     * @param retrievalCount running count of retrieval attempts; it is incremented for every document
     *            that is attempted and compared against {@code maxResults}
     * @param maxResults the retrieval limit
     * @param maxAttributeChars passed through to the record reader (presumably the maximum length of
     *            attribute values to retain; confirm against {@code RecordReaders})
     * @return the authorized events that could be read, or an empty set if the limit was already reached
     * @throws IOException if a Document cannot be loaded from the index
     */
    public Set<ProvenanceEventRecord> read(final TopDocs topDocs, final EventAuthorizer authorizer,
            final IndexReader indexReader, final Collection<Path> allProvenanceLogFiles,
            final AtomicInteger retrievalCount, final int maxResults, final int maxAttributeChars)
            throws IOException {
        if (retrievalCount.get() >= maxResults) {
            return Collections.emptySet();
        }

        final long start = System.nanoTime();
        final ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        final int numDocs = Math.min(scoreDocs.length, maxResults);
        final List<Document> docs = new ArrayList<>(numDocs);

        // Documents are collected in reverse hit order; downstream grouping sees them in this order.
        for (int i = numDocs - 1; i >= 0; i--) {
            final int docId = scoreDocs[i].doc;
            final Document d = indexReader.document(docId);
            docs.add(d);
        }

        final long readDocuments = System.nanoTime() - start;
        logger.debug("Reading {} Lucene Documents took {} millis", docs.size(),
                TimeUnit.NANOSECONDS.toMillis(readDocuments));
        return read(docs, authorizer, allProvenanceLogFiles, retrievalCount, maxResults, maxAttributeChars);
    }

    /**
     * Determines the offset in the event file at which to start looking for the event described by
     * the given Document. If the Document carries a block index, the offset of that block is looked
     * up via the reader's {@link TocReader}; otherwise the Document's stored file offset is used.
     *
     * @param d the Lucene Document describing the event
     * @param reader the reader for the event file that holds the event
     * @return the offset to seek to
     */
    private long getByteOffset(final Document d, final RecordReader reader) {
        final IndexableField blockField = d.getField(FieldNames.BLOCK_INDEX);
        if (blockField != null) {
            final int blockIndex = blockField.numericValue().intValue();
            final TocReader tocReader = reader.getTocReader();
            return tocReader.getBlockOffset(blockIndex);
        }

        return d.getField(FieldNames.STORAGE_FILE_OFFSET).numericValue().longValue();
    }

    /**
     * Positions the reader at the block (or offset) named by the Document and scans forward until
     * the event whose id matches the Document's identifier field is found. If the Document has no
     * identifier field, the first record read after positioning is returned.
     *
     * @param d the Lucene Document describing the event
     * @param reader the reader for the event file that holds the event
     * @return the matching event, or {@code null} if the reader ran out of records first
     * @throws IOException if the reader fails while seeking or reading
     */
    private ProvenanceEventRecord getRecord(final Document d, final RecordReader reader) throws IOException {
        final IndexableField blockField = d.getField(FieldNames.BLOCK_INDEX);
        if (blockField == null) {
            reader.skipTo(getByteOffset(d, reader));
        } else {
            reader.skipToBlock(blockField.numericValue().intValue());
        }

        // The identifier field does not change while scanning, so look it up once.
        final IndexableField idField = d.getField(SearchableFields.Identifier.getSearchableFieldName());

        StandardProvenanceEventRecord record;
        while ((record = reader.nextRecord()) != null) {
            if (idField == null || idField.numericValue().longValue() == record.getEventId()) {
                break;
            }
        }

        if (record == null) {
            logger.warn("Failed to read Provenance Event for '{}'. The event file may be missing or corrupted", d);
        }

        return record;
    }

    /**
     * Reads the events described by the given Lucene Documents. Documents are grouped by storage
     * file name so that each Provenance Log File is opened once. Files that cannot be found are
     * skipped with a warning, and a failure while reading one file is logged and does not prevent
     * the remaining files from being read.
     *
     * @param docs the Lucene Documents describing the events to read
     * @param authorizer only events for which {@code isAuthorized} returns true are returned
     * @param allProvenanceLogFiles the known Provenance Log Files, used to locate each event's storage file
     * @param retrievalCount running count of retrieval attempts; it is incremented for every document
     *            that is attempted (whether or not an event is returned for it) and reading stops
     *            once it reaches {@code maxResults}
     * @param maxResults the retrieval limit
     * @param maxAttributeChars passed through to the record reader (presumably the maximum length of
     *            attribute values to retain; confirm against {@code RecordReaders})
     * @return the authorized events that could be read, in the order they were read; an empty set
     *         if the limit was already reached
     * @throws IOException declared for callers; read failures for individual files are logged rather than thrown
     */
    public Set<ProvenanceEventRecord> read(final List<Document> docs, final EventAuthorizer authorizer,
            final Collection<Path> allProvenanceLogFiles, final AtomicInteger retrievalCount, final int maxResults,
            final int maxAttributeChars) throws IOException {

        if (retrievalCount.get() >= maxResults) {
            return Collections.emptySet();
        }

        final long start = System.nanoTime();
        final Set<ProvenanceEventRecord> matchingRecords = new LinkedHashSet<>();
        final Map<String, List<Document>> byStorageNameDocGroups = LuceneUtil.groupDocsByStorageFileName(docs);

        int logFileCount = 0;

        for (final Map.Entry<String, List<Document>> docGroup : byStorageNameDocGroups.entrySet()) {
            // Once the limit is reached there is nothing left to read, so avoid opening further files.
            if (retrievalCount.get() >= maxResults) {
                break;
            }

            final String storageFileName = docGroup.getKey();
            final File provenanceEventFile = LuceneUtil.getProvenanceLogFile(storageFileName,
                    allProvenanceLogFiles);
            if (provenanceEventFile == null) {
                logger.warn("Could not find Provenance Log File with "
                        + "basename {} in the Provenance Repository; assuming "
                        + "file has expired and continuing without it", storageFileName);
                continue;
            }

            logFileCount++;
            int eventsReadThisFile = 0;

            try (final RecordReader reader = RecordReaders.newRecordReader(provenanceEventFile,
                    allProvenanceLogFiles, maxAttributeChars)) {
                final Iterator<Document> docIter = docGroup.getValue().iterator();
                // hasNext() is checked first so the counter is only bumped when a document is attempted.
                while (docIter.hasNext() && retrievalCount.getAndIncrement() < maxResults) {
                    final ProvenanceEventRecord event = getRecord(docIter.next(), reader);
                    if (event != null && authorizer.isAuthorized(event)) {
                        matchingRecords.add(event);
                        eventsReadThisFile++;
                    }
                }
            } catch (final Exception e) {
                // Best effort: a bad file must not prevent events in the other files from being returned.
                logger.warn("Failed to read Provenance Events. The event file '{}' may be missing or corrupt.",
                        provenanceEventFile.getAbsolutePath(), e);
            }

            logger.debug("Read {} records from Provenance Log File {}", eventsReadThisFile, provenanceEventFile);
        }

        final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
        logger.debug("Took {} ms to read {} events from {} prov log files", millis, matchingRecords.size(),
                logFileCount);

        return matchingRecords;
    }
}