Example usage for org.apache.lucene.index IndexReader document

List of usage examples for org.apache.lucene.index IndexReader document

Introduction

On this page you can find example usages of the org.apache.lucene.index.IndexReader method document(int docID).

Prototype




public final Document document(int docID) throws IOException 

Source Link

Document

Returns the stored fields of the nth Document in this index.

Usage

From source file:org.apache.maven.index.context.DefaultIndexingContext.java

License:Apache License

/**
 * Merges the documents stored in {@code directory} into this context's index.
 *
 * <p>For every live document of the source index that passes {@code filter}:
 * a document carrying a UINFO is added (via {@code IndexUtils.updateDocument})
 * only when no document with the same UINFO term is found by the acquired
 * searcher; a document without UINFO but with a DELETED marker causes the
 * matching UINFO to be deleted and the marker document itself to be added.
 * Afterwards the groups are rebuilt, the timestamp is updated and the index
 * is optimized.
 *
 * @param directory the source index to merge from
 * @param filter    optional filter; {@code null} accepts every document
 * @throws IOException if reading the source index or writing this index fails
 */
public synchronized void merge(Directory directory, DocumentFilter filter) throws IOException {
    final IndexSearcher searcher = acquireIndexSearcher();
    try {
        final IndexWriter writer = getIndexWriter();
        final IndexReader source = DirectoryReader.open(directory);
        try {
            final int limit = source.maxDoc();
            final Bits live = MultiFields.getLiveDocs(source);

            for (int docId = 0; docId < limit; docId++) {
                // A null Bits instance is treated as "nothing deleted".
                final boolean isLive = live == null || live.get(docId);
                if (!isLive) {
                    continue;
                }

                final Document doc = source.document(docId);
                final boolean accepted = filter == null || filter.accept(doc);
                if (!accepted) {
                    continue;
                }

                final String uinfo = doc.get(ArtifactInfo.UINFO);
                if (uinfo == null) {
                    final String deleted = doc.get(ArtifactInfo.DELETED);
                    if (deleted != null) {
                        // Only deleting would lose the record that a delete happened,
                        // which would break incremental updates. So the delete-marker
                        // document is stored as well.
                        writer.deleteDocuments(new Term(ArtifactInfo.UINFO, deleted));
                        writer.addDocument(doc);
                    }
                    continue;
                }

                // Add the artifact only if this index does not already contain its UINFO.
                final TopScoreDocCollector hits = TopScoreDocCollector.create(1);
                searcher.search(new TermQuery(new Term(ArtifactInfo.UINFO, uinfo)), hits);
                if (hits.getTotalHits() == 0) {
                    writer.addDocument(IndexUtils.updateDocument(doc, this, false));
                }
            }
        } finally {
            source.close();
            commit();
        }

        rebuildGroups();

        final Date mergedTimestamp = IndexUtils.getTimestamp(directory);
        final boolean mergedIsNewer = getTimestamp() != null && mergedTimestamp != null
                && mergedTimestamp.after(getTimestamp());
        if (mergedIsNewer) {
            // Both timestamps exist: keep the newest one.
            updateTimestamp(true, mergedTimestamp);
        } else {
            updateTimestamp(true);
        }
        optimize();
    } finally {
        releaseIndexSearcher(searcher);
    }
}

From source file:org.apache.maven.index.context.DefaultIndexingContext.java

License:Apache License

/**
 * Recomputes the root-group and all-group sets from the current index content.
 *
 * <p>Walks every live document, and for each one that has a UINFO field
 * reconstructs its {@code ArtifactInfo} to collect the root group and the
 * group id. The collected sets (insertion-ordered) replace the stored ones,
 * after which the index is optimized.
 *
 * @throws IOException if the index cannot be read or written
 */
public synchronized void rebuildGroups() throws IOException {
    final IndexSearcher searcher = acquireIndexSearcher();
    try {
        final IndexReader reader = searcher.getIndexReader();

        final Set<String> roots = new LinkedHashSet<String>();
        final Set<String> all = new LinkedHashSet<String>();

        final int limit = reader.maxDoc();
        final Bits live = MultiFields.getLiveDocs(reader);

        for (int docId = 0; docId < limit; docId++) {
            // A null Bits instance is treated as "nothing deleted".
            if (live == null || live.get(docId)) {
                final Document doc = reader.document(docId);
                if (doc.get(ArtifactInfo.UINFO) == null) {
                    // Documents without UINFO contribute no group information.
                    continue;
                }
                final ArtifactInfo info = IndexUtils.constructArtifactInfo(doc, this);
                roots.add(info.getRootGroup());
                all.add(info.getGroupId());
            }
        }

        setRootGroups(roots);
        setAllGroups(all);

        optimize();
    } finally {
        releaseIndexSearcher(searcher);
    }
}

From source file:org.apache.maven.index.DefaultScannerListener.java

License:Apache License

/**
 * Seeds this listener's {@code uinfos}, {@code groups} and {@code allGroups}
 * collections from the documents already present in the context's index.
 *
 * <p>The group id is taken as the part of the UINFO before the first
 * {@code '|'}; the root group is the part of the group id before the first
 * {@code '.'} (or the whole group id when it has no dot). A UINFO without
 * {@code '|'} would make {@code substring} throw.
 *
 * @param ctx the indexing context whose index is scanned
 * @throws IOException           if the index cannot be read
 * @throws CorruptIndexException if the index is corrupt
 */
private void initialize(IndexingContext ctx) throws IOException, CorruptIndexException {
    final IndexSearcher searcher = ctx.acquireIndexSearcher();
    try {
        final IndexReader reader = searcher.getIndexReader();
        final Bits live = MultiFields.getLiveDocs(reader);

        for (int docId = 0; docId < reader.maxDoc(); docId++) {
            if (live != null && !live.get(docId)) {
                continue;
            }

            final String uinfo = reader.document(docId).get(ArtifactInfo.UINFO);
            if (uinfo == null) {
                continue;
            }

            // When ctx is receiving updates (i.e. it is a proxy) deletion detection
            // has no effect, and removeDeletedArtifacts(), the consumer of this set,
            // is invoked under the same condition. Skipping the set avoids holding
            // every uinfo of a huge index (e.g. Central) in memory for nothing.
            if (!ctx.isReceivingUpdates()) {
                uinfos.add(uinfo);
            }

            // Existing groupIds will not be "discovered" by the scan, so record
            // them here or they would be missing from the new lists.
            final String groupId = uinfo.substring(0, uinfo.indexOf('|'));
            final int firstDot = groupId.indexOf('.');
            final String rootGroup = firstDot == -1 ? groupId : groupId.substring(0, firstDot);
            groups.add(rootGroup);
            allGroups.add(groupId);
        }
    } finally {
        ctx.releaseIndexSearcher(searcher);
    }
}

From source file:org.apache.maven.index.incremental.DefaultIncrementalHandler.java

License:Apache License

/**
 * Collects the ids of all live documents whose LAST_MODIFIED value is
 * strictly after {@code timestamp}.
 *
 * @param request   packing request supplying the index reader to scan
 * @param timestamp lower bound (exclusive); documents modified after it are selected
 * @return document ids, in ascending order, of the matching documents
 * @throws IOException if a document cannot be read
 */
private List<Integer> getIndexChunk(IndexPackingRequest request, Date timestamp) throws IOException {
    final IndexReader reader = request.getIndexReader();
    final Bits live = MultiFields.getLiveDocs(reader);
    final List<Integer> selected = new ArrayList<>();

    for (int docId = 0; docId < reader.maxDoc(); docId++) {
        if (live != null && !live.get(docId)) {
            continue;
        }

        final String lastModified = reader.document(docId).get(ArtifactInfo.LAST_MODIFIED);
        if (lastModified == null) {
            continue;
        }

        // LAST_MODIFIED is parsed as epoch milliseconds; keep only documents
        // that were modified after the last time we indexed.
        final Date modifiedAt = new Date(Long.parseLong(lastModified));
        if (modifiedAt.after(timestamp)) {
            selected.add(docId);
        }
    }

    return selected;
}

From source file:org.apache.maven.index.Nexus737NexusIndexerTest.java

License:Apache License

/**
 * Verifies that the index contains exactly the three expected
 * nexus-webapp 1.0.0-SNAPSHOT UINFOs (jar, bundle zip, bundle tar.gz).
 *
 * <p>The searcher obtained from the context is released in a {@code finally}
 * block so a failing assertion does not leak it.
 *
 * @throws Exception if the index cannot be read
 */
public void testValidateUINFOs() throws Exception {
    // Fully-qualified so this method does not depend on an extra import.
    final org.apache.lucene.search.IndexSearcher searcher = context.acquireIndexSearcher();
    try {
        IndexReader reader = searcher.getIndexReader();
        Bits liveDocs = MultiFields.getLiveDocs(reader);

        int foundCount = 0;

        for (int i = 0; i < reader.maxDoc(); i++) {
            // A null Bits instance is treated as "nothing deleted".
            if (liveDocs == null || liveDocs.get(i)) {
                Document document = reader.document(i);

                String uinfo = document.get(ArtifactInfo.UINFO);

                if ("org.sonatype.nexus|nexus-webapp|1.0.0-SNAPSHOT|NA|jar".equals(uinfo)
                        || "org.sonatype.nexus|nexus-webapp|1.0.0-SNAPSHOT|bundle|zip".equals(uinfo)
                        || "org.sonatype.nexus|nexus-webapp|1.0.0-SNAPSHOT|bundle|tar.gz".equals(uinfo)) {
                    foundCount++;
                }
            }
        }

        // assertEquals takes (expected, actual); the right order keeps failure messages correct.
        assertEquals(3, foundCount);
    } finally {
        context.releaseIndexSearcher(searcher);
    }
}

From source file:org.apache.maven.index.updater.DefaultIndexUpdater.java

License:Apache License

/**
 * Removes from the index in {@code directory} every live document that
 * {@code filter} does not accept.
 *
 * <p>Deletions are attempted with {@code tryDeleteDocument} and committed;
 * afterwards a second writer is opened on the same directory and committed
 * once more before being closed.
 *
 * @param directory the index directory to filter in place
 * @param filter    decides which documents are kept
 * @throws IOException if the index cannot be read or written
 */
private static void filterDirectory(final Directory directory, final DocumentFilter filter) throws IOException {
    IndexReader reader = null;
    IndexWriter deleter = null;
    try {
        reader = DirectoryReader.open(directory);
        deleter = new NexusIndexWriter(directory, new NexusAnalyzer(), false);

        final Bits live = MultiFields.getLiveDocs(reader);
        final int limit = reader.maxDoc();

        for (int docId = 0; docId < limit; docId++) {
            // A null Bits instance is treated as "nothing deleted".
            if (live == null || live.get(docId)) {
                final Document doc = reader.document(docId);
                if (!filter.accept(doc)) {
                    // FIXME handle deletion failure (the returned success flag is ignored)
                    deleter.tryDeleteDocument(reader, docId);
                }
            }
        }
        deleter.commit();
    } finally {
        IndexUtils.close(reader);
        IndexUtils.close(deleter);
    }

    IndexWriter finisher = null;
    try {
        // analyzer is unimportant, since we are not adding/searching to/on index, only reading/deleting
        finisher = new NexusIndexWriter(directory, new NexusAnalyzer(), false);
        finisher.commit();
    } finally {
        IndexUtils.close(finisher);
    }
}

From source file:org.apache.maven.index.updater.IndexDataTest.java

License:Apache License

/**
 * Reads every document slot of {@code r1} and maps each reconstructable
 * {@code ArtifactInfo} by its UINFO.
 *
 * <p>NOTE(review): live docs are not consulted here, so every id below
 * {@code maxDoc()} is read; confirm that is intended.
 *
 * @param r1 the index reader to scan
 * @return artifact infos keyed by UINFO; a later document replaces an earlier one with the same key
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException           if a document cannot be read
 */
private Map<String, ArtifactInfo> readIndex(IndexReader r1) throws CorruptIndexException, IOException {
    final Map<String, ArtifactInfo> byUinfo = new HashMap<String, ArtifactInfo>();

    for (int docId = 0; docId < r1.maxDoc(); docId++) {
        final ArtifactInfo info = IndexUtils.constructArtifactInfo(r1.document(docId), context);
        if (info == null) {
            // Nothing could be constructed from this document; skip it.
            continue;
        }
        byUinfo.put(info.getUinfo(), info);
    }

    return byUinfo;
}

From source file:org.apache.maven.index.updater.IndexDataWriter.java

License:Apache License

/**
 * Writes live documents of {@code r} through {@code writeDocument} and counts
 * how many of those calls returned {@code true}.
 *
 * @param r          the index reader to take documents from
 * @param docIndexes ids of the documents to write, or {@code null} to write
 *                   every live document of the reader
 * @return the number of documents for which {@code writeDocument} returned {@code true}
 * @throws IOException if reading or writing a document fails
 */
public int writeDocuments(IndexReader r, List<Integer> docIndexes) throws IOException {
    int written = 0;
    final Bits live = MultiFields.getLiveDocs(r);

    if (docIndexes != null) {
        // Explicit selection: visit only the requested ids, still skipping deleted ones.
        for (int docId : docIndexes) {
            if (live != null && !live.get(docId)) {
                continue;
            }
            if (writeDocument(r.document(docId))) {
                written++;
            }
        }
        return written;
    }

    // No selection given: walk the whole id range of the reader.
    for (int docId = 0; docId < r.maxDoc(); docId++) {
        if (live != null && !live.get(docId)) {
            continue;
        }
        if (writeDocument(r.document(docId))) {
            written++;
        }
    }
    return written;
}

From source file:org.apache.maven.indexer.examples.BasicUsageExample.java

License:Apache License

/**
 * Runs the basic usage demonstration end to end.
 *
 * <p>Steps, in order: creates an indexing context named "central-context" with the
 * "min", "jarContent" and "maven-plugin" index creators; fetches/updates the index
 * and prints what kind of update happened; then issues a series of example queries
 * (version-filtered GA search, GA searches, SHA1 searches, classname search, grouped
 * plugin/archetype searches), printing results to {@code System.out}; finally closes
 * the indexing context without deleting its files.
 *
 * @throws IOException                          if index access or the update fails
 * @throws ComponentLookupException             if an {@code IndexCreator} cannot be looked up
 * @throws InvalidVersionSpecificationException if the reference version string cannot be parsed
 */
public void perform() throws IOException, ComponentLookupException, InvalidVersionSpecificationException {
    // Files where local cache is (if any) and Lucene Index should be located
    File centralLocalCache = new File("target/central-cache");
    File centralIndexDir = new File("target/central-index");

    // Creators we want to use (search for fields it defines)
    List<IndexCreator> indexers = new ArrayList<>();
    indexers.add(plexusContainer.lookup(IndexCreator.class, "min"));
    indexers.add(plexusContainer.lookup(IndexCreator.class, "jarContent"));
    indexers.add(plexusContainer.lookup(IndexCreator.class, "maven-plugin"));

    // Create context for central repository index
    centralContext = indexer.createIndexingContext("central-context", "central", centralLocalCache,
            centralIndexDir, "http://repo1.maven.org/maven2", null, true, true, indexers);

    // Update the index (incremental update will happen if this is not 1st run and files are not deleted)
    // This whole block below should not be executed on every app start, but rather controlled by some configuration
    // since this block will always emit at least one HTTP GET. Central indexes are updated once a week, but
    // other index sources might have different index publishing frequency.
    // Preferred frequency is once a week.
    if (true) {
        System.out.println("Updating Index...");
        System.out.println("This might take a while on first run, so please be patient!");
        // Create ResourceFetcher implementation to be used with IndexUpdateRequest
        // Here, we use Wagon based one as shorthand, but all we need is a ResourceFetcher implementation
        TransferListener listener = new AbstractTransferListener() {
            public void transferStarted(TransferEvent transferEvent) {
                System.out.print("  Downloading " + transferEvent.getResource().getName());
            }

            // Progress is intentionally not reported.
            public void transferProgress(TransferEvent transferEvent, byte[] buffer, int length) {
            }

            public void transferCompleted(TransferEvent transferEvent) {
                System.out.println(" - Done");
            }
        };
        ResourceFetcher resourceFetcher = new WagonHelper.WagonFetcher(httpWagon, listener, null, null);

        // Remember the timestamp from before the update so the outcome can be classified below.
        Date centralContextCurrentTimestamp = centralContext.getTimestamp();
        IndexUpdateRequest updateRequest = new IndexUpdateRequest(centralContext, resourceFetcher);
        IndexUpdateResult updateResult = indexUpdater.fetchAndUpdateIndex(updateRequest);
        if (updateResult.isFullUpdate()) {
            System.out.println("Full update happened!");
        } else if (updateResult.getTimestamp().equals(centralContextCurrentTimestamp)) {
            System.out.println("No update needed, index is up to date!");
        } else {
            System.out.println("Incremental update happened, change covered " + centralContextCurrentTimestamp
                    + " - " + updateResult.getTimestamp() + " period.");
        }

        System.out.println();
    }

    System.out.println();
    System.out.println("Using index");
    System.out.println("===========");
    System.out.println();

    // ====
    // Case:
    // dump all the GAVs
    // NOTE: this block is deliberately disabled; dumping everything takes too long (Central is HUGE),
    // but it is kept here as a code example
    if (false) {
        final IndexSearcher searcher = centralContext.acquireIndexSearcher();
        try {
            final IndexReader ir = searcher.getIndexReader();
            Bits liveDocs = MultiFields.getLiveDocs(ir);
            for (int i = 0; i < ir.maxDoc(); i++) {
                // A null Bits instance is treated as "nothing deleted".
                if (liveDocs == null || liveDocs.get(i)) {
                    final Document doc = ir.document(i);
                    final ArtifactInfo ai = IndexUtils.constructArtifactInfo(doc, centralContext);
                    System.out.println(ai.getGroupId() + ":" + ai.getArtifactId() + ":" + ai.getVersion() + ":"
                            + ai.getClassifier() + " (sha1=" + ai.getSha1() + ")");
                }
            }
        } finally {
            centralContext.releaseIndexSearcher(searcher);
        }
    }

    // ====
    // Case:
    // Search for all GAVs with known G and A and having version greater than V

    final GenericVersionScheme versionScheme = new GenericVersionScheme();
    final String versionString = "1.5.0";
    final Version version = versionScheme.parseVersion(versionString);

    // construct the query for known GA
    final Query groupIdQ = indexer.constructQuery(MAVEN.GROUP_ID,
            new SourcedSearchExpression("org.sonatype.nexus"));
    final Query artifactIdQ = indexer.constructQuery(MAVEN.ARTIFACT_ID,
            new SourcedSearchExpression("nexus-api"));
    final BooleanQuery query = new BooleanQuery();
    query.add(groupIdQ, Occur.MUST);
    query.add(artifactIdQ, Occur.MUST);

    // we want "jar" artifacts only
    query.add(indexer.constructQuery(MAVEN.PACKAGING, new SourcedSearchExpression("jar")), Occur.MUST);
    // we want main artifacts only (no classifier)
    // Note: this below is unfinished API, needs fixing
    query.add(indexer.constructQuery(MAVEN.CLASSIFIER, new SourcedSearchExpression(Field.NOT_PRESENT)),
            Occur.MUST_NOT);

    // construct the filter to express "V greater than"
    final ArtifactInfoFilter versionFilter = new ArtifactInfoFilter() {
        public boolean accepts(final IndexingContext ctx, final ArtifactInfo ai) {
            try {
                final Version aiV = versionScheme.parseVersion(ai.getVersion());
                // Use ">=" if you are INCLUSIVE
                return aiV.compareTo(version) > 0;
            } catch (InvalidVersionSpecificationException e) {
                // do something here? be safe and include?
                return true;
            }
        }
    };

    System.out.println(
            "Searching for all GAVs with G=org.sonatype.nexus and nexus-api and having V greater than 1.5.0");
    final IteratorSearchRequest request = new IteratorSearchRequest(query,
            Collections.singletonList(centralContext), versionFilter);
    final IteratorSearchResponse response = indexer.searchIterator(request);
    for (ArtifactInfo ai : response) {
        System.out.println(ai.toString());
    }

    // Case:
    // Use index
    // Searching for some artifact
    Query gidQ = indexer.constructQuery(MAVEN.GROUP_ID,
            new SourcedSearchExpression("org.apache.maven.indexer"));
    Query aidQ = indexer.constructQuery(MAVEN.ARTIFACT_ID, new SourcedSearchExpression("indexer-artifact"));

    BooleanQuery bq = new BooleanQuery();
    bq.add(gidQ, Occur.MUST);
    bq.add(aidQ, Occur.MUST);

    searchAndDump(indexer, "all artifacts under GA org.apache.maven.indexer:indexer-artifact", bq);

    // Searching for some main artifact
    // (with the classifier clause below commented out, this query is identical to the previous one)
    bq = new BooleanQuery();
    bq.add(gidQ, Occur.MUST);
    bq.add(aidQ, Occur.MUST);
    // bq.add( nexusIndexer.constructQuery( MAVEN.CLASSIFIER, new SourcedSearchExpression( "*" ) ), Occur.MUST_NOT
    // );

    searchAndDump(indexer, "main artifacts under GA org.apache.maven.indexer:indexer-artifact", bq);

    // doing sha1 search
    searchAndDump(indexer, "SHA1 7ab67e6b20e5332a7fb4fdf2f019aec4275846c2", indexer.constructQuery(MAVEN.SHA1,
            new SourcedSearchExpression("7ab67e6b20e5332a7fb4fdf2f019aec4275846c2")));

    searchAndDump(indexer, "SHA1 7ab67e6b20 (partial hash)",
            indexer.constructQuery(MAVEN.SHA1, new UserInputSearchExpression("7ab67e6b20")));

    // doing classname search (incomplete classname)
    searchAndDump(indexer,
            "classname DefaultNexusIndexer (note: Central does not publish classes in the index)",
            indexer.constructQuery(MAVEN.CLASSNAMES, new UserInputSearchExpression("DefaultNexusIndexer")));

    // doing search for all "canonical" maven plugins latest versions
    bq = new BooleanQuery();
    bq.add(indexer.constructQuery(MAVEN.PACKAGING, new SourcedSearchExpression("maven-plugin")), Occur.MUST);
    bq.add(indexer.constructQuery(MAVEN.GROUP_ID, new SourcedSearchExpression("org.apache.maven.plugins")),
            Occur.MUST);
    searchGroupedAndDump(indexer, "all \"canonical\" maven plugins", bq, new GAGrouping());

    // doing search for all archetypes latest versions
    searchGroupedAndDump(indexer, "all maven archetypes (latest versions)",
            indexer.constructQuery(MAVEN.PACKAGING, new SourcedSearchExpression("maven-archetype")),
            new GAGrouping());

    // close cleanly
    indexer.closeIndexingContext(centralContext, false);
}

From source file:org.apache.nifi.provenance.index.lucene.LuceneEventIndex.java

License:Apache License

/**
 * Determines (approximately) the largest event ID that has been indexed for a partition.
 *
 * <p>Index directories are visited newest first. For the first directory that can be
 * read and is not empty, the identifier field of the document with the highest
 * document id is returned. Directories that cannot be opened or searched, or that
 * contain no documents, are skipped.
 *
 * @param partitionName the partition whose index directories are inspected
 * @return the event ID found, or {@code -1} if no directory yields one
 */
long getMaxEventId(final String partitionName) {
    final List<File> allDirectories = getDirectoryManager().getDirectories(0L, Long.MAX_VALUE, partitionName);
    if (allDirectories.isEmpty()) {
        return -1L;
    }

    Collections.sort(allDirectories, DirectoryUtils.NEWEST_INDEX_FIRST);

    for (final File directory : allDirectories) {
        final EventIndexSearcher searcher;
        try {
            searcher = indexManager.borrowIndexSearcher(directory);
        } catch (final IOException ioe) {
            // Pass the exception to the logger, as the search failure below does,
            // so the cause is not lost.
            logger.warn(
                    "Unable to read from Index Directory {}. Will assume that the index is incomplete and not consider this index when determining max event ID",
                    directory, ioe);
            continue;
        }

        try {
            final IndexReader reader = searcher.getIndexSearcher().getIndexReader();
            final int maxDocId = reader.maxDoc() - 1;
            if (maxDocId < 0) {
                // Empty index: there is no last document to read, and document(-1) would
                // throw an unchecked exception. Fall through to the next (older) index.
                // The finally block still returns the searcher.
                logger.debug(
                        "Index Directory {} contains no documents. Will not consider this index when determining max event ID",
                        directory);
                continue;
            }

            final Document document = reader.document(maxDocId);
            final long eventId = document.getField(SearchableFields.Identifier.getSearchableFieldName())
                    .numericValue().longValue();
            logger.info(
                    "Determined that Max Event ID indexed for Partition {} is approximately {} based on index {}",
                    partitionName, eventId, directory);
            return eventId;
        } catch (final IOException ioe) {
            logger.warn(
                    "Unable to search Index Directory {}. Will assume that the index is incomplete and not consider this index when determining max event ID",
                    directory, ioe);
        } finally {
            indexManager.returnIndexSearcher(searcher);
        }
    }

    return -1L;
}