Example usage for org.apache.lucene.index IndexReader maxDoc

Introduction

This page collects example usages of org.apache.lucene.index.IndexReader.maxDoc().

Prototype

public abstract int maxDoc();

Source Link

Document

Returns one greater than the largest possible document number.

Usage

From source file:org.apache.maven.index.context.DefaultIndexingContext.java

License:Apache License

/**
 * Recomputes the root-group and all-group sets from the documents currently in the index.
 *
 * <p>Every document id below {@code maxDoc()} is visited; ids reported as not live are skipped.
 * For each remaining document that carries a UINFO field, an {@code ArtifactInfo} is built and
 * its root group and group id are collected. The collected sets are stored and the index is
 * optimized afterwards.
 *
 * @throws IOException if reading the index fails
 */
public synchronized void rebuildGroups() throws IOException {
    final IndexSearcher searcher = acquireIndexSearcher();
    try {
        final IndexReader reader = searcher.getIndexReader();

        final Set<String> roots = new LinkedHashSet<>();
        final Set<String> all = new LinkedHashSet<>();

        // maxDoc() is only an upper bound on document ids; a null liveDocs is treated as
        // "nothing deleted", otherwise each id is checked individually.
        final int docLimit = reader.maxDoc();
        final Bits live = MultiFields.getLiveDocs(reader);

        for (int docId = 0; docId < docLimit; docId++) {
            final boolean isLive = live == null || live.get(docId);
            if (isLive) {
                final Document doc = reader.document(docId);

                // Only documents that carry a UINFO contribute to the group sets.
                if (doc.get(ArtifactInfo.UINFO) != null) {
                    final ArtifactInfo artifact = IndexUtils.constructArtifactInfo(doc, this);
                    roots.add(artifact.getRootGroup());
                    all.add(artifact.getGroupId());
                }
            }
        }

        setRootGroups(roots);
        setAllGroups(all);

        optimize();
    } finally {
        // Always hand the searcher back, even when the scan fails.
        releaseIndexSearcher(searcher);
    }
}

From source file:org.apache.maven.index.DefaultScannerListener.java

License:Apache License

/**
 * Seeds this listener's collections from the documents already present in the context's index.
 *
 * <p>For every live document with a UINFO field, the group id (the UINFO text before the first
 * {@code '|'}) is added to {@code allGroups} and its first dot-separated segment to
 * {@code groups}. The UINFO itself is remembered in {@code uinfos} only when the context is not
 * receiving updates.
 *
 * @param ctx the indexing context whose index is scanned
 * @throws IOException if reading the index fails
 * @throws CorruptIndexException if the index is corrupt
 */
private void initialize(IndexingContext ctx) throws IOException, CorruptIndexException {
    final IndexSearcher searcher = ctx.acquireIndexSearcher();
    try {
        final IndexReader reader = searcher.getIndexReader();
        final Bits live = MultiFields.getLiveDocs(reader);

        for (int docId = 0; docId < reader.maxDoc(); docId++) {
            // Skip ids that are flagged as not live.
            if (live != null && !live.get(docId)) {
                continue;
            }

            final String uinfo = reader.document(docId).get(ArtifactInfo.UINFO);
            if (uinfo == null) {
                continue;
            }

            // If ctx is receiving updates (in other words, is a proxy), there is no need to
            // build a huge Set of strings with all uinfos, as deletion detection has no effect
            // in that case. The removeDeletedArtifacts() method, which uses the info gathered
            // in this set, is invoked under the same condition. As indexes of Central are
            // getting huge, the set would grow enormously without ever being used.
            if (!ctx.isReceivingUpdates()) {
                uinfos.add(uinfo);
            }

            // Add all existing groupIds to the lists, as they will not be "discovered" and
            // would otherwise be missing from the new list.
            final String groupId = uinfo.substring(0, uinfo.indexOf('|'));
            final int firstDot = groupId.indexOf('.');
            final String rootGroup = firstDot == -1 ? groupId : groupId.substring(0, firstDot);
            groups.add(rootGroup);
            allGroups.add(groupId);
        }
    } finally {
        ctx.releaseIndexSearcher(searcher);
    }
}

From source file:org.apache.maven.index.incremental.DefaultIncrementalHandler.java

License:Apache License

/**
 * Collects the ids of live documents whose LAST_MODIFIED value is later than {@code timestamp}.
 *
 * @param request packing request supplying the index reader to scan
 * @param timestamp cut-off; only documents modified strictly after it are selected
 * @return the matching document ids, in ascending order
 * @throws IOException if a document cannot be read
 */
private List<Integer> getIndexChunk(IndexPackingRequest request, Date timestamp) throws IOException {
    final List<Integer> selected = new ArrayList<>();
    final IndexReader reader = request.getIndexReader();
    final Bits live = MultiFields.getLiveDocs(reader);

    for (int docId = 0; docId < reader.maxDoc(); docId++) {
        if (live != null && !live.get(docId)) {
            continue;
        }

        final String lastModified = reader.document(docId).get(ArtifactInfo.LAST_MODIFIED);
        if (lastModified == null) {
            continue;
        }

        // LAST_MODIFIED is parsed as epoch milliseconds.
        final Date modifiedAt = new Date(Long.parseLong(lastModified));

        // Only add documents that were added after the last time we indexed
        if (modifiedAt.after(timestamp)) {
            selected.add(docId);
        }
    }

    return selected;
}

From source file:org.apache.maven.index.Nexus737NexusIndexerTest.java

License:Apache License

/**
 * Verifies that exactly the three expected nexus-webapp UINFOs are present among the live
 * documents of the test index.
 *
 * @throws Exception if the index cannot be read
 */
public void testValidateUINFOs() throws Exception {
    // NOTE(review): the searcher obtained from acquireIndexSearcher() is never handed back to
    // the context here; consider keeping a reference and releasing it in a finally block.
    IndexReader reader = context.acquireIndexSearcher().getIndexReader();
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    int foundCount = 0;

    for (int i = 0; i < reader.maxDoc(); i++) {
        // A null liveDocs is treated as "every document is live".
        if (liveDocs == null || liveDocs.get(i)) {
            Document document = reader.document(i);

            String uinfo = document.get(ArtifactInfo.UINFO);

            if ("org.sonatype.nexus|nexus-webapp|1.0.0-SNAPSHOT|NA|jar".equals(uinfo)
                    || "org.sonatype.nexus|nexus-webapp|1.0.0-SNAPSHOT|bundle|zip".equals(uinfo)
                    || "org.sonatype.nexus|nexus-webapp|1.0.0-SNAPSHOT|bundle|tar.gz".equals(uinfo)) {
                foundCount++;
            }
        }
    }

    // Expected value goes first so a failure reads "expected:<3> but was:<n>".
    assertEquals(3, foundCount);
}

From source file:org.apache.maven.index.updater.DefaultIndexUpdater.java

License:Apache License

/**
 * Deletes from the index in {@code directory} every live document rejected by {@code filter}.
 *
 * <p>The deletions are committed, reader and writer are closed, and then a fresh writer is
 * opened on the same directory, committed and closed again.
 *
 * @param directory the Lucene directory to filter in place
 * @param filter decides which documents are kept ({@code accept} returning {@code true})
 * @throws IOException if reading from or writing to the index fails
 */
private static void filterDirectory(final Directory directory, final DocumentFilter filter) throws IOException {
    IndexReader reader = null;
    IndexWriter writer = null;
    try {
        reader = DirectoryReader.open(directory);
        writer = new NexusIndexWriter(directory, new NexusAnalyzer(), false);

        final Bits live = MultiFields.getLiveDocs(reader);
        final int docLimit = reader.maxDoc();

        for (int docId = 0; docId < docLimit; docId++) {
            final boolean isLive = live == null || live.get(docId);
            if (isLive && !filter.accept(reader.document(docId))) {
                // FIXME handle deletion failure (the boolean result is currently ignored)
                writer.tryDeleteDocument(reader, docId);
            }
        }
        writer.commit();
    } finally {
        IndexUtils.close(reader);
        IndexUtils.close(writer);
    }

    IndexWriter reopened = null;
    try {
        // analyzer is unimportant, since we are not adding/searching to/on index, only reading/deleting
        reopened = new NexusIndexWriter(directory, new NexusAnalyzer(), false);

        reopened.commit();
    } finally {
        IndexUtils.close(reopened);
    }
}

From source file:org.apache.maven.index.updater.IndexDataTest.java

License:Apache License

/**
 * Loads every document of the reader into a map keyed by UINFO.
 *
 * <p>All ids below {@code maxDoc()} are read; documents for which no {@code ArtifactInfo} can
 * be constructed are left out. No liveness check is applied.
 *
 * @param r1 the reader to scan
 * @return artifact infos keyed by their UINFO
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if a document cannot be read
 */
private Map<String, ArtifactInfo> readIndex(IndexReader r1) throws CorruptIndexException, IOException {
    final Map<String, ArtifactInfo> byUinfo = new HashMap<>();

    int docId = 0;
    while (docId < r1.maxDoc()) {
        final ArtifactInfo info = IndexUtils.constructArtifactInfo(r1.document(docId), context);
        if (info != null) {
            byUinfo.put(info.getUinfo(), info);
        }
        docId++;
    }

    return byUinfo;
}

From source file:org.apache.maven.index.updater.IndexDataWriter.java

License:Apache License

/**
 * Writes live documents from the reader and counts how many were written.
 *
 * @param r the reader supplying the documents
 * @param docIndexes ids of the documents to write, or {@code null} to consider every id below
 *        {@code maxDoc()}
 * @return the number of documents for which {@code writeDocument} returned {@code true}
 * @throws IOException if reading or writing a document fails
 */
public int writeDocuments(IndexReader r, List<Integer> docIndexes) throws IOException {
    final Bits live = MultiFields.getLiveDocs(r);
    int written = 0;

    if (docIndexes != null) {
        // Explicit selection: visit only the requested ids.
        for (int docId : docIndexes) {
            if ((live == null || live.get(docId)) && writeDocument(r.document(docId))) {
                written++;
            }
        }
        return written;
    }

    // No selection given: walk the whole id range.
    for (int docId = 0; docId < r.maxDoc(); docId++) {
        if ((live == null || live.get(docId)) && writeDocument(r.document(docId))) {
            written++;
        }
    }
    return written;
}

From source file:org.apache.maven.indexer.examples.BasicUsageExample.java

License:Apache License

/**
 * Runs the basic usage walkthrough against an index context named "central-context".
 *
 * <p>Steps, in order: create the indexing context with the "min", "jarContent" and
 * "maven-plugin" index creators; fetch and apply an index update while printing progress;
 * then run a series of example searches (GA with a "version greater than" filter, GA lookups,
 * SHA1 lookups, a classname lookup, and two grouped searches) and finally close the context.
 *
 * @throws IOException if index access or the index update fails
 * @throws ComponentLookupException if one of the index creators cannot be looked up
 * @throws InvalidVersionSpecificationException if the reference version string cannot be parsed
 */
public void perform() throws IOException, ComponentLookupException, InvalidVersionSpecificationException {
    // Files where local cache is (if any) and Lucene Index should be located
    File centralLocalCache = new File("target/central-cache");
    File centralIndexDir = new File("target/central-index");

    // Creators we want to use (search for fields it defines)
    List<IndexCreator> indexers = new ArrayList<>();
    indexers.add(plexusContainer.lookup(IndexCreator.class, "min"));
    indexers.add(plexusContainer.lookup(IndexCreator.class, "jarContent"));
    indexers.add(plexusContainer.lookup(IndexCreator.class, "maven-plugin"));

    // Create context for central repository index
    centralContext = indexer.createIndexingContext("central-context", "central", centralLocalCache,
            centralIndexDir, "http://repo1.maven.org/maven2", null, true, true, indexers);

    // Update the index (incremental update will happen if this is not 1st run and files are not deleted)
    // This whole block below should not be executed on every app start, but rather controlled by some configuration
    // since this block will always emit at least one HTTP GET. Central indexes are updated once a week, but
    // other index sources might have different index publishing frequency.
    // Preferred frequency is once a week.
    if (true) {
        System.out.println("Updating Index...");
        System.out.println("This might take a while on first run, so please be patient!");
        // Create ResourceFetcher implementation to be used with IndexUpdateRequest
        // Here, we use Wagon based one as shorthand, but all we need is a ResourceFetcher implementation
        TransferListener listener = new AbstractTransferListener() {
            public void transferStarted(TransferEvent transferEvent) {
                System.out.print("  Downloading " + transferEvent.getResource().getName());
            }

            // Progress events are intentionally ignored; only start and completion are printed.
            public void transferProgress(TransferEvent transferEvent, byte[] buffer, int length) {
            }

            public void transferCompleted(TransferEvent transferEvent) {
                System.out.println(" - Done");
            }
        };
        ResourceFetcher resourceFetcher = new WagonHelper.WagonFetcher(httpWagon, listener, null, null);

        // Remember the timestamp before updating so it can be compared with the update result.
        Date centralContextCurrentTimestamp = centralContext.getTimestamp();
        IndexUpdateRequest updateRequest = new IndexUpdateRequest(centralContext, resourceFetcher);
        IndexUpdateResult updateResult = indexUpdater.fetchAndUpdateIndex(updateRequest);
        if (updateResult.isFullUpdate()) {
            System.out.println("Full update happened!");
        } else if (updateResult.getTimestamp().equals(centralContextCurrentTimestamp)) {
            System.out.println("No update needed, index is up to date!");
        } else {
            System.out.println("Incremental update happened, change covered " + centralContextCurrentTimestamp
                    + " - " + updateResult.getTimestamp() + " period.");
        }

        System.out.println();
    }

    System.out.println();
    System.out.println("Using index");
    System.out.println("===========");
    System.out.println();

    // ====
    // Case:
    // dump all the GAVs
    // NOTE: this block is deliberately disabled because it takes too long to run (Central is HUGE);
    // it is kept here only as a code example
    if (false) {
        final IndexSearcher searcher = centralContext.acquireIndexSearcher();
        try {
            final IndexReader ir = searcher.getIndexReader();
            Bits liveDocs = MultiFields.getLiveDocs(ir);
            for (int i = 0; i < ir.maxDoc(); i++) {
                // A null liveDocs is treated as "every document is live".
                if (liveDocs == null || liveDocs.get(i)) {
                    final Document doc = ir.document(i);
                    final ArtifactInfo ai = IndexUtils.constructArtifactInfo(doc, centralContext);
                    System.out.println(ai.getGroupId() + ":" + ai.getArtifactId() + ":" + ai.getVersion() + ":"
                            + ai.getClassifier() + " (sha1=" + ai.getSha1() + ")");
                }
            }
        } finally {
            centralContext.releaseIndexSearcher(searcher);
        }
    }

    // ====
    // Case:
    // Search for all GAVs with known G and A and having version greater than V

    final GenericVersionScheme versionScheme = new GenericVersionScheme();
    final String versionString = "1.5.0";
    final Version version = versionScheme.parseVersion(versionString);

    // construct the query for known GA
    final Query groupIdQ = indexer.constructQuery(MAVEN.GROUP_ID,
            new SourcedSearchExpression("org.sonatype.nexus"));
    final Query artifactIdQ = indexer.constructQuery(MAVEN.ARTIFACT_ID,
            new SourcedSearchExpression("nexus-api"));
    final BooleanQuery query = new BooleanQuery();
    query.add(groupIdQ, Occur.MUST);
    query.add(artifactIdQ, Occur.MUST);

    // we want "jar" artifacts only
    query.add(indexer.constructQuery(MAVEN.PACKAGING, new SourcedSearchExpression("jar")), Occur.MUST);
    // we want main artifacts only (no classifier)
    // Note: this below is unfinished API, needs fixing
    query.add(indexer.constructQuery(MAVEN.CLASSIFIER, new SourcedSearchExpression(Field.NOT_PRESENT)),
            Occur.MUST_NOT);

    // construct the filter to express "V greater than"
    final ArtifactInfoFilter versionFilter = new ArtifactInfoFilter() {
        public boolean accepts(final IndexingContext ctx, final ArtifactInfo ai) {
            try {
                final Version aiV = versionScheme.parseVersion(ai.getVersion());
                // Use ">=" if you are INCLUSIVE
                return aiV.compareTo(version) > 0;
            } catch (InvalidVersionSpecificationException e) {
                // Unparseable versions are accepted rather than dropped (errs on the side of inclusion).
                return true;
            }
        }
    };

    System.out.println(
            "Searching for all GAVs with G=org.sonatype.nexus and nexus-api and having V greater than 1.5.0");
    final IteratorSearchRequest request = new IteratorSearchRequest(query,
            Collections.singletonList(centralContext), versionFilter);
    final IteratorSearchResponse response = indexer.searchIterator(request);
    for (ArtifactInfo ai : response) {
        System.out.println(ai.toString());
    }

    // Case:
    // Use index
    // Searching for some artifact
    Query gidQ = indexer.constructQuery(MAVEN.GROUP_ID,
            new SourcedSearchExpression("org.apache.maven.indexer"));
    Query aidQ = indexer.constructQuery(MAVEN.ARTIFACT_ID, new SourcedSearchExpression("indexer-artifact"));

    BooleanQuery bq = new BooleanQuery();
    bq.add(gidQ, Occur.MUST);
    bq.add(aidQ, Occur.MUST);

    searchAndDump(indexer, "all artifacts under GA org.apache.maven.indexer:indexer-artifact", bq);

    // Searching for some main artifact
    // NOTE(review): with the classifier clause below commented out, this query is the same as the previous one.
    bq = new BooleanQuery();
    bq.add(gidQ, Occur.MUST);
    bq.add(aidQ, Occur.MUST);
    // bq.add( nexusIndexer.constructQuery( MAVEN.CLASSIFIER, new SourcedSearchExpression( "*" ) ), Occur.MUST_NOT
    // );

    searchAndDump(indexer, "main artifacts under GA org.apache.maven.indexer:indexer-artifact", bq);

    // doing sha1 search
    searchAndDump(indexer, "SHA1 7ab67e6b20e5332a7fb4fdf2f019aec4275846c2", indexer.constructQuery(MAVEN.SHA1,
            new SourcedSearchExpression("7ab67e6b20e5332a7fb4fdf2f019aec4275846c2")));

    searchAndDump(indexer, "SHA1 7ab67e6b20 (partial hash)",
            indexer.constructQuery(MAVEN.SHA1, new UserInputSearchExpression("7ab67e6b20")));

    // doing classname search (incomplete classname)
    searchAndDump(indexer,
            "classname DefaultNexusIndexer (note: Central does not publish classes in the index)",
            indexer.constructQuery(MAVEN.CLASSNAMES, new UserInputSearchExpression("DefaultNexusIndexer")));

    // doing search for all "canonical" maven plugins latest versions
    bq = new BooleanQuery();
    bq.add(indexer.constructQuery(MAVEN.PACKAGING, new SourcedSearchExpression("maven-plugin")), Occur.MUST);
    bq.add(indexer.constructQuery(MAVEN.GROUP_ID, new SourcedSearchExpression("org.apache.maven.plugins")),
            Occur.MUST);
    searchGroupedAndDump(indexer, "all \"canonical\" maven plugins", bq, new GAGrouping());

    // doing search for all archetypes latest versions
    searchGroupedAndDump(indexer, "all maven archetypes (latest versions)",
            indexer.constructQuery(MAVEN.PACKAGING, new SourcedSearchExpression("maven-archetype")),
            new GAGrouping());

    // close cleanly
    indexer.closeIndexingContext(centralContext, false);
}

From source file:org.apache.nifi.provenance.index.lucene.LuceneEventIndex.java

License:Apache License

/**
 * Determines the (approximate) largest event ID indexed for the given partition.
 *
 * <p>The partition's index directories are visited newest first. For the first directory that
 * can be opened, read and is not empty, the identifier field of the document with the highest
 * document id is returned. Directories that cannot be opened or read, or that hold no
 * documents, are skipped.
 *
 * @param partitionName name of the partition whose indexes are inspected
 * @return the event ID taken from the newest usable index, or {@code -1} if the partition has
 *         no index directory or none of them yields a value
 */
long getMaxEventId(final String partitionName) {
    final List<File> allDirectories = getDirectoryManager().getDirectories(0L, Long.MAX_VALUE, partitionName);
    if (allDirectories.isEmpty()) {
        return -1L;
    }

    Collections.sort(allDirectories, DirectoryUtils.NEWEST_INDEX_FIRST);

    for (final File directory : allDirectories) {
        final EventIndexSearcher searcher;
        try {
            searcher = indexManager.borrowIndexSearcher(directory);
        } catch (final IOException ioe) {
            // Pass the exception along so the cause of the failure is not lost.
            logger.warn(
                    "Unable to read from Index Directory {}. Will assume that the index is incomplete and not consider this index when determining max event ID",
                    directory, ioe);
            continue;
        }

        try {
            final IndexReader reader = searcher.getIndexSearcher().getIndexReader();
            final int maxDocId = reader.maxDoc() - 1;
            if (maxDocId < 0) {
                // An empty index has no last document; asking for document -1 would fail with
                // an exception that the IOException handler below does not cover. Fall through
                // to the next (older) index instead. The finally block still returns the searcher.
                logger.debug(
                        "Index Directory {} contains no documents. Will not consider this index when determining max event ID",
                        directory);
                continue;
            }

            final Document document = reader.document(maxDocId);
            final long eventId = document.getField(SearchableFields.Identifier.getSearchableFieldName())
                    .numericValue().longValue();
            logger.info(
                    "Determined that Max Event ID indexed for Partition {} is approximately {} based on index {}",
                    partitionName, eventId, directory);
            return eventId;
        } catch (final IOException ioe) {
            logger.warn(
                    "Unable to search Index Directory {}. Will assume that the index is incomplete and not consider this index when determining max event ID",
                    directory, ioe);
        } finally {
            indexManager.returnIndexSearcher(searcher);
        }
    }

    return -1L;
}

From source file:org.apache.nutch.indexer.IndexSorter.java

License:Apache License

/**
 * Builds a mapping from old document ids to new, score-sorted document ids.
 *
 * <p>Each document's score is decoded from its "site" norm; deleted documents get a score of
 * zero. The documents are then sorted by {@code DocScore}'s natural ordering, and each old id
 * is mapped to its position in that order, or to {@code -1} when its score is not positive.
 *
 * @param reader the index whose documents are to be renumbered
 * @return array indexed by old document id, holding the new id or {@code -1}
 * @throws IOException if the norms cannot be read
 */
private static int[] oldToNew(IndexReader reader) throws IOException {
    final int docCount = reader.maxDoc();
    final DocScore[] ranked = new DocScore[docCount];

    // use site, an indexed, un-tokenized field to get boost
    final byte[] siteNorms = reader.norms("site");

    for (int docId = 0; docId < docCount; docId++) {
        final float boost = reader.isDeleted(docId) ? 0.0f : Similarity.decodeNorm(siteNorms[docId]);

        final DocScore entry = new DocScore();
        entry.oldDoc = docId;
        entry.score = boost;
        ranked[docId] = entry;
    }
    Arrays.sort(ranked);

    final int[] mapping = new int[docCount];
    for (int position = 0; position < docCount; position++) {
        final DocScore entry = ranked[position];
        if (entry.score > 0.0f) {
            mapping[entry.oldDoc] = position;
        } else {
            // Non-positive scores (including deleted documents) get no new id.
            mapping[entry.oldDoc] = -1;
        }
    }
    return mapping;
}