Example usage for org.apache.lucene.index IndexReader document

List of usage examples for org.apache.lucene.index IndexReader document

Introduction

On this page you can find example usages of the document method of org.apache.lucene.index.IndexReader.

Prototype




public final Document document(int docID) throws IOException 

Source Link

Document

Returns the stored fields of the nth Document in this index.

Usage

From source file:org.sindice.siren.demo.entity.EntityCentricIndexing.java

License:Apache License

/**
 * Fetches a single document from the index stored in {@code dir}.
 *
 * <p>A reader is opened for the duration of the call only and is closed again before the
 * method returns, whether or not the lookup succeeds. The {@code true} flag passed to
 * {@code IndexReader.open} is presumably the read-only switch (confirm against the Lucene
 * version in use).
 *
 * @param docId the document number to look up
 * @return whatever {@code IndexReader.document(docId)} returns for that number
 * @throws CorruptIndexException propagated from the reader
 * @throws IOException propagated from the reader
 */
public Document getDocument(final int docId) throws CorruptIndexException, IOException {
    final IndexReader indexReader = IndexReader.open(dir, true);
    try {
        final Document stored = indexReader.document(docId);
        return stored;
    } finally {
        indexReader.close();
    }
}

From source file:org.sindice.siren.search.TestSirenFuzzyQuery.java

License:Open Source License

/**
 * Test the {@link TopTermsBoostOnlySirenBooleanQueryRewrite} rewrite method.
 *
 * <p>Adds "Lucene" twice and "Lucenne" once, runs a fuzzy query for "Lucene" using the
 * boost-only rewrite, and checks that the two exact matches are returned ahead of the
 * near match.
 */
@Test
public void testBoostOnlyRewrite() throws Exception {
    final Directory directory = new RAMDirectory();
    final TupleAnalyzer analyzer = new TupleAnalyzer(TEST_VERSION_CURRENT,
            new StandardAnalyzer(TEST_VERSION_CURRENT), new AnyURIAnalyzer(TEST_VERSION_CURRENT));
    final IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    final IndexWriter writer = new IndexWriter(directory, conf);
    this.addDoc("Lucene", writer);
    this.addDoc("Lucene", writer);
    this.addDoc("Lucenne", writer);

    final IndexReader reader = IndexReader.open(directory);
    final IndexSearcher searcher = new IndexSearcher(directory);
    writer.close();

    final SirenFuzzyQuery query = new SirenFuzzyQuery(new Term("field", "Lucene"));
    query.setRewriteMethod(new SirenMultiTermQuery.TopTermsBoostOnlySirenBooleanQueryRewrite(50));
    final ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;

    // normally, 'Lucenne' would be the first result as IDF will skew the score.
    final String[] expectedOrder = { "Lucene", "Lucene", "Lucenne" };
    assertEquals(3, hits.length);
    for (int rank = 0; rank < expectedOrder.length; rank++) {
        assertEquals(this.getTriple(expectedOrder[rank]), reader.document(hits[rank].doc).get("field"));
    }

    searcher.close();
    reader.close();
    directory.close();
}

From source file:org.sonatype.nexus.index.AbstractRepoNexusIndexerTest.java

License:Open Source License

/**
 * Checks the packaging information stored in the index.
 *
 * <p>First walks every live document and, for those carrying a UINFO field, asserts that the
 * INFO field does not start with the literal text "null". Then searches by packaging for
 * "tar.gz" and "zip" and expects exactly one hit each, whose packaging and file extension
 * both equal the searched value.
 *
 * @throws Exception if reading the index or searching fails
 */
public void testPackaging() throws Exception {
    IndexReader reader = context.getIndexReader();

    // Document numbers run over [0, maxDoc()). numDocs() does not count deleted documents, so
    // using it as the bound would leave live documents at the end of the index unchecked as
    // soon as any deletion exists.
    int maxDoc = reader.maxDoc();

    for (int i = 0; i < maxDoc; i++) {
        if (reader.isDeleted(i)) {
            continue;
        }

        Document document = reader.document(i);

        String uinfo = document.get(ArtifactInfo.UINFO);

        if (uinfo != null) {
            String info = document.get(ArtifactInfo.INFO);
            assertFalse(info.startsWith("null"));
        }
    }

    // {
    // Query query = new TermQuery( new Term( ArtifactInfo.PACKAGING, "jar" ) );
    // FlatSearchResponse response = nexusIndexer.searchFlat(new FlatSearchRequest(query));
    // assertEquals(response.getResults().toString(), 22, response.getTotalHits());
    // }

    // Each of these packagings is expected to match exactly one artifact.
    String[] singleHitPackagings = { "tar.gz", "zip" };

    for (String packaging : singleHitPackagings) {
        Query query = new TermQuery(new Term(ArtifactInfo.PACKAGING, packaging));
        FlatSearchResponse response = nexusIndexer.searchFlat(new FlatSearchRequest(query));
        assertEquals(response.getResults().toString(), 1, response.getTotalHits());

        ArtifactInfo ai = response.getResults().iterator().next();
        assertEquals(packaging, ai.packaging);
        assertEquals(packaging, ai.fextension);
    }
}

From source file:org.sonatype.nexus.index.context.DefaultIndexingContext.java

License:Open Source License

/**
 * Merges the documents of another index directory into this context's index.
 *
 * <p>The whole operation runs while holding {@code indexLock}. Every non-deleted document of
 * {@code directory} that passes {@code filter} is handled as follows:
 * <ul>
 * <li>if it has a UINFO field and a search of this context finds no document with the same
 * UINFO, it is added (after {@code IndexUtils.updateDocument});</li>
 * <li>if it has no UINFO field but has a DELETED field, documents whose UINFO equals that value
 * are deleted and the document itself is added as well;</li>
 * <li>otherwise it is ignored.</li>
 * </ul>
 * Afterwards groups are rebuilt, the timestamp is updated and the index is optimized.
 *
 * @param directory the index directory to read documents (and the merged timestamp) from
 * @param filter optional filter; documents for which {@code accept} returns {@code false} are
 *        skipped, {@code null} means every document is considered
 * @throws IOException if reading from {@code directory} or updating this index fails
 */
public void merge(Directory directory, DocumentFilter filter) throws IOException {
    synchronized (indexLock) {
        closeReaders();

        IndexWriter w = getIndexWriter();

        // NOTE(review): the searcher is obtained once, before any document is added; whether it
        // observes documents added through w during the loop is not visible here — confirm.
        IndexSearcher s = getIndexSearcher();

        IndexReader r = IndexReader.open(directory);

        try {
            // Upper bound of document numbers in the source index (deleted ones are skipped below).
            int numDocs = r.maxDoc();

            for (int i = 0; i < numDocs; i++) {
                if (r.isDeleted(i)) {
                    continue;
                }

                Document d = r.document(i);

                if (filter != null && !filter.accept(d)) {
                    continue;
                }

                String uinfo = d.get(ArtifactInfo.UINFO);

                if (uinfo != null) {
                    // Only add the document when this index has no entry with the same UINFO.
                    Hits hits = s.search(new TermQuery(new Term(ArtifactInfo.UINFO, uinfo)));

                    if (hits.length() == 0) {
                        w.addDocument(IndexUtils.updateDocument(d, this, false));
                    }
                } else {
                    String deleted = d.get(ArtifactInfo.DELETED);

                    if (deleted != null) {
                        // Deleting the document loses the history that it was deleted,
                        // so incrementals won't work. Therefore, put the delete
                        // document in as well
                        w.deleteDocuments(new Term(ArtifactInfo.UINFO, deleted));
                        w.addDocument(d);
                    }
                }
            }

        } finally {
            // Always release the source reader and this context's readers, even on failure.
            r.close();
            closeReaders();
        }

        rebuildGroups();

        Date mergedTimestamp = IndexUtils.getTimestamp(directory);

        if (getTimestamp() != null && mergedTimestamp != null && mergedTimestamp.after(getTimestamp())) {
            // we have both, keep the newest
            updateTimestamp(true, mergedTimestamp);
        } else {
            // No usable newer merged timestamp: fall back to the single-argument update.
            updateTimestamp(true);
        }

        optimize();
    }
}

From source file:org.sonatype.nexus.index.context.IndexUtils.java

License:Open Source License

/**
 * Recomputes the root-group and all-group sets of a context from the documents in its index.
 * Useful, for example, on contexts which were merged, since merge() of contexts only merges
 * the Documents with UINFO record (Artifacts).
 *
 * <p>Every non-deleted document that has a UINFO field contributes its root group and its
 * groupId. The collected sets are stored on the context, after which the context's index
 * writer is optimized and flushed.
 *
 * @param context the indexing context whose group information is rebuilt
 * @throws IOException if reading or writing the index fails
 */
public static void rebuildGroups(IndexingContext context) throws IOException {
    IndexReader reader = context.getIndexReader();

    Set<String> roots = new LinkedHashSet<String>();
    Set<String> groups = new LinkedHashSet<String>();

    int limit = reader.maxDoc();

    for (int docId = 0; docId < limit; docId++) {
        if (!reader.isDeleted(docId)) {
            Document doc = reader.document(docId);

            // Only artifact records (those with a UINFO) carry group information.
            if (doc.get(ArtifactInfo.UINFO) != null) {
                ArtifactInfo artifact = IndexUtils.constructArtifactInfo(doc, context);
                roots.add(artifact.getRootGroup());
                groups.add(artifact.groupId);
            }
        }
    }

    setRootGroups(context, roots);
    setAllGroups(context, groups);

    context.getIndexWriter().optimize();
    context.getIndexWriter().flush();
}

From source file:org.sonatype.nexus.index.DefaultScannerListener.java

License:Open Source License

/**
 * Seeds this listener's collections from the documents already present in the index.
 *
 * <p>For every live document with a UINFO field, the UINFO is recorded in {@code uinfos}, the
 * part of the UINFO before the first {@code '|'} is recorded as a groupId in {@code allGroups},
 * and the part of that groupId before its first {@code '.'} (or the whole groupId if it has no
 * dot) is recorded in {@code groups}.
 *
 * @param ctx the indexing context whose index is read
 * @throws IOException if reading the index fails
 * @throws CorruptIndexException if the index is corrupt
 */
private void initialize(IndexingContext ctx) throws IOException, CorruptIndexException {
    IndexReader r = ctx.getIndexReader();

    // Document numbers run over [0, maxDoc()). numDocs() does not count deleted documents, so
    // bounding the loop by it would silently skip live documents at the end of the index once
    // any deletion exists.
    int maxDoc = r.maxDoc();

    for (int i = 0; i < maxDoc; i++) {
        if (r.isDeleted(i)) {
            continue;
        }

        Document d = r.document(i);

        String uinfo = d.get(ArtifactInfo.UINFO);

        if (uinfo != null) {
            uinfos.add(uinfo);

            // add all existing groupIds to the lists, as they will
            // not be "discovered" and would be missing from the new list..
            String groupId = uinfo.substring(0, uinfo.indexOf('|'));
            int n = groupId.indexOf('.');
            groups.add(n == -1 ? groupId : groupId.substring(0, n));
            allGroups.add(groupId);
        }
    }
}

From source file:org.sonatype.nexus.index.Nexus5393IndexEntryDuplicationIT.java

License:Open Source License

/**
 * Scans the index of the "central" repository and fails if any UINFO occurs more than once or
 * if more than one distinct UINFO is present.
 *
 * @throws IOException if the index cannot be read
 */
protected void ensureUniqueness() throws IOException {
    final IndexingContext context = indexerManager.getRepositoryIndexContext("central");
    final HashSet<String> seen = new HashSet<String>();
    final ArrayList<String> repeated = new ArrayList<String>();
    final IndexSearcher searcher = context.acquireIndexSearcher();
    try {
        final IndexReader reader = searcher.getIndexReader();
        for (int docId = 0; docId < reader.maxDoc(); docId++) {
            if (reader.isDeleted(docId)) {
                continue;
            }
            final String uinfo = reader.document(docId).get(ArtifactInfo.UINFO);
            if (uinfo == null) {
                continue;
            }
            // add() returns false when the value was already present
            if (!seen.add(uinfo)) {
                repeated.add(uinfo);
            }
        }
    } finally {
        // the searcher is always handed back to the context
        context.releaseIndexSearcher(searcher);
    }

    // remote proxy contains only one artifact: log4j-1.2.13: so we expect our index to have no
    // dupes and only one artifact
    final boolean asExpected = repeated.isEmpty() && seen.size() <= 1;
    if (!asExpected) {
        Assert.fail("UINFOs are duplicated or we scanned some unexpected ones, duplicates=" + repeated
                + ", uinfos=" + seen);
    }
}

From source file:org.sonatype.nexus.index.Nexus5393IndexEntryDuplicationLocalTest.java

License:Open Source License

/**
 * Walks all live documents in the index of the "releases" repository, collecting UINFO values,
 * and fails when a UINFO is seen twice or when more than one distinct UINFO exists.
 *
 * @throws IOException if the index cannot be read
 */
protected void ensureUniqueness() throws IOException {
    final IndexingContext context = indexerManager.getRepositoryIndexContext("releases");
    final HashSet<String> knownUinfos = new HashSet<String>();
    final ArrayList<String> dupes = new ArrayList<String>();
    final IndexSearcher searcher = context.acquireIndexSearcher();
    try {
        final IndexReader reader = searcher.getIndexReader();
        int docId = 0;
        while (docId < reader.maxDoc()) {
            if (!reader.isDeleted(docId)) {
                final Document doc = reader.document(docId);
                final String uinfo = doc.get(ArtifactInfo.UINFO);
                // a failed add() means this UINFO was recorded before
                final boolean alreadyKnown = uinfo != null && !knownUinfos.add(uinfo);
                if (alreadyKnown) {
                    dupes.add(uinfo);
                }
            }
            docId++;
        }
    } finally {
        // release the searcher regardless of how the scan ended
        context.releaseIndexSearcher(searcher);
    }

    // remote proxy contains only one artifact: log4j-1.2.13: so we expect our index to have no
    // dupes and only one artifact
    if (knownUinfos.size() > 1 || !dupes.isEmpty()) {
        Assert.fail("UINFOs are duplicated or we scanned some unexpected ones, duplicates=" + dupes
                + ", uinfos=" + knownUinfos);
    }
}

From source file:org.sonatype.nexus.index.Nexus737NexusIndexerTest.java

License:Open Source License

/**
 * Verifies that the index contains the three expected nexus-webapp UINFO records
 * (the "NA" one plus the "bundle|zip" and "bundle|tar.gz" ones).
 *
 * @throws Exception if reading the index fails
 */
public void testValidateUINFOs() throws Exception {
    IndexReader reader = context.getIndexReader();

    int foundCount = 0;

    // Document numbers run over [0, maxDoc()); numDocs() excludes deleted documents and is
    // therefore not a valid upper bound for document numbers once deletions exist.
    int maxDoc = reader.maxDoc();

    for (int i = 0; i < maxDoc; i++) {
        // Deleted slots must not be fetched; skip them as the other index scans do.
        if (reader.isDeleted(i)) {
            continue;
        }

        Document document = reader.document(i);

        String uinfo = document.get(ArtifactInfo.UINFO);

        if ("org.sonatype.nexus|nexus-webapp|1.0.0-SNAPSHOT|NA".equals(uinfo)
                || "org.sonatype.nexus|nexus-webapp|1.0.0-SNAPSHOT|bundle|zip".equals(uinfo)
                || "org.sonatype.nexus|nexus-webapp|1.0.0-SNAPSHOT|bundle|tar.gz".equals(uinfo)) {
            foundCount++;
        }
    }

    // Expected value goes first so that a failure message reads correctly.
    assertEquals(3, foundCount);
}

From source file:org.sonatype.nexus.index.NexusScanningListener.java

License:Open Source License

/**
 * Used in {@code update} mode, deletes documents from index that are not found during scanning (means
 * they were deleted from the storage being scanned).
 *
 * <p>Each live document whose UINFO is not among {@code processedUinfos} is turned into a
 * minimal {@code ArtifactContext} built from the pieces of the UINFO and, if its path falls
 * under {@code contextPath}, handed to {@code remove}.
 *
 * @param contextPath path prefix limiting which artifacts may be removed; {@code null} means
 *        no restriction
 * @return the number of artifacts for which {@code remove} reported {@code IndexOp.DELETED}
 * @throws IOException if reading the index fails
 */
private int removeDeletedArtifacts(final String contextPath) throws IOException {
    final IndexReader reader = contextIndexSearcher.getIndexReader();
    int removedCount = 0;
    for (int docId = 0; docId < reader.maxDoc(); docId++) {
        if (reader.isDeleted(docId)) {
            continue;
        }
        final Document doc = reader.document(docId);
        final String uinfo = doc.get(ArtifactInfo.UINFO);
        if (uinfo == null || processedUinfos.contains(uinfo)) {
            // not an artifact record, or the artifact was seen during the scan
            continue;
        }

        // file is not present in storage but is on index, delete it from index
        final String[] parts = ArtifactInfo.FS_PATTERN.split(uinfo);
        final ArtifactInfo ai = new ArtifactInfo();
        ai.repository = context.getRepositoryId();
        ai.groupId = parts[0];
        ai.artifactId = parts[1];
        ai.version = parts[2];
        if (parts.length > 3) {
            ai.classifier = ArtifactInfo.renvl(parts[3]);
        }
        if (parts.length > 4) {
            ai.packaging = ArtifactInfo.renvl(parts[4]);
        }

        // minimal ArtifactContext for removal
        final ArtifactContext ac = new ArtifactContext(null, null, null, ai, ai.calculateGav());
        final boolean withinContext = contextPath == null
                || context.getGavCalculator().gavToPath(ac.getGav()).startsWith(contextPath);
        if (withinContext && IndexOp.DELETED == remove(ac)) {
            removedCount++;
        }
    }
    return removedCount;
}