Example usage for org.apache.lucene.search IndexSearcher getIndexReader

Introduction

This page collects example usages of org.apache.lucene.search.IndexSearcher.getIndexReader().

Prototype

public IndexReader getIndexReader() 

Document

Return the IndexReader this searches.
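
Before the project examples below, here is a minimal sketch of the method in isolation. It assumes a Lucene 4+ style API and a hypothetical Directory named dir; the searcher simply returns the reader it was constructed over, so index statistics can be read without opening a second reader.

// minimal sketch (hypothetical Directory "dir"; Lucene 4+ API assumed)
IndexReader reader = DirectoryReader.open(dir);
IndexSearcher searcher = new IndexSearcher(reader);

// getIndexReader() returns the same reader the searcher wraps
System.out.println("numDocs=" + searcher.getIndexReader().numDocs());

reader.close();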

Usage

From source file:org.sd.text.lucene.LuceneSearcher.java

License:Open Source License

public static void main(String[] args) throws IOException {
    // each args[i]: path to a Lucene index dir

    for (int i = 0; i < args.length; ++i) {
        final LuceneSearcher luceneSearcher = new LuceneSearcher(args[i]);
        luceneSearcher.open();

        final IndexSearcher indexSearcher = luceneSearcher.getIndexSearcher();
        final IndexReader indexReader = indexSearcher.getIndexReader();

        System.out.println("numDocs(" + args[i] + ")=" + indexReader.numDocs());

        luceneSearcher.close();
    }
}

From source file:org.sindice.siren.trec.SindiceDEIndexingTest.java

License:Apache License

@Test
public void testSimpleQueries() throws Exception {
    final Directory dir = new RAMDirectory();
    final File input = new File("./src/test/resources");
    final SindiceDEIndexing indexDE = new SindiceDEIndexing(input, dir);
    indexDE.indexIt();

    final IndexSearcher searcher = new IndexSearcher(dir);

    // Search for any entities containing the term "rna"
    TopDocs td = searcher.search(new SirenTermQuery(outgoingField.createTerm("rna")), 10);
    assertEquals(1, td.totalHits);
    final Document doc = searcher.getIndexReader().document(td.scoreDocs[0].doc);
    assertEquals("http://eprints.rkbexplorer.com/id/caltech/eprints-7519", doc.get(Indexing.SUBJECT));

    // Search for any entities that have the term "abstract" in the predicate (i.e., cell 0 of the tuple).
    final SirenTupleQuery tq = new SirenTupleQuery();
    final SirenCellQuery cell = new SirenCellQuery(new SirenTermQuery(outgoingField.createTerm("abstract")));
    cell.setConstraint(0);
    tq.add(cell, Occur.MUST);
    td = searcher.search(tq, 10);
    assertEquals(2, td.totalHits);

    dir.close();
}

From source file:org.sonatype.nexus.index.Nexus5393IndexEntryDuplicationIT.java

License:Open Source License

protected void ensureUniqueness() throws IOException {
    final IndexingContext context = indexerManager.getRepositoryIndexContext("central");
    final HashSet<String> uinfos = new HashSet<String>();
    final ArrayList<String> duplicates = new ArrayList<String>();
    final IndexSearcher indexSearcher = context.acquireIndexSearcher();
    try {
        final IndexReader r = indexSearcher.getIndexReader();
        for (int i = 0; i < r.maxDoc(); i++) {
            if (!r.isDeleted(i)) {
                final Document d = r.document(i);
                String uinfo = d.get(ArtifactInfo.UINFO);
                if (uinfo != null && !uinfos.add(uinfo)) {
                    duplicates.add(uinfo);
                }
            }
        }
    } finally {
        context.releaseIndexSearcher(indexSearcher);
    }

    // the remote proxy contains only one artifact (log4j-1.2.13), so we expect our
    // index to have no duplicates and only a single artifact
    if (!duplicates.isEmpty() || uinfos.size() > 1) {
        Assert.fail("UINFOs are duplicated or we scanned some unexpected ones, duplicates=" + duplicates
                + ", uinfos=" + uinfos);
    }
}

From source file:org.sonatype.nexus.index.Nexus5393IndexEntryDuplicationLocalTest.java

License:Open Source License

protected void ensureUniqueness() throws IOException {
    final IndexingContext context = indexerManager.getRepositoryIndexContext("releases");
    final HashSet<String> uinfos = new HashSet<String>();
    final ArrayList<String> duplicates = new ArrayList<String>();
    final IndexSearcher indexSearcher = context.acquireIndexSearcher();
    try {
        final IndexReader r = indexSearcher.getIndexReader();
        for (int i = 0; i < r.maxDoc(); i++) {
            if (!r.isDeleted(i)) {
                final Document d = r.document(i);
                String uinfo = d.get(ArtifactInfo.UINFO);
                if (uinfo != null && !uinfos.add(uinfo)) {
                    duplicates.add(uinfo);
                }
            }
        }
    } finally {
        context.releaseIndexSearcher(indexSearcher);
    }

    // the remote proxy contains only one artifact (log4j-1.2.13), so we expect our
    // index to have no duplicates and only a single artifact
    if (!duplicates.isEmpty() || uinfos.size() > 1) {
        Assert.fail("UINFOs are duplicated or we scanned some unexpected ones, duplicates=" + duplicates
                + ", uinfos=" + uinfos);
    }
}
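
Both Nexus examples above scan documents with IndexReader.isDeleted(int), which existed in Lucene 3.x but was removed in Lucene 4. On later versions the same scan is usually written with live-doc bits; a hedged sketch, assuming an open reader and the Lucene 8+ MultiBits class (earlier versions used MultiFields.getLiveDocs):

// post-3.x replacement for the isDeleted(i) check (reader assumed open)
Bits liveDocs = MultiBits.getLiveDocs(reader); // null means the index has no deletions
for (int i = 0; i < reader.maxDoc(); i++) {
    if (liveDocs == null || liveDocs.get(i)) {
        Document d = reader.document(i);
        // ... inspect stored fields as in the examples above
    }
}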

From source file:org.sonatype.nexus.ReindexIT.java

License:Open Source License

protected void shiftContextInTime(IndexingContext ctx, int shiftDays) throws IOException {
    if (shiftDays != 0) {
        final IndexWriter iw = ctx.getIndexWriter();
        final IndexSearcher is = ctx.acquireIndexSearcher();
        try {
            final IndexReader ir = is.getIndexReader();
            for (int docNum = 0; docNum < ir.maxDoc(); docNum++) {
                if (!ir.isDeleted(docNum)) {
                    Document doc = ir.document(docNum);

                    String lastModified = doc.get(ArtifactInfo.LAST_MODIFIED);

                    if (lastModified != null) {
                        long lm = Long.parseLong(lastModified);

                        lm = lm + (shiftDays * A_DAY_MILLIS);

                        doc.removeFields(ArtifactInfo.LAST_MODIFIED);

                        doc.add(new Field(ArtifactInfo.LAST_MODIFIED, Long.toString(lm), Field.Store.YES,
                                Field.Index.NO));

                        iw.updateDocument(new Term(ArtifactInfo.UINFO, doc.get(ArtifactInfo.UINFO)), doc);
                    }
                }
            }

            ctx.optimize();

            ctx.commit();

            // shift timestamp too
            if (ctx.getTimestamp() != null) {
                ctx.updateTimestamp(true, new Date(ctx.getTimestamp().getTime() + (shiftDays * A_DAY_MILLIS)));
            } else {
                ctx.updateTimestamp(true, new Date(System.currentTimeMillis() + (shiftDays * A_DAY_MILLIS)));
            }
        } finally {
            ctx.releaseIndexSearcher(is);
        }
    }
}

From source file:org.talend.dataquality.semantic.api.LocalDictionaryCache.java

License:Open Source License

private List<DQDocument> dqDocListFromTopDocs(String categoryName, TopDocs docs) throws IOException {
    mgr.maybeRefresh();
    IndexSearcher searcher = mgr.acquire();
    IndexReader reader = searcher.getIndexReader();
    List<DQDocument> dqDocList = new ArrayList<>();
    for (ScoreDoc scoreDoc : docs.scoreDocs) {
        Document luceneDoc = reader.document(scoreDoc.doc);
        DQDocument dqDoc = DictionaryUtils.dictionaryEntryFromDocument(luceneDoc, categoryName);
        dqDocList.add(dqDoc);
    }
    mgr.release(searcher);
    return dqDocList;
}
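
The snippet above follows the SearcherManager acquire/release protocol: the reader returned by getIndexReader() is only guaranteed to stay open while the searcher is held. A minimal sketch of the same pattern with the release in a finally block (same mgr field assumed):

mgr.maybeRefresh();
IndexSearcher searcher = mgr.acquire();
try {
    IndexReader reader = searcher.getIndexReader();
    // use the reader only while the searcher is acquired
} finally {
    mgr.release(searcher); // after release, the manager may close the reader
}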

From source file:org.talend.dataquality.semantic.api.LocalDictionaryCache.java

License:Open Source License

private Set<String> doSuggestValues(String categoryName, String input, int num, boolean isPrefixSearch) {
    String jointInput = DictionarySearcher.getJointTokens(input);
    String queryString = isPrefixSearch ? jointInput + "*" : "*" + jointInput + "*";

    final BooleanQuery booleanQuery = new BooleanQuery();
    final Query catQuery = new TermQuery(new Term(DictionarySearcher.F_WORD, categoryName));
    booleanQuery.add(catQuery, BooleanClause.Occur.MUST);
    final Query wildcardQuery = new WildcardQuery(new Term(DictionarySearcher.F_SYNTERM, queryString));
    booleanQuery.add(wildcardQuery, BooleanClause.Occur.MUST);

    Set<String> results = new TreeSet<String>();

    try {
        mgr.maybeRefresh();
        IndexSearcher searcher = mgr.acquire();
        IndexReader reader = searcher.getIndexReader();
        TopDocs topDocs = searcher.search(booleanQuery, num);
        // note: the original released the searcher here, before reading stored fields
        // below, which risks using a closed reader; release is moved after the loop
        for (int i = 0; i < topDocs.scoreDocs.length; i++) {
            Document doc = reader.document(topDocs.scoreDocs[i].doc);
            IndexableField[] fields = doc.getFields(DictionarySearcher.F_RAW);
            for (IndexableField f : fields) {
                final String str = f.stringValue();
                if (isPrefixSearch) {
                    if (StringUtils.startsWithIgnoreCase(str, input) || StringUtils
                            .startsWithIgnoreCase(DictionarySearcher.getJointTokens(str), jointInput)) {
                        results.add(str);
                    }
                } else {// infix search
                    if (StringUtils.containsIgnoreCase(str, input) || StringUtils
                            .containsIgnoreCase(DictionarySearcher.getJointTokens(str), jointInput)) {
                        results.add(str);
                    }
                }
            }
        }
        mgr.release(searcher);
    } catch (IOException e) {
        LOGGER.trace(e.getMessage(), e);
    }
    return results;
}

From source file:org.talend.dataquality.semantic.index.DictionarySearcher.java

License:Open Source License

/**
 * Method "getNumDocs".
 *
 * @return the number of documents in the index
 */
public int getNumDocs() {
    try {
        final IndexSearcher searcher = mgr.acquire();
        final int numDocs = searcher.getIndexReader().numDocs();
        mgr.release(searcher);
        return numDocs;
    } catch (IOException e) {
        LOGGER.error(e.getMessage(), e);
    }
    return -1;
}

From source file:org.tallison.lucene.search.concordance.charoffsets.SpansCrawler.java

License:Apache License

public static void crawl(SpanQuery query, Query filter, IndexSearcher searcher, DocTokenOffsetsVisitor visitor)
        throws IOException, TargetTokenNotFoundException {

    query = (SpanQuery) query.rewrite(searcher.getIndexReader());

    SpanWeight w = query.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f);
    if (filter == null) {
        for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {

            Spans spans = w.getSpans(ctx, SpanWeight.Postings.POSITIONS);
            if (spans == null) {
                continue;
            }
            boolean cont = visitLeafReader(ctx, spans, visitor);
            if (!cont) {
                break;
            }
        }
    } else {
        filter = searcher.rewrite(filter);
        Weight searcherWeight = searcher.createWeight(filter, ScoreMode.COMPLETE_NO_SCORES, 1.0f);
        for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
            Scorer leafReaderContextScorer = searcherWeight.scorer(ctx);
            if (leafReaderContextScorer == null) {
                continue;
            }
            // Can we tell from the scorer that there were no hits?
            // In <= 5.x we could stop here if the filter query had no hits.

            Spans spans = w.getSpans(ctx, SpanWeight.Postings.POSITIONS);
            if (spans == null) {
                continue;
            }
            DocIdSetIterator filterItr = leafReaderContextScorer.iterator();

            if (filterItr == null || filterItr.equals(DocIdSetIterator.empty())) {
                continue;
            }
            boolean cont = visitLeafReader(ctx, spans, filterItr, visitor);
            if (!cont) {
                break;
            }
        }
    }
}
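
The crawler above shows the two reader-level steps that getIndexReader() typically serves in span search: rewriting the query against the reader and walking the reader's per-segment leaves. A condensed sketch of that skeleton, assuming Lucene 7/8-era signatures:

// rewrite against the searcher's reader, then visit each leaf (segment)
SpanQuery rewritten = (SpanQuery) spanQuery.rewrite(searcher.getIndexReader());
SpanWeight weight = rewritten.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f);
for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
    Spans spans = weight.getSpans(ctx, SpanWeight.Postings.POSITIONS);
    if (spans != null) {
        // ... iterate matches within this segment
    }
}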

From source file:org.tallison.lucene.search.concordance.classic.ConcordanceSearcher.java

License:Apache License

/**
 * Like
 * {@link #search(IndexSearcher, String, Query, Query, Analyzer, AbstractConcordanceWindowCollector)}
 * but this takes a SpanQuery
 *
 * @param searcher    searcher
 * @param spanQuery query to use to identify the targets
 * @param filter    filter for document retrieval
 * @param analyzer  to re-analyze terms for window calculations and sort key building
 * @param collector to process (and store) the results
 * @throws TargetTokenNotFoundException if target token is not found
 * @throws IllegalArgumentException if the field can't be found in the main query
 * @throws java.io.IOException if there is an underlying IOException in the reader
 */
public void searchSpan(IndexSearcher searcher, SpanQuery spanQuery, Query filter, Analyzer analyzer,
        AbstractConcordanceWindowCollector collector)
        throws TargetTokenNotFoundException, IllegalArgumentException, IOException {

    Set<String> fields = new HashSet<>(windowBuilder.getFieldSelector());
    fields.add(spanQuery.getField());
    DocTokenOffsetsVisitor visitor = new ConcDTOffsetVisitor(spanQuery.getField(), analyzer, fields, collector);
    SpansCrawler.crawl(spanQuery, filter, searcher, visitor);

    collector.setTotalDocs(searcher.getIndexReader().numDocs());
}