List of usage examples for `org.apache.lucene.search.IndexSearcher.getIndexReader()`
public IndexReader getIndexReader()
From source file:org.sd.text.lucene.LuceneSearcher.java
License:Open Source License
public static void main(String[] args) throws IOException { //argi: path to lucene index dir for (int i = 0; i < args.length; ++i) { final LuceneSearcher luceneSearcher = new LuceneSearcher(args[i]); luceneSearcher.open();/*from ww w . j a va 2 s . c o m*/ final IndexSearcher indexSearcher = luceneSearcher.getIndexSearcher(); final IndexReader indexReader = indexSearcher.getIndexReader(); System.out.println("numDocs(" + args[i] + ")=" + indexReader.numDocs()); luceneSearcher.close(); } }
From source file:org.sindice.siren.trec.SindiceDEIndexingTest.java
License:Apache License
@Test public void testSimpleQueries() throws Exception { final Directory dir = new RAMDirectory(); final File input = new File("./src/test/resources"); final SindiceDEIndexing indexDE = new SindiceDEIndexing(input, dir); indexDE.indexIt();// w ww .jav a 2 s . c o m final IndexSearcher searcher = new IndexSearcher(dir); // Search for any entities containing the term "rna" TopDocs td = searcher.search(new SirenTermQuery(outgoingField.createTerm("rna")), 10); assertEquals(1, td.totalHits); final Document doc = searcher.getIndexReader().document(td.scoreDocs[0].doc); assertEquals("http://eprints.rkbexplorer.com/id/caltech/eprints-7519", doc.get(Indexing.SUBJECT)); // Search for any entities that have ther term "abstract" in the predicate (i.e., the cell 0 of the tuple). final SirenTupleQuery tq = new SirenTupleQuery(); final SirenCellQuery cell = new SirenCellQuery(new SirenTermQuery(outgoingField.createTerm("abstract"))); cell.setConstraint(0); tq.add(cell, Occur.MUST); td = searcher.search(tq, 10); assertEquals(2, td.totalHits); dir.close(); }
From source file:org.sonatype.nexus.index.Nexus5393IndexEntryDuplicationIT.java
License:Open Source License
protected void ensureUniqueness() throws IOException { final IndexingContext context = indexerManager.getRepositoryIndexContext("central"); final HashSet<String> uinfos = new HashSet<String>(); final ArrayList<String> duplicates = new ArrayList<String>(); final IndexSearcher indexSearcher = context.acquireIndexSearcher(); try {/*from w w w.jav a2 s .c o m*/ final IndexReader r = indexSearcher.getIndexReader(); for (int i = 0; i < r.maxDoc(); i++) { if (!r.isDeleted(i)) { final Document d = r.document(i); String uinfo = d.get(ArtifactInfo.UINFO); if (uinfo != null && !uinfos.add(uinfo)) { duplicates.add(uinfo); } } } } finally { context.releaseIndexSearcher(indexSearcher); } // remote proxy contains only one artifact: log4j-1.2.13: so we expect out index to have no // dupes and only one artifact if (!duplicates.isEmpty() || uinfos.size() > 1) { Assert.fail("UINFOs are duplicated or we scanned some unexpected ones, duplicates=" + duplicates + ", uinfos=" + uinfos); } }
From source file:org.sonatype.nexus.index.Nexus5393IndexEntryDuplicationLocalTest.java
License:Open Source License
protected void ensureUniqueness() throws IOException { final IndexingContext context = indexerManager.getRepositoryIndexContext("releases"); final HashSet<String> uinfos = new HashSet<String>(); final ArrayList<String> duplicates = new ArrayList<String>(); final IndexSearcher indexSearcher = context.acquireIndexSearcher(); try {/*from w w w.j av a2s. c o m*/ final IndexReader r = indexSearcher.getIndexReader(); for (int i = 0; i < r.maxDoc(); i++) { if (!r.isDeleted(i)) { final Document d = r.document(i); String uinfo = d.get(ArtifactInfo.UINFO); if (uinfo != null && !uinfos.add(uinfo)) { duplicates.add(uinfo); } } } } finally { context.releaseIndexSearcher(indexSearcher); } // remote proxy contains only one artifact: log4j-1.2.13: so we expect out index to have no // dupes and only one artifact if (!duplicates.isEmpty() || uinfos.size() > 1) { Assert.fail("UINFOs are duplicated or we scanned some unexpected ones, duplicates=" + duplicates + ", uinfos=" + uinfos); } }
From source file:org.sonatype.nexus.ReindexIT.java
License:Open Source License
protected void shiftContextInTime(IndexingContext ctx, int shiftDays) throws IOException { if (shiftDays != 0) { final IndexWriter iw = ctx.getIndexWriter(); final IndexSearcher is = ctx.acquireIndexSearcher(); try {/*from w w w.j a v a 2 s . c o m*/ final IndexReader ir = is.getIndexReader(); for (int docNum = 0; docNum < ir.maxDoc(); docNum++) { if (!ir.isDeleted(docNum)) { Document doc = ir.document(docNum); String lastModified = doc.get(ArtifactInfo.LAST_MODIFIED); if (lastModified != null) { long lm = Long.parseLong(lastModified); lm = lm + (shiftDays * A_DAY_MILLIS); doc.removeFields(ArtifactInfo.LAST_MODIFIED); doc.add(new Field(ArtifactInfo.LAST_MODIFIED, Long.toString(lm), Field.Store.YES, Field.Index.NO)); iw.updateDocument(new Term(ArtifactInfo.UINFO, doc.get(ArtifactInfo.UINFO)), doc); } } } ctx.optimize(); ctx.commit(); // shift timestamp too if (ctx.getTimestamp() != null) { ctx.updateTimestamp(true, new Date(ctx.getTimestamp().getTime() + (shiftDays * A_DAY_MILLIS))); } else { ctx.updateTimestamp(true, new Date(System.currentTimeMillis() + (shiftDays * A_DAY_MILLIS))); } } finally { ctx.releaseIndexSearcher(is); } } }
From source file:org.talend.dataquality.semantic.api.LocalDictionaryCache.java
License:Open Source License
private List<DQDocument> dqDocListFromTopDocs(String categoryName, TopDocs docs) throws IOException { mgr.maybeRefresh();//from w w w.j a va2 s.c om IndexSearcher searcher = mgr.acquire(); IndexReader reader = searcher.getIndexReader(); List<DQDocument> dqDocList = new ArrayList<>(); for (ScoreDoc scoreDoc : docs.scoreDocs) { Document luceneDoc = reader.document(scoreDoc.doc); DQDocument dqDoc = DictionaryUtils.dictionaryEntryFromDocument(luceneDoc, categoryName); dqDocList.add(dqDoc); } mgr.release(searcher); return dqDocList; }
From source file:org.talend.dataquality.semantic.api.LocalDictionaryCache.java
License:Open Source License
private Set<String> doSuggestValues(String categoryName, String input, int num, boolean isPrefixSearch) { String jointInput = DictionarySearcher.getJointTokens(input); String queryString = isPrefixSearch ? jointInput + "*" : "*" + jointInput + "*"; final BooleanQuery booleanQuery = new BooleanQuery(); final Query catQuery = new TermQuery(new Term(DictionarySearcher.F_WORD, categoryName)); booleanQuery.add(catQuery, BooleanClause.Occur.MUST); final Query wildcardQuery = new WildcardQuery(new Term(DictionarySearcher.F_SYNTERM, queryString)); booleanQuery.add(wildcardQuery, BooleanClause.Occur.MUST); Set<String> results = new TreeSet<String>(); try {//www . j a v a2 s .c om mgr.maybeRefresh(); IndexSearcher searcher = mgr.acquire(); IndexReader reader = searcher.getIndexReader(); TopDocs topDocs = searcher.search(booleanQuery, num); mgr.release(searcher); for (int i = 0; i < topDocs.scoreDocs.length; i++) { Document doc = reader.document(topDocs.scoreDocs[i].doc); IndexableField[] fields = doc.getFields(DictionarySearcher.F_RAW); for (IndexableField f : fields) { final String str = f.stringValue(); if (isPrefixSearch) { if (StringUtils.startsWithIgnoreCase(str, input) || StringUtils .startsWithIgnoreCase(DictionarySearcher.getJointTokens(str), jointInput)) { results.add(str); } } else {// infix search if (StringUtils.containsIgnoreCase(str, input) || StringUtils .containsIgnoreCase(DictionarySearcher.getJointTokens(str), jointInput)) { results.add(str); } } } } } catch (IOException e) { LOGGER.trace(e.getMessage(), e); } return results; }
From source file:org.talend.dataquality.semantic.index.DictionarySearcher.java
License:Open Source License
/**
 * Method "getNumDocs".
 *
 * @return the number of documents in the index, or -1 if the index cannot be read
 */
public int getNumDocs() {
    try {
        final IndexSearcher searcher = mgr.acquire();
        try {
            return searcher.getIndexReader().numDocs();
        } finally {
            // Release in finally: the original skipped mgr.release() if
            // numDocs() threw, leaking the acquired searcher reference.
            mgr.release(searcher);
        }
    } catch (IOException e) {
        LOGGER.error(e.getMessage(), e);
    }
    return -1;
}
From source file:org.tallison.lucene.search.concordance.charoffsets.SpansCrawler.java
License:Apache License
public static void crawl(SpanQuery query, Query filter, IndexSearcher searcher, DocTokenOffsetsVisitor visitor) throws IOException, TargetTokenNotFoundException { query = (SpanQuery) query.rewrite(searcher.getIndexReader()); SpanWeight w = query.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f); if (filter == null) { for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) { Spans spans = w.getSpans(ctx, SpanWeight.Postings.POSITIONS); if (spans == null) { continue; }/*from w w w .j ava2s . c o m*/ boolean cont = visitLeafReader(ctx, spans, visitor); if (!cont) { break; } } } else { filter = searcher.rewrite(filter); Weight searcherWeight = searcher.createWeight(filter, ScoreMode.COMPLETE_NO_SCORES, 1.0f); for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) { Scorer leafReaderContextScorer = searcherWeight.scorer(ctx); if (leafReaderContextScorer == null) { continue; } //Can we tell from the scorer that there were no hits? //in <= 5.x we could stop here if the filter query had no hits. Spans spans = w.getSpans(ctx, SpanWeight.Postings.POSITIONS); if (spans == null) { continue; } DocIdSetIterator filterItr = leafReaderContextScorer.iterator(); if (filterItr == null || filterItr.equals(DocIdSetIterator.empty())) { continue; } boolean cont = visitLeafReader(ctx, spans, filterItr, visitor); if (!cont) { break; } } } }
From source file:org.tallison.lucene.search.concordance.classic.ConcordanceSearcher.java
License:Apache License
/** * Like/*w ww . j a v a 2 s . com*/ * {@link #search(IndexSearcher, String, Query, Query, Analyzer, AbstractConcordanceWindowCollector)} * but this takes a SpanQuery * * @param searcher searcher * @param spanQuery query to use to identify the targets * @param filter filter for document retrieval * @param analyzer to re-analyze terms for window calculations and sort key building * @param collector to process (and store) the results * @throws TargetTokenNotFoundException if target token is not found * @throws IllegalArgumentException if the field can't be found in the main query * @throws java.io.IOException if there is an underlying IOException in the reader */ public void searchSpan(IndexSearcher searcher, SpanQuery spanQuery, Query filter, Analyzer analyzer, AbstractConcordanceWindowCollector collector) throws TargetTokenNotFoundException, IllegalArgumentException, IOException { Set<String> fields = new HashSet<>(windowBuilder.getFieldSelector()); fields.add(spanQuery.getField()); DocTokenOffsetsVisitor visitor = new ConcDTOffsetVisitor(spanQuery.getField(), analyzer, fields, collector); SpansCrawler.crawl(spanQuery, filter, searcher, visitor); collector.setTotalDocs(searcher.getIndexReader().numDocs()); }