List of usage examples for org.apache.lucene.search IndexSearcher getIndexReader
public IndexReader getIndexReader()
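Before the project-specific examples below, here is a minimal, self-contained sketch of the typical pattern: an IndexSearcher does not own its reader, so getIndexReader() simply hands back the reader the searcher was constructed with, through which index-level statistics are available. The index path and the Lucene 5+ FSDirectory.open(Path) signature are illustrative assumptions.

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class GetIndexReaderExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical index location; point this at a real index directory.
        Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
        DirectoryReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);

        // getIndexReader() returns the reader the searcher was built on,
        // so index-level statistics can be read through the searcher.
        IndexReader fromSearcher = searcher.getIndexReader();
        System.out.println("live docs: " + fromSearcher.numDocs()
                + ", max doc: " + fromSearcher.maxDoc());

        // The searcher does not close the reader; the caller owns it.
        reader.close();
        dir.close();
    }
}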
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.DocumentSubsetReaderTests.java
License:Open Source License
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/32457")
public void testSearch() throws Exception {
    IndexWriter iw = new IndexWriter(directory, newIndexWriterConfig());
    Document document = new Document();
    document.add(new StringField("field", "value1", Field.Store.NO));
    iw.addDocument(document);
    document = new Document();
    document.add(new StringField("field", "value2", Field.Store.NO));
    iw.addDocument(document);
    document = new Document();
    document.add(new StringField("field", "value3", Field.Store.NO));
    iw.addDocument(document);
    document = new Document();
    document.add(new StringField("field", "value4", Field.Store.NO));
    iw.addDocument(document);
    iw.forceMerge(1);
    iw.deleteDocuments(new Term("field", "value3"));
    iw.close();
    openDirectoryReader();

    IndexSearcher indexSearcher = new IndexSearcher(DocumentSubsetReader.wrap(directoryReader,
            bitsetFilterCache, new TermQuery(new Term("field", "value1"))));
    assertThat(indexSearcher.getIndexReader().numDocs(), equalTo(1));
    TopDocs result = indexSearcher.search(new MatchAllDocsQuery(), 1);
    assertThat(result.totalHits, equalTo(1L));
    assertThat(result.scoreDocs[0].doc, equalTo(0));

    indexSearcher = new IndexSearcher(DocumentSubsetReader.wrap(directoryReader, bitsetFilterCache,
            new TermQuery(new Term("field", "value2"))));
    assertThat(indexSearcher.getIndexReader().numDocs(), equalTo(1));
    result = indexSearcher.search(new MatchAllDocsQuery(), 1);
    assertThat(result.totalHits, equalTo(1L));
    assertThat(result.scoreDocs[0].doc, equalTo(1));

    // this doc has been marked as deleted:
    indexSearcher = new IndexSearcher(DocumentSubsetReader.wrap(directoryReader, bitsetFilterCache,
            new TermQuery(new Term("field", "value3"))));
    assertThat(indexSearcher.getIndexReader().numDocs(), equalTo(0));
    result = indexSearcher.search(new MatchAllDocsQuery(), 1);
    assertThat(result.totalHits, equalTo(0L));

    indexSearcher = new IndexSearcher(DocumentSubsetReader.wrap(directoryReader, bitsetFilterCache,
            new TermQuery(new Term("field", "value4"))));
    assertThat(indexSearcher.getIndexReader().numDocs(), equalTo(1));
    result = indexSearcher.search(new MatchAllDocsQuery(), 1);
    assertThat(result.totalHits, equalTo(1L));
    assertThat(result.scoreDocs[0].doc, equalTo(3));
}
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.SecurityIndexSearcherWrapper.java
License:Open Source License
@Override
protected IndexSearcher wrap(IndexSearcher searcher) throws EngineException {
    if (licenseState.isSecurityEnabled() == false
            || licenseState.isDocumentAndFieldLevelSecurityAllowed() == false) {
        return searcher;
    }
    final DirectoryReader directoryReader = (DirectoryReader) searcher.getIndexReader();
    if (directoryReader instanceof DocumentSubsetDirectoryReader) {
        // The reasons why we return a custom searcher:
        // 1) if the role query is sparse, large parts of the main query can be skipped
        // 2) if the role query doesn't match any docs in a segment, that segment can be skipped
        IndexSearcher indexSearcher = new IndexSearcherWrapper((DocumentSubsetDirectoryReader) directoryReader);
        indexSearcher.setQueryCache(indexSearcher.getQueryCache());
        indexSearcher.setQueryCachingPolicy(indexSearcher.getQueryCachingPolicy());
        indexSearcher.setSimilarity(indexSearcher.getSimilarity(true));
        return indexSearcher;
    }
    return searcher;
}
From source file:org.entando.entando.aps.system.services.dataobjectsearchengine.SearcherDAO.java
License:Open Source License
private void releaseResources(IndexSearcher searcher) throws ApsSystemException {
    try {
        if (searcher != null) {
            searcher.getIndexReader().close();
        }
    } catch (IOException e) {
        throw new ApsSystemException("Error closing searcher", e);
    }
}
From source file:org.exist.indexing.lucene.LuceneIndexWorker.java
License:Open Source License
/**
 * SOLR
 * @param context
 * @param toBeMatchedURIs
 * @param queryText
 * @return search report
 */
public NodeImpl search(final XQueryContext context, final List<String> toBeMatchedURIs, String queryText)
        throws XPathException {

    NodeImpl report = null;
    IndexSearcher searcher = null;
    try {
        // Get index searcher
        searcher = index.getSearcher();

        // Get analyzer : to be retrieved from configuration
        final Analyzer searchAnalyzer = new StandardAnalyzer(Version.LUCENE_43);

        // Setup query Version, default field, analyzer
        final QueryParserWrapper parser = getQueryParser("", searchAnalyzer, null);
        final Query query = parser.parse(queryText);

        // extract all used fields from query
        final String[] fields = LuceneUtil.extractFields(query, searcher.getIndexReader());

        final PlainTextHighlighter highlighter = new PlainTextHighlighter(query, searcher.getIndexReader());

        final MemTreeBuilder builder = new MemTreeBuilder();
        builder.startDocument();

        // start root element
        final int nodeNr = builder.startElement("", "results", "results", null);

        // Perform actual search
        searcher.search(query, new Collector() {
            private Scorer scorer;
            private AtomicReader reader;

            @Override
            public void setScorer(Scorer scorer) throws IOException {
                this.scorer = scorer;
            }

            @Override
            public void collect(int docNum) throws IOException {
                Document doc = reader.document(docNum);

                // Get URI field of document
                String fDocUri = doc.get(FIELD_DOC_URI);

                // Get score
                float score = scorer.score();

                // Check if document URI has a full match or if a
                // document is in a collection
                if (isDocumentMatch(fDocUri, toBeMatchedURIs)) {
                    DocumentImpl storedDoc = null;
                    try {
                        // try to read document to check if user is allowed to access it
                        storedDoc = context.getBroker().getXMLResource(XmldbURI.createInternal(fDocUri),
                                Lock.READ_LOCK);
                        if (storedDoc == null) {
                            return;
                        }

                        // setup attributes
                        AttributesImpl attribs = new AttributesImpl();
                        attribs.addAttribute("", "uri", "uri", "CDATA", fDocUri);
                        attribs.addAttribute("", "score", "score", "CDATA", "" + score);

                        // write element and attributes
                        builder.startElement("", "search", "search", attribs);
                        for (String field : fields) {
                            String[] fieldContent = doc.getValues(field);
                            attribs.clear();
                            attribs.addAttribute("", "name", "name", "CDATA", field);
                            for (String content : fieldContent) {
                                List<Offset> offsets = highlighter.getOffsets(content, searchAnalyzer);
                                if (offsets != null) {
                                    builder.startElement("", "field", "field", attribs);
                                    highlighter.highlight(content, offsets, builder);
                                    builder.endElement();
                                }
                            }
                        }
                        builder.endElement();

                        // clean attributes
                        attribs.clear();
                    } catch (PermissionDeniedException e) {
                        // not allowed to read the document: ignore the match.
                    } finally {
                        if (storedDoc != null) {
                            storedDoc.getUpdateLock().release(Lock.READ_LOCK);
                        }
                    }
                }
            }

            @Override
            public void setNextReader(AtomicReaderContext atomicReaderContext) throws IOException {
                this.reader = atomicReaderContext.reader();
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
                return true;
            }
        });

        // finish root element
        builder.endElement();

        //System.out.println(builder.getDocument().toString());

        // TODO check
        report = ((org.exist.memtree.DocumentImpl) builder.getDocument()).getNode(nodeNr);

    } catch (Exception ex) {
        ex.printStackTrace();
        LOG.error(ex);
        throw new XPathException(ex);
    } finally {
        index.releaseSearcher(searcher);
    }
    return report;
}
From source file:org.exist.indexing.lucene.PlugToLucene.java
License:Open Source License
private NodeImpl search(List<String> toBeMatchedURIs, String queryText) throws XPathException {
    NodeImpl report = null;
    IndexSearcher searcher = null;
    try {
        // Get index searcher
        searcher = index.getSearcher();

        // Get analyzer : to be retrieved from configuration
        Analyzer searchAnalyzer = new StandardAnalyzer(Version.LUCENE_29);

        // Setup query Version, default field, analyzer
        QueryParser parser = new QueryParser(Version.LUCENE_29, "", searchAnalyzer);
        Query query = parser.parse(queryText);

        // extract all used fields from query
        String[] fields = LuceneUtil.extractFields(query, searcher.getIndexReader());

        // Setup collector for results
        LuceneHitCollector collector = new LuceneHitCollector();

        // Perform actual search
        searcher.search(query, collector);

        // Retrieve all documents that match the query
        List<ScoreDoc> results = collector.getDocsByScore();

        // reusable attributes
        AttributesImpl attribs = null;

        PlainTextHighlighter highlighter = new PlainTextHighlighter(query, searcher.getIndexReader());

        MemTreeBuilder builder = new MemTreeBuilder();
        builder.startDocument();

        // start root element
        int nodeNr = builder.startElement("", "results", "results", null);

        BitVector processed = new BitVector(searcher.maxDoc());

        // Process result documents
        for (ScoreDoc scoreDoc : results) {
            if (processed.get(scoreDoc.doc))
                continue;
            processed.set(scoreDoc.doc);

            Document doc = searcher.doc(scoreDoc.doc);

            // Get URI field of document
            String fDocUri = doc.get(FIELD_META_DOC_URI);

            // Get score
            float score = scoreDoc.score;

            // Check if document URI has a full match or if a
            // document is in a collection
            if (isDocumentMatch(fDocUri, toBeMatchedURIs)) {

                // setup attributes
                attribs = new AttributesImpl();
                attribs.addAttribute("", "uri", "uri", "CDATA", fDocUri);
                attribs.addAttribute("", "score", "score", "CDATA", "" + score);

                // write element and attributes
                builder.startElement("", "search", "search", attribs);
                for (String field : fields) {
                    String[] fieldContent = doc.getValues(field);
                    attribs.clear();
                    attribs.addAttribute("", "name", "name", "CDATA", field);
                    for (String content : fieldContent) {
                        List<Offset> offsets = highlighter.getOffsets(content, searchAnalyzer);
                        if (offsets != null) {
                            builder.startElement("", "field", "field", attribs);
                            highlighter.highlight(content, offsets, builder);
                            builder.endElement();
                        }
                    }
                }
                builder.endElement();

                // clean attributes
                attribs.clear();
            }
        }

        // finish root element
        builder.endElement();

        //System.out.println(builder.getDocument().toString());

        // TODO check
        report = ((org.exist.memtree.DocumentImpl) builder.getDocument()).getNode(nodeNr);

    } catch (Exception ex) {
        ex.printStackTrace();
        //LOG.error(ex);
        throw new XPathException(ex);
    } finally {
        index.releaseSearcher(searcher);
    }
    return report;
}
From source file:org.exist.indexing.range.RangeIndexWorker.java
License:Open Source License
public NodeSet query(int contextId, DocumentSet docs, NodeSet contextSet, List<QName> qnames, AtomicValue[] keys,
        RangeIndex.Operator operator, int axis) throws IOException, XPathException {
    qnames = getDefinedIndexes(qnames);
    NodeSet resultSet = NodeSet.EMPTY_SET;
    IndexSearcher searcher = null;
    try {
        searcher = index.getSearcher();
        for (QName qname : qnames) {
            Query query;
            String field = LuceneUtil.encodeQName(qname, index.getBrokerPool().getSymbols());
            if (keys.length > 1) {
                BooleanQuery bool = new BooleanQuery();
                for (AtomicValue key : keys) {
                    bool.add(toQuery(field, qname, key, operator, docs), BooleanClause.Occur.SHOULD);
                }
                query = bool;
            } else {
                query = toQuery(field, qname, keys[0], operator, docs);
            }
            if (contextSet != null && contextSet.hasOne() && contextSet.getItemType() != Type.DOCUMENT) {
                NodesFilter filter = new NodesFilter(contextSet);
                filter.init(searcher.getIndexReader());
                FilteredQuery filtered = new FilteredQuery(query, filter,
                        FilteredQuery.LEAP_FROG_FILTER_FIRST_STRATEGY);
                resultSet = doQuery(contextId, docs, contextSet, axis, searcher, null, filtered, null);
            } else {
                resultSet = doQuery(contextId, docs, contextSet, axis, searcher, null, query, null);
            }
        }
    } finally {
        index.releaseSearcher(searcher);
    }
    return resultSet;
}
From source file:org.exist.indexing.range.RangeIndexWorker.java
License:Open Source License
public NodeSet queryField(int contextId, DocumentSet docs, NodeSet contextSet, Sequence fields, Sequence[] keys,
        RangeIndex.Operator[] operators, int axis) throws IOException, XPathException {
    NodeSet resultSet = NodeSet.EMPTY_SET;
    IndexSearcher searcher = null;
    try {
        searcher = index.getSearcher();
        BooleanQuery query = new BooleanQuery();
        int j = 0;
        for (SequenceIterator i = fields.iterate(); i.hasNext(); j++) {
            String field = i.nextItem().getStringValue();
            if (keys[j].getItemCount() > 1) {
                BooleanQuery bool = new BooleanQuery();
                bool.setMinimumNumberShouldMatch(1);
                for (SequenceIterator ki = keys[j].iterate(); ki.hasNext();) {
                    Item key = ki.nextItem();
                    Query q = toQuery(field, null, key.atomize(), operators[j], docs);
                    bool.add(q, BooleanClause.Occur.SHOULD);
                }
                query.add(bool, BooleanClause.Occur.MUST);
            } else {
                Query q = toQuery(field, null, keys[j].itemAt(0).atomize(), operators[j], docs);
                query.add(q, BooleanClause.Occur.MUST);
            }
        }
        Query qu = query;
        BooleanClause[] clauses = query.getClauses();
        if (clauses.length == 1) {
            qu = clauses[0].getQuery();
        }
        if (contextSet != null && contextSet.hasOne() && contextSet.getItemType() != Type.DOCUMENT) {
            NodesFilter filter = new NodesFilter(contextSet);
            filter.init(searcher.getIndexReader());
            FilteredQuery filtered = new FilteredQuery(qu, filter, FilteredQuery.LEAP_FROG_FILTER_FIRST_STRATEGY);
            resultSet = doQuery(contextId, docs, contextSet, axis, searcher, null, filtered, null);
        } else {
            resultSet = doQuery(contextId, docs, contextSet, axis, searcher, null, qu, null);
        }
    } finally {
        index.releaseSearcher(searcher);
    }
    return resultSet;
}
From source file:org.frontcache.cache.impl.LuceneIndexManager.java
License:Apache License
public long getDocumentsCount(String domain) {
    long count = -1;
    IndexWriter iWriter = null;
    try {
        iWriter = getIndexWriter();
        if (iWriter == null) {
            return count;
        }
    } catch (Exception e1) {
        logger.debug("Error during getting indexWriter. " + e1.getMessage());
        return count;
    }

    IndexReader reader = null;
    try {
        reader = DirectoryReader.open(iWriter);
        Term domainTerm = new Term(DOMAIN_FIELD, domain);
        IndexSearcher searcher = new IndexSearcher(reader);
        TermStatistics termStat = searcher.termStatistics(domainTerm,
                TermContext.build(searcher.getIndexReader().getContext(), domainTerm));
        count = termStat.docFreq();
    } catch (Exception e1) {
        logger.debug("Error during reader.totalTermFreq(domainTerm). " + e1.getMessage());
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
    }
    return count;
}
From source file:org.imixs.workflow.plugins.jee.extended.LucenePlugin.java
License:Open Source License
/**
 * Returns an ItemCollection list matching the provided search term. The
 * provided search term will be extended with the user's roles to test the read
 * access level of each workitem matching the search term. The usernames and
 * user roles will be searched in lowercase!
 *
 * The optional param 'searchOrder' can be set to force lucene to sort the
 * search result by any search order.
 *
 * The optional param 'defaultOperator' can be set to Operator.AND
 *
 * @param sSearchTerm
 * @param workflowService
 * @param sortOrder
 *            - optional to sort the result
 * @param defaultOperator
 *            - optional to change the default search operator
 * @return collection of search results
 */
public static List<ItemCollection> search(String sSearchTerm, WorkflowService workflowService, Sort sortOrder,
        Operator defaultOperator) {
    ArrayList<ItemCollection> workitems = new ArrayList<ItemCollection>();
    // test if a search term is provided
    if (sSearchTerm == null || "".equals(sSearchTerm))
        return workitems;

    long ltime = System.currentTimeMillis();

    Properties prop = loadProperties();
    if (prop.isEmpty())
        return workitems;

    try {
        IndexSearcher searcher = createIndexSearcher(prop);
        QueryParser parser = createQueryParser(prop);

        // extend the search term
        if (!workflowService.isUserInRole(EntityService.ACCESSLEVEL_MANAGERACCESS)) {
            // get user names list
            List<String> userNameList = workflowService.getUserNameList();
            // create search term
            String sAccessTerm = "($readaccess:ANONYMOUS";
            for (String aRole : userNameList) {
                if (!"".equals(aRole))
                    sAccessTerm += " $readaccess:\"" + aRole + "\"";
            }
            sAccessTerm += ") AND ";
            sSearchTerm = sAccessTerm + sSearchTerm;
        }
        logger.fine(" lucene search:" + sSearchTerm);

        if (!"".equals(sSearchTerm)) {
            parser.setAllowLeadingWildcard(true);

            // set default operator?
            if (defaultOperator != null)
                parser.setDefaultOperator(defaultOperator);

            TopDocs topDocs = null;
            if (sortOrder != null) {
                topDocs = searcher.search(parser.parse(sSearchTerm), maxResult, sortOrder);
            } else {
                topDocs = searcher.search(parser.parse(sSearchTerm), maxResult);
            }
            logger.fine(" total hits=" + topDocs.totalHits);

            // Get an array of references to matched documents
            ScoreDoc[] scoreDosArray = topDocs.scoreDocs;
            for (ScoreDoc scoredoc : scoreDosArray) {
                // Retrieve the matched document and show relevant details
                Document doc = searcher.doc(scoredoc.doc);

                String sID = doc.get("$uniqueid");
                logger.fine(" lucene $uniqueid=" + sID);
                ItemCollection itemCol = workflowService.getEntityService().load(sID);
                if (itemCol != null) {
                    workitems.add(itemCol);
                } else {
                    logger.warning("[LucenePlugin] index returned unreadable workitem : " + sID);
                    // this situation happens if the search index returned
                    // documents the current user has no read access to.
                    // This should normally be avoided with the $readaccess
                    // search phrase! So if this happens we need to check the
                    // createDocument method!
                }
            }
        }
        searcher.getIndexReader().close();

        logger.fine(" lucene search: " + (System.currentTimeMillis() - ltime) + " ms");
    } catch (Exception e) {
        logger.warning(" lucene error!");
        e.printStackTrace();
    }

    return workitems;
}
From source file:org.jahia.services.search.facets.SimpleJahiaJcrFacets.java
License:Open Source License
/**
 * Use the Lucene FieldCache to get counts for each unique field value in <code>docs</code>.
 * The field must have at most one indexed token per document.
 */
public NamedList<Object> getFieldCacheCounts(IndexSearcher searcher, OpenBitSet docs, String fieldName, int offset,
        int limit, int mincount, boolean missing, String sort, String prefix, String locale,
        ExtendedPropertyDefinition epd) throws IOException {
    // TODO: If the number of terms is high compared to docs.size(), and zeros==false,
    // we should use an alternate strategy to avoid
    // 1) creating another huge int[] for the counts
    // 2) looping over that huge int[] looking for the rare non-zeros.
    //
    // Yet another variation: if docs.size() is small and termvectors are stored,
    // then use them instead of the FieldCache.
    //
    // TODO: this function is too big and could use some refactoring, but
    // we also need a facet cache, and refactoring of SimpleFacets instead of
    // trying to pass all the various params around.

    FieldType ft = getType(epd);
    NamedList<Object> res = new NamedList<Object>();

    FieldCache.StringIndex si = FieldCache.DEFAULT.getStringIndex(searcher.getIndexReader(), fieldName);
    final String[] terms = si.lookup;
    final int[] termNum = si.order;

    if (prefix != null && prefix.length() == 0)
        prefix = null;

    int startTermIndex, endTermIndex;
    if (prefix != null) {
        startTermIndex = Arrays.binarySearch(terms, prefix, nullStrComparator);
        if (startTermIndex < 0)
            startTermIndex = -startTermIndex - 1;
        // find the end term. \uffff isn't a legal unicode char, but only compareTo
        // is used, so it should be fine, and is guaranteed to be bigger than legal chars.
        endTermIndex = Arrays.binarySearch(terms, prefix + "\uffff\uffff\uffff\uffff", nullStrComparator);
        endTermIndex = -endTermIndex - 1;
    } else {
        startTermIndex = 1;
        endTermIndex = terms.length;
    }

    final int nTerms = endTermIndex - startTermIndex;

    if (nTerms > 0 && docs.size() >= mincount) {
        // count collection array only needs to be as big as the number of terms we are
        // going to collect counts for.
        final int[] counts = new int[nTerms];

        DocIdSetIterator iter = docs.iterator();
        while (iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            int term = termNum[iter.docID()];
            int arrIdx = term - startTermIndex;
            if (arrIdx >= 0 && arrIdx < nTerms)
                counts[arrIdx]++;
        }

        // IDEA: we could also maintain a count of "other"... everything that fell outside
        // of the top 'N'

        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

        if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
            int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
            maxsize = Math.min(maxsize, nTerms);
            final TreeSet<SimpleFacets.CountPair<String, Integer>> queue =
                    new TreeSet<SimpleFacets.CountPair<String, Integer>>();
            int min = mincount - 1; // the smallest value in the top 'N' values
            for (int i = 0; i < nTerms; i++) {
                int c = counts[i];
                if (c > min) {
                    // NOTE: we use c>min rather than c>=min as an optimization because we are going in
                    // index order, so we already know that the keys are ordered. This can be very
                    // important if a lot of the counts are repeated (like zero counts would be).
                    queue.add(new SimpleFacets.CountPair<String, Integer>(terms[startTermIndex + i], c));
                    if (queue.size() >= maxsize) {
                        break;
                    }
                }
            }
            // now select the right page from the results
            for (SimpleFacets.CountPair<String, Integer> p : queue) {
                if (--off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                res.add(ft.indexedToReadable(p.key), p.val);
            }
        } else {
            // add results in index order
            int i = 0;
            if (mincount <= 0) {
                // if mincount<=0, then we won't discard any terms and we know exactly
                // where to start.
                i = off;
                off = 0;
            }
            for (; i < nTerms; i++) {
                int c = counts[i];
                if (c < mincount || --off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                res.add(ft.indexedToReadable(terms[startTermIndex + i]), c);
            }
        }
    }

    if (missing) {
        res.add(null, getFieldMissingCount(searcher, docs, fieldName, locale));
    }

    return res;
}