List of usage examples for org.apache.lucene.search IndexSearcher searchAfter
public TopDocs searchAfter(ScoreDoc after, Query query, int numHits) throws IOException
Finds the top n hits for query where all results are after a previous result (after).
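Before the project-specific examples, here is a minimal, self-contained sketch of cursor-style paging with searchAfter. It is not taken from any of the sources below; the index path and the "title" field are illustrative placeholders.

// Minimal sketch: page through all hits for a query, pageSize docs at a time.
try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/index")))) {
    IndexSearcher searcher = new IndexSearcher(reader);
    Query query = new TermQuery(new Term("title", "lucene"));
    int pageSize = 10;
    ScoreDoc after = null;                     // null means "start from the first hit"
    TopDocs page;
    do {
        // searchAfter returns the next pageSize hits that come after the "after" cursor
        page = (after == null) ? searcher.search(query, pageSize)
                               : searcher.searchAfter(after, query, pageSize);
        for (ScoreDoc sd : page.scoreDocs) {
            Document doc = searcher.doc(sd.doc);
            System.out.println(doc.get("title") + " score=" + sd.score);
        }
        after = page.scoreDocs.length > 0 ? page.scoreDocs[page.scoreDocs.length - 1] : null;
    } while (page.scoreDocs.length == pageSize);
}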
From source file:com.github.wxiaoqi.search.lucene.LuceneDao.java
License:Open Source License
public TableResultResponse<IndexObject> page(Integer pageNumber, Integer pageSize, String keyword) {
    IndexReader indexReader = null;
    TableResultResponse<IndexObject> pageQuery = null;
    List<IndexObject> searchResults = new ArrayList<>();
    try {
        indexReader = DirectoryReader.open(this.getDirectory());
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        Query query = QueryUtil.query(keyword, this.getAnalyzer(), "title", "descripton");
        ScoreDoc lastScoreDoc = this.getLastScoreDoc(pageNumber, pageSize, query, indexSearcher);
        // fetch the requested page of documents with searchAfter
        TopDocs topDocs = indexSearcher.searchAfter(lastScoreDoc, query, pageSize);
        Highlighter highlighter = this.addStringHighlighter(query);
        log.info("keyword: {}", keyword);
        log.info("{}", topDocs.totalHits);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            int docID = scoreDoc.doc;
            float score = scoreDoc.score;
            Document document = indexSearcher.doc(docID);
            IndexObject indexObject = DocumentUtil.document2IndexObject(this.getAnalyzer(), highlighter,
                    document, score);
            searchResults.add(indexObject);
            log.info("" + score);
        }
        Collections.sort(searchResults);
        pageQuery = new TableResultResponse<>(topDocs.totalHits, searchResults);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        try {
            indexReader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return pageQuery;
}
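The getLastScoreDoc helper called above is not part of this snippet. A sketch of how such a helper is usually written (the name and signature mirror the call site; this is an assumption, not the project's actual code): for page N it fetches the first (N-1)*pageSize hits and returns the last ScoreDoc, or null for the first page.

// Hypothetical helper matching the call above: returns the ScoreDoc that precedes
// the requested page, or null when the first page is requested.
private ScoreDoc getLastScoreDoc(Integer pageNumber, Integer pageSize, Query query,
        IndexSearcher indexSearcher) throws IOException {
    if (pageNumber <= 1) {
        return null; // first page: no cursor needed
    }
    int skipped = (pageNumber - 1) * pageSize;   // hits that belong to earlier pages
    TopDocs topDocs = indexSearcher.search(query, skipped);
    if (topDocs.scoreDocs.length == 0) {
        return null;
    }
    return topDocs.scoreDocs[topDocs.scoreDocs.length - 1];
}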
From source file:com.stratio.cassandra.index.LuceneIndex.java
License:Apache License
private TopDocs topDocs(IndexSearcher searcher, Query query, Sort sort, ScoreDoc after, int count,
        boolean usesRelevance) throws IOException {
    if (sort == null) {
        if (!usesRelevance) {
            FieldDoc start = after == null ? null : (FieldDoc) after;
            TopFieldCollector tfc = TopFieldCollector.create(this.sort, count, start, true, false, false, false);
            Collector collector = new EarlyTerminatingSortingCollector(tfc, this.sort, count);
            searcher.search(query, collector);
            return tfc.topDocs();
        } else {
            return searcher.searchAfter(after, query, count);
        }
    } else {
        return searcher.searchAfter(after, query, count, sort);
    }
}
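Note the cast to FieldDoc above: when a Sort is involved, the after cursor passed to searchAfter must be the FieldDoc produced by a previous search with the same Sort, otherwise Lucene throws IllegalArgumentException. A short sketch of that contract (the "timestamp" field and types are illustrative assumptions):

// Sketch: cursor-based paging over a sorted index. The cursor handed to
// searchAfter(..., sort) is the FieldDoc returned by the previous sorted search.
Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG, true));
TopDocs firstPage = searcher.search(query, pageSize, sort);
if (firstPage.scoreDocs.length == pageSize) {
    ScoreDoc cursor = firstPage.scoreDocs[firstPage.scoreDocs.length - 1]; // actually a FieldDoc
    TopDocs secondPage = searcher.searchAfter(cursor, query, pageSize, sort);
}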
From source file:com.stratio.cassandra.lucene.service.LuceneIndex.java
License:Apache License
/**
 * Finds the top {@code count} hits for {@code query}, applying {@code clusteringKeyFilter} if non-null, and
 * sorting the hits by the criteria in {@code sortFields}.
 *
 * @param searcher     The {@link IndexSearcher} to be used.
 * @param query        The {@link Query} to search for.
 * @param sort         The {@link Sort} to be applied.
 * @param after        The starting {@link SearchResult}.
 * @param count        Return only the top {@code count} results.
 * @param fieldsToLoad The name of the fields to be loaded.
 * @return The found documents, sorted according to the supplied {@link Sort} instance.
 * @throws IOException If Lucene throws IO errors.
 */
public LinkedHashMap<Document, ScoreDoc> search(IndexSearcher searcher, Query query, Sort sort, ScoreDoc after,
        Integer count, Set<String> fieldsToLoad) throws IOException {
    Log.debug("%s search by query %s", logName, query);
    TopDocs topDocs;
    if (sort == null) {
        topDocs = searcher.searchAfter(after, query, count);
    } else {
        topDocs = searcher.searchAfter(after, query, count, sort);
    }
    ScoreDoc[] scoreDocs = topDocs.scoreDocs;

    // Collect the documents from query result
    LinkedHashMap<Document, ScoreDoc> searchResults = new LinkedHashMap<>();
    for (ScoreDoc scoreDoc : scoreDocs) {
        Document document = searcher.doc(scoreDoc.doc, fieldsToLoad);
        searchResults.put(document, scoreDoc);
    }
    return searchResults;
}
From source file:com.vmware.dcp.services.common.LuceneDocumentIndexService.java
License:Open Source License
private Object queryIndex(Operation op, ServiceOption targetIndex, EnumSet<QueryOption> options,
        IndexSearcher s, Query tq, Sort sort, LuceneQueryPage page, int count, long expiration,
        String indexLink, ServiceDocumentQueryResult rsp) throws Throwable {
    ScoreDoc[] hits;
    ScoreDoc after = null;
    boolean isPaginatedQuery = count != Integer.MAX_VALUE;
    boolean hasPage = page != null;
    boolean shouldProcessResults = true;
    int resultLimit = count;

    if (hasPage) {
        // For example, via GET of QueryTask.nextPageLink
        after = page.after;
        rsp.prevPageLink = page.link;
    } else if (isPaginatedQuery) {
        // QueryTask.resultLimit was set, but we don't have a page param yet,
        // which means this is the initial POST to create the QueryTask.
        // Since we are going to throw away TopDocs.hits in this case,
        // just set the limit to 1 and do not process the results.
        resultLimit = 1;
        shouldProcessResults = false;
        rsp.documentCount = 1L;
    }

    TopDocs results = null;
    rsp.queryTimeMicros = 0L;
    long queryStartTimeMicros = Utils.getNowMicrosUtc();
    long start = queryStartTimeMicros;

    do {
        if (sort == null) {
            results = s.searchAfter(after, tq, resultLimit);
        } else {
            results = s.searchAfter(after, tq, resultLimit, sort, false, false);
        }
        long end = Utils.getNowMicrosUtc();
        if (results == null) {
            return null;
        }
        hits = results.scoreDocs;

        long queryTime = end - start;
        rsp.documentCount = Long.valueOf(0);
        rsp.queryTimeMicros += queryTime;

        if (shouldProcessResults) {
            start = Utils.getNowMicrosUtc();
            processQueryResults(targetIndex, options, s, rsp, hits, queryStartTimeMicros);
            end = Utils.getNowMicrosUtc();

            if (hasOption(ServiceOption.INSTRUMENTATION)) {
                String statName = options.contains(QueryOption.INCLUDE_ALL_VERSIONS)
                        ? STAT_NAME_QUERY_ALL_VERSIONS_DURATION_MICROS
                        : STAT_NAME_QUERY_DURATION_MICROS;
                ServiceStat st = getHistogramStat(statName);
                setStat(st, queryTime);

                st = getHistogramStat(STAT_NAME_RESULT_PROCESSING_DURATION_MICROS);
                setStat(st, end - start);
            }
        }

        if (!isPaginatedQuery) {
            // single pass
            break;
        }

        if (hits.length == 0) {
            break;
        }

        ScoreDoc bottom = null;
        if (hasPage) {
            bottom = hits[hits.length - 1];
        }

        if (!hasPage || rsp.documentLinks.size() >= resultLimit || hits.length < resultLimit) {
            // query had less results then per page limit or page is full of results
            expiration += queryTime;
            rsp.nextPageLink = createNextPage(op, s, options, tq, sort, bottom, count, expiration, indexLink,
                    hasPage);
            break;
        }

        after = page.after = bottom;
        resultLimit = count - rsp.documentLinks.size();
    } while (true && resultLimit > 0);

    return rsp;
}
From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java
License:Open Source License
private ServiceDocumentQueryResult queryIndex(Operation op, EnumSet<QueryOption> options, IndexSearcher s,
        Query tq, LuceneQueryPage page, int count, long expiration, String indexLink,
        ServiceDocumentQueryResult rsp, QuerySpecification qs) throws Throwable {
    ScoreDoc[] hits;
    ScoreDoc after = null;
    boolean isPaginatedQuery = count != Integer.MAX_VALUE && !options.contains(QueryOption.TOP_RESULTS);
    boolean hasPage = page != null;
    boolean shouldProcessResults = true;
    int resultLimit = count;

    if (hasPage) {
        // For example, via GET of QueryTask.nextPageLink
        after = page.after;
        rsp.prevPageLink = page.previousPageLink;
    } else if (isPaginatedQuery) {
        // QueryTask.resultLimit was set, but we don't have a page param yet,
        // which means this is the initial POST to create the QueryTask.
        // Since we are going to throw away TopDocs.hits in this case,
        // just set the limit to 1 and do not process the results.
        resultLimit = 1;
        shouldProcessResults = false;
        rsp.documentCount = 1L;
    }

    Sort sort = this.versionSort;
    if (qs != null && qs.sortTerm != null) {
        // see if query is part of a task and already has a cached sort
        if (qs.context != null) {
            sort = (Sort) qs.context.nativeSort;
        }

        if (sort == null) {
            sort = LuceneQueryConverter.convertToLuceneSort(qs, false);
        }
    }

    TopDocs results = null;
    rsp.queryTimeMicros = 0L;
    long queryStartTimeMicros = Utils.getNowMicrosUtc();
    long start = queryStartTimeMicros;

    do {
        if (sort == null) {
            results = s.searchAfter(after, tq, count);
        } else {
            results = s.searchAfter(after, tq, count, sort, false, false);
        }
        long end = Utils.getNowMicrosUtc();

        if (results == null) {
            return rsp;
        }
        hits = results.scoreDocs;

        long queryTime = end - start;

        rsp.documentCount = 0L;
        rsp.queryTimeMicros += queryTime;
        ScoreDoc bottom = null;

        if (shouldProcessResults) {
            start = Utils.getNowMicrosUtc();
            bottom = processQueryResults(qs, options, count, s, rsp, hits, queryStartTimeMicros);
            end = Utils.getNowMicrosUtc();

            if (hasOption(ServiceOption.INSTRUMENTATION)) {
                String statName = options.contains(QueryOption.INCLUDE_ALL_VERSIONS)
                        ? STAT_NAME_QUERY_ALL_VERSIONS_DURATION_MICROS
                        : STAT_NAME_QUERY_DURATION_MICROS;
                ServiceStat st = getHistogramStat(statName);
                setStat(st, queryTime);

                st = getHistogramStat(STAT_NAME_RESULT_PROCESSING_DURATION_MICROS);
                setStat(st, end - start);
            }
        }

        if (!isPaginatedQuery && !options.contains(QueryOption.TOP_RESULTS)) {
            // single pass
            break;
        }

        if (hits.length == 0) {
            break;
        }

        if (isPaginatedQuery) {
            if (!hasPage) {
                bottom = null;
            }

            if (!hasPage || rsp.documentLinks.size() >= count || hits.length < resultLimit) {
                // query had less results then per page limit or page is full of results
                boolean createNextPageLink = true;
                if (hasPage) {
                    createNextPageLink = checkNextPageHasEntry(bottom, options, s, tq, sort, count, qs,
                            queryStartTimeMicros);
                }

                if (createNextPageLink) {
                    expiration += queryTime;
                    rsp.nextPageLink = createNextPage(op, s, qs, tq, sort, bottom, null, expiration, indexLink,
                            hasPage);
                }
                break;
            }
        }

        after = bottom;
        resultLimit = count - rsp.documentLinks.size();
    } while (resultLimit > 0);

    return rsp;
}
From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java
License:Open Source License
/**
 * Checks next page exists or not.
 *
 * If there is a valid entry in searchAfter result, this returns true.
 * If searchAfter result is empty or entries are all invalid(expired, etc), this returns false.
 *
 * For example, let's say there are 5 docs. doc=1,2,5 are valid and doc=3,4 are expired(invalid).
 *
 * When limit=2, the first page shows doc=1,2. In this logic, searchAfter will first fetch
 * doc=3,4 but they are invalid(filtered out in `processQueryResults`).
 * Next iteration will hit doc=5 and it is a valid entry. Therefore, it returns true.
 *
 * If doc=1,2 are valid and doc=3,4,5 are invalid, then searchAfter will hit doc=3,4 and
 * doc=5. However, all entries are invalid. This returns false indicating there is no next page.
 */
private boolean checkNextPageHasEntry(ScoreDoc after, EnumSet<QueryOption> options, IndexSearcher s, Query tq,
        Sort sort, int count, QuerySpecification qs, long queryStartTimeMicros) throws Throwable {

    boolean hasValidNextPageEntry = false;

    // Iterate searchAfter until it finds a *valid* entry.
    // If loop reaches to the end and no valid entries found, then current page is the last page.
    while (after != null) {
        // fetch next page
        TopDocs nextPageResults;
        if (sort == null) {
            nextPageResults = s.searchAfter(after, tq, count);
        } else {
            nextPageResults = s.searchAfter(after, tq, count, sort, false, false);
        }
        if (nextPageResults == null) {
            break;
        }

        ScoreDoc[] hits = nextPageResults.scoreDocs;
        if (hits.length == 0) {
            // reached to the end
            break;
        }

        ServiceDocumentQueryResult rspForNextPage = new ServiceDocumentQueryResult();
        rspForNextPage.documents = new HashMap<>();
        after = processQueryResults(qs, options, count, s, rspForNextPage, hits, queryStartTimeMicros);

        if (rspForNextPage.documentCount > 0) {
            hasValidNextPageEntry = true;
            break;
        }
    }

    return hasValidNextPageEntry;
}
From source file:com.vnet.demo.service.lucene.LuceneService.java
License:Apache License
public SearchResult<DocumentData> query(String keyword, int start, int number) {
    SearchResult<DocumentData> searchResult = null;
    try {
        List<DocumentData> documentDatas = new ArrayList<DocumentData>();
        DirectoryReader ireader = DirectoryReader.open(index);
        IndexSearcher isearcher = new IndexSearcher(ireader);

        Query query = new QueryParser(version, "title", analyzer).parse(keyword + "*");
        TopDocs hits = null;
        if (start > 0) {
            TopDocs result = isearcher.search(query, start);
            ScoreDoc scoreDoc = result.scoreDocs[result.scoreDocs.length - 1];
            hits = isearcher.searchAfter(scoreDoc, query, number);
        } else {
            hits = isearcher.search(query, number);
        }

        for (int i = 0; i < hits.scoreDocs.length; i++) {
            DocumentData data = new DocumentData();
            Document hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
            data.setId(Long.parseLong(hitDoc.get("id")));
            data.setTitle(hitDoc.get("title"));
            data.setSummary(hitDoc.get("summary"));
            data.setCreateDate(Long.parseLong(hitDoc.get("createDate")));
            documentDatas.add(data);
        }
        searchResult = new SearchResult<DocumentData>(new Long(hits.totalHits), documentDatas);
    } catch (ParseException | IOException e) {
        e.printStackTrace();
    }
    return searchResult;
}
From source file:io.jpress.module.article.searcher.LuceneSearcher.java
License:LGPL
@Override
public Page<Article> search(String keyword, int pageNum, int pageSize) {
    IndexReader indexReader = null;
    try {
        // Bug fix: escape the keyword with QueryParser.escape(keyword); a raw keyword
        // such as "I/O" would otherwise break buildQuery
        keyword = QueryParser.escape(keyword);
        indexReader = DirectoryReader.open(directory);
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);

        Query query = buildQuery(keyword);
        ScoreDoc lastScoreDoc = getLastScoreDoc(pageNum, pageSize, query, indexSearcher);
        TopDocs topDocs = indexSearcher.searchAfter(lastScoreDoc, query, pageSize);

        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font class=\"" + HIGH_LIGHT_CLASS + "\">",
                "</font>");
        Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(100));

        List<Article> articles = toArticleList(indexSearcher, topDocs, highlighter, keyword);
        int totalRow = getTotalRow(indexSearcher, query);
        return newPage(pageNum, pageSize, totalRow, articles);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        CommonsUtils.quietlyClose(indexReader);
    }
    return null;
}
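The toArticleList method that consumes the searchAfter results is not shown. For one hit, the highlighting step inside such a method typically looks like the following sketch; the "content" field and the analyzer variable are assumptions, and scoreDoc, indexSearcher, and highlighter come from the surrounding code above.

// Sketch: produce a highlighted snippet for one hit returned by searchAfter.
Document doc = indexSearcher.doc(scoreDoc.doc);
String text = doc.get("content");
String snippet = text;
try {
    String fragment = highlighter.getBestFragment(analyzer, "content", text);
    if (fragment != null) {
        snippet = fragment;
    }
} catch (IOException | InvalidTokenOffsetsException e) {
    // fall back to the raw field text
}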
From source file:it.drwolf.ridire.util.async.FrequencyListGenerator.java
License:Apache License
private Map<String, Integer> getBareTable(List<String> corporaNames, String functionalMetadatumDescription,
        String semanticMetadatumDescription, String frequencyBy) throws IOException {
    Map<String, Integer> fl = new HashMap<String, Integer>();
    Query q = new BooleanQuery();
    if (corporaNames != null && corporaNames.size() > 0
            && !(corporaNames.size() == 1 && corporaNames.get(0) == null)) {
        BooleanQuery corporaQuery = new BooleanQuery();
        for (String cn : corporaNames) {
            if (cn != null) {
                corporaQuery.add(new TermQuery(new Term("corpus", cn)), Occur.SHOULD);
            }
        }
        ((BooleanQuery) q).add(corporaQuery, Occur.MUST);
    }
    if (functionalMetadatumDescription != null) {
        TermQuery funcQuery = new TermQuery(new Term("functionalMetadatum", functionalMetadatumDescription));
        ((BooleanQuery) q).add(funcQuery, Occur.MUST);
    }
    if (semanticMetadatumDescription != null) {
        TermQuery semaQuery = new TermQuery(new Term("semanticMetadatum", semanticMetadatumDescription));
        ((BooleanQuery) q).add(semaQuery, Occur.MUST);
    }
    PrefixQuery prefixQuery = new PrefixQuery(new Term("performaFL", ""));
    ((BooleanQuery) q).add(prefixQuery, Occur.MUST);
    IndexSearcher indexSearcher = this.contextsIndexManager.getIndexSearcherR();
    System.out.println("Starting FL calculation");
    TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
    indexSearcher.search(q, null, totalHitCountCollector);
    int totalHits = totalHitCountCollector.getTotalHits();
    System.out.println("Frequency list calculation. Docs to be processed: " + totalHits);
    ScoreDoc after = null;
    int docsProcessed = 0;
    for (int j = 0; j < totalHits; j += FrequencyListGenerator.BATCH_SIZE) {
        TopDocs topDocs = null;
        if (after == null) {
            topDocs = indexSearcher.search(q, FrequencyListGenerator.BATCH_SIZE);
        } else {
            topDocs = indexSearcher.searchAfter(after, q, FrequencyListGenerator.BATCH_SIZE);
        }
        StrTokenizer strTokenizer = new StrTokenizer();
        strTokenizer.setDelimiterString(ContextAnalyzer.SEPARATOR);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        if (scoreDocs != null) {
            for (ScoreDoc scoreDoc : scoreDocs) {
                ++docsProcessed;
                after = scoreDoc;
                TermFreqVector termFreqVector = indexSearcher.getIndexReader().getTermFreqVector(scoreDoc.doc,
                        "performaFL");
                if (termFreqVector == null) {
                    continue;
                }
                String[] terms = termFreqVector.getTerms();
                int[] frequencies = termFreqVector.getTermFrequencies();
                for (int i = 0; i < terms.length; i++) {
                    String term = terms[i];
                    String[] tokenArray = strTokenizer.reset(term).getTokenArray();
                    if (tokenArray.length != 3) {
                        continue;
                    }
                    String pos = tokenArray[1];
                    String lemma = tokenArray[2];
                    if (lemma.equals("<unknown>")) {
                        lemma = tokenArray[0];
                    }
                    if (frequencyBy.equals("forma")) {
                        term = tokenArray[0];
                    } else if (frequencyBy.equals("lemma")) {
                        term = lemma;
                    } else if (frequencyBy.equals("PoS-lemma")) {
                        if (pos.startsWith("VER")) {
                            pos = "VER";
                        }
                        term = pos + " / " + lemma;
                    } else if (frequencyBy.equals("PoS-forma")) {
                        if (pos.startsWith("VER")) {
                            pos = "VER";
                        }
                        term = pos + " / " + tokenArray[0];
                    } else {
                        term = tokenArray[1];
                    }
                    Integer count = fl.get(term);
                    if (count == null) {
                        fl.put(term, frequencies[i]);
                    } else {
                        fl.put(term, frequencies[i] + count);
                    }
                }
                if (docsProcessed % 1000 == 0) {
                    System.out.println("Frequency list calculation. Docs processed: " + docsProcessed
                            + " on total: " + totalHits + " (" + docsProcessed * 100.0f / totalHits + "%)");
                }
            }
        }
    }
    return fl;
}
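The loop above follows a common pattern: count the matches first, then pull them in fixed-size batches, feeding the last ScoreDoc of each batch back into searchAfter. Stripped of the frequency-list bookkeeping, the skeleton looks roughly like this sketch (BATCH_SIZE is an assumed constant):

// Skeleton of the batching loop above, without the term-frequency bookkeeping.
TotalHitCountCollector counter = new TotalHitCountCollector();
searcher.search(query, counter);
int totalHits = counter.getTotalHits();

ScoreDoc after = null;
for (int j = 0; j < totalHits; j += BATCH_SIZE) {
    TopDocs topDocs = (after == null)
            ? searcher.search(query, BATCH_SIZE)
            : searcher.searchAfter(after, query, BATCH_SIZE);
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        after = scoreDoc;           // remember the cursor for the next batch
        // process scoreDoc.doc here
    }
}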
From source file:org.apache.carbondata.datamap.lucene.LuceneFineGrainDataMap.java
License:Apache License
/**
 * Prune the datamap with filter expression. It returns the list of
 * blocklets where these filters can exist.
 */
@Override
public List<FineGrainBlocklet> prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties,
        List<PartitionSpec> partitions) throws IOException {
    // convert filter expr into lucene list query
    List<String> fields = new ArrayList<String>();

    // only for test , query all data
    String strQuery = getQueryString(filterExp.getFilterExpression());
    int maxDocs;
    try {
        maxDocs = getMaxDoc(filterExp.getFilterExpression());
    } catch (NumberFormatException e) {
        maxDocs = Integer.MAX_VALUE;
    }

    if (null == strQuery) {
        return null;
    }

    String[] sFields = new String[fields.size()];
    fields.toArray(sFields);

    // get analyzer
    if (analyzer == null) {
        analyzer = new StandardAnalyzer();
    }

    // use MultiFieldQueryParser to parser query
    QueryParser queryParser = new MultiFieldQueryParser(sFields, analyzer);
    queryParser.setAllowLeadingWildcard(true);
    Query query;
    try {
        query = queryParser.parse(strQuery);
    } catch (ParseException e) {
        String errorMessage = String.format("failed to filter block with query %s, detail is %s", strQuery,
                e.getMessage());
        LOGGER.error(errorMessage);
        return null;
    }

    // temporary data, delete duplicated data
    // Map<BlockId, Map<BlockletId, Map<PageId, Set<RowId>>>>
    Map<String, Map<Integer, List<Short>>> mapBlocks = new HashMap<>();

    long luceneSearchStartTime = System.currentTimeMillis();
    for (Map.Entry<String, IndexSearcher> searcherEntry : indexSearcherMap.entrySet()) {
        IndexSearcher indexSearcher = searcherEntry.getValue();
        // take the min of total documents available in the reader and limit if set by the user
        maxDocs = Math.min(maxDocs, indexSearcher.getIndexReader().maxDoc());

        // execute index search
        TopDocs result = null;
        // the number of documents to be queried in one search. It will always be minimum of
        // search result and maxDocs
        int numberOfDocumentsToBeQueried = 0;
        // counter for maintaining the total number of documents finished querying
        int documentHitCounter = 0;
        try {
            numberOfDocumentsToBeQueried = Math.min(maxDocs, SEARCH_LIMIT);
            result = indexSearcher.search(query, numberOfDocumentsToBeQueried);
            documentHitCounter += numberOfDocumentsToBeQueried;
        } catch (IOException e) {
            String errorMessage = String.format("failed to search lucene data, detail is %s", e.getMessage());
            LOGGER.error(errorMessage);
            throw new IOException(errorMessage);
        }

        ByteBuffer intBuffer = ByteBuffer.allocate(4);
        // last scoreDoc in a result to be used in searchAfter API
        ScoreDoc lastScoreDoc = null;
        while (true) {
            for (ScoreDoc scoreDoc : result.scoreDocs) {
                // get a document
                Document doc = indexSearcher.doc(scoreDoc.doc);
                // get all fields
                List<IndexableField> fieldsInDoc = doc.getFields();
                if (writeCacheSize > 0) {
                    // It fills rowids to the map, its value is combined with multiple rows.
                    fillMapForCombineRows(intBuffer, mapBlocks, fieldsInDoc, searcherEntry.getKey());
                } else {
                    // Fill rowids to the map
                    fillMap(intBuffer, mapBlocks, fieldsInDoc, searcherEntry.getKey());
                }
                lastScoreDoc = scoreDoc;
            }
            // result will have the total number of hits therefore we always need to query on the
            // left over documents
            int remainingHits = result.totalHits - documentHitCounter;
            // break the loop if count reaches maxDocs to be searched or remaining hits become <=0
            if (remainingHits <= 0 || documentHitCounter >= maxDocs) {
                break;
            }
            numberOfDocumentsToBeQueried = Math.min(remainingHits, SEARCH_LIMIT);
            result = indexSearcher.searchAfter(lastScoreDoc, query, numberOfDocumentsToBeQueried);
            documentHitCounter += numberOfDocumentsToBeQueried;
        }
    }
    LOGGER.info(
            "Time taken for lucene search: " + (System.currentTimeMillis() - luceneSearchStartTime) + " ms");

    // result blocklets
    List<FineGrainBlocklet> blocklets = new ArrayList<>();

    // transform all blocks into result type blocklets
    // Map<BlockId, Map<BlockletId, Map<PageId, Set<RowId>>>>
    for (Map.Entry<String, Map<Integer, List<Short>>> mapBlocklet : mapBlocks.entrySet()) {
        String blockletId = mapBlocklet.getKey();
        Map<Integer, List<Short>> mapPageIds = mapBlocklet.getValue();
        List<FineGrainBlocklet.Page> pages = new ArrayList<FineGrainBlocklet.Page>();

        // for pages in this blocklet Map<PageId, Set<RowId>>>
        for (Map.Entry<Integer, List<Short>> mapPageId : mapPageIds.entrySet()) {
            // construct array rowid
            int[] rowIds = new int[mapPageId.getValue().size()];
            int i = 0;
            // for rowids in this page Set<RowId>
            for (Short rowid : mapPageId.getValue()) {
                rowIds[i++] = rowid;
            }
            // construct one page
            FineGrainBlocklet.Page page = new FineGrainBlocklet.Page();
            page.setPageId(mapPageId.getKey());
            page.setRowId(rowIds);
            // add this page into list pages
            pages.add(page);
        }

        // add a FineGrainBlocklet
        blocklets.add(new FineGrainBlocklet(filePath, blockletId, pages));
    }
    return blocklets;
}