List of usage examples for the org.apache.lucene.search.grouping.GroupingSearch constructor
public GroupingSearch(Query groupEndDocs)
From source file:com.difference.historybook.index.lucene.LuceneIndex.java
License:Apache License
@Override public SearchResultWrapper search(String collection, String query, int offset, int size, boolean includeDebug) throws IndexException { try {//from w w w . ja v a 2 s. co m //TODO: make age be a component in the ranking? BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); queryBuilder.add(parser.parse(query), Occur.MUST); queryBuilder.add(new TermQuery(new Term(IndexDocumentAdapter.FIELD_COLLECTION, collection)), Occur.FILTER); Query baseQuery = queryBuilder.build(); FunctionQuery boostQuery = new FunctionQuery( new ReciprocalFloatFunction(new DurationValueSource(new Date().getTime() / 1000, new LongFieldSource(IndexDocumentAdapter.FIELD_TIMESTAMP)), RECIP, 1F, 1F)); Query q = new CustomScoreQuery(baseQuery, boostQuery); QueryScorer queryScorer = new QueryScorer(q, IndexDocumentAdapter.FIELD_SEARCH); Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer); Highlighter highlighter = new Highlighter(queryScorer); highlighter.setTextFragmenter(fragmenter); GroupingSearch gsearch = new GroupingSearch(IndexDocumentAdapter.FIELD_URL_GROUP).setGroupDocsLimit(1) .setAllGroups(true).setIncludeMaxScore(true); TopGroups<?> groups = gsearch.search(searcher, q, offset, size); ArrayList<SearchResult> results = new ArrayList<>(size); for (int i = offset; i < offset + size && i < groups.groups.length; i++) { ScoreDoc scoreDoc = groups.groups[i].scoreDocs[0]; Document luceneDoc = searcher.doc(scoreDoc.doc); IndexDocumentAdapter doc = new IndexDocumentAdapter(luceneDoc); TokenStream tokenStream = TokenSources.getTokenStream(IndexDocumentAdapter.FIELD_SEARCH, reader.getTermVectors(scoreDoc.doc), luceneDoc.get(IndexDocumentAdapter.FIELD_SEARCH), analyzer, highlighter.getMaxDocCharsToAnalyze() - 1); String[] snippets = highlighter.getBestFragments(tokenStream, luceneDoc.get(IndexDocumentAdapter.FIELD_SEARCH), 3); String snippet = Arrays.asList(snippets).stream().collect(Collectors.joining("\n")); snippet = Jsoup.clean(snippet, Whitelist.simpleText()); String 
debugInfo = null; if (includeDebug) { Explanation explanation = searcher.explain(q, scoreDoc.doc); debugInfo = explanation.toString(); } results.add(new SearchResult(doc.getKey(), doc.getCollection(), doc.getTitle(), doc.getUrl(), doc.getDomain(), doc.getTimestampText(), snippet, debugInfo, scoreDoc.score)); } SearchResultWrapper wrapper = new SearchResultWrapper().setQuery(query).setOffset(offset) .setMaxResultsRequested(size) .setResultCount(groups.totalGroupCount != null ? groups.totalGroupCount : 0) .setResults(results); if (includeDebug) { wrapper.setDebugInfo(q.toString()); } return wrapper; } catch (IOException | ParseException | InvalidTokenOffsetsException e) { LOG.error(e.getLocalizedMessage()); throw new IndexException(e); } }
From source file:com.dreamerpartner.codereview.lucene.SearchHelper.java
License:Apache License
/** * //from ww w . j a v a2s.c o m * @param groupField * @param searchField * @param searchStr * @param pageNo * @param pageSize * @param orderField ? * @param orderFieldType ? * @param desc ? ?? * @return */ @SuppressWarnings("deprecation") public static Map<String, List<Document>> group(String module, String groupField, String searchField, String searchStr, int pageNo, int pageSize, String orderField, Type orderFieldType, boolean desc) { Map<String, List<Document>> result = new LinkedHashMap<String, List<Document>>(10); IndexReader reader = null; try { reader = DirectoryReader.open(FSDirectory.open(new File(LuceneUtil.getIndexPath(module)))); IndexSearcher indexSearcher = new IndexSearcher(reader); GroupingSearch groupingSearch = new GroupingSearch(groupField); Sort sort = new Sort(new SortField(orderField, orderFieldType, desc)); groupingSearch.setGroupSort(sort); groupingSearch.setSortWithinGroup(sort); groupingSearch.setFillSortFields(true); groupingSearch.setCachingInMB(4.0, true); groupingSearch.setAllGroups(true); //groupingSearch.setAllGroupHeads(true); groupingSearch.setGroupDocsLimit(pageSize); QueryParser parser = new QueryParser(Version.LUCENE_4_10_0, searchField, new StandardAnalyzer(Version.LUCENE_4_10_0)); Query query = parser.parse(searchStr); TopGroups<BytesRef> groupResult = groupingSearch.search(indexSearcher, query, (pageNo - 1) * pageSize, pageSize); System.out.println("?" + groupResult.totalHitCount + ", ?" + groupResult.groups.length); List<Document> groupData = null; for (GroupDocs<BytesRef> groupDocs : groupResult.groups) { groupData = new ArrayList<Document>(pageSize); String groupName = groupDocs.groupValue.utf8ToString(); for (ScoreDoc scoreDoc : groupDocs.scoreDocs) { groupData.add(indexSearcher.doc(scoreDoc.doc)); } result.put(groupName, groupData); groupData = null; } } catch (Exception e) { e.printStackTrace(); } finally { try { if (reader != null) reader.close(); } catch (IOException e) { e.printStackTrace(); } } return result; }
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
/** * Queries a feature index of a list of files, returning specified page of specified size. * If no paging parameters are passed, returns all results * * @param files a {@link List} of {@link FeatureFile}, which indexes to search * @param query a query to search in index * @param vcfInfoFields list of info fields to retrieve * @param page number of a page to display * @param pageSize number of entries per page * @param orderBy object, that specifies sorting * @return a {List} of {@code FeatureIndexEntry} objects that satisfy index query * @throws IOException if something is wrong in the filesystem *///from ww w. ja v a2 s . c o m public <T extends FeatureIndexEntry> IndexSearchResult<T> searchFileIndexesPaging( List<? extends FeatureFile> files, Query query, List<String> vcfInfoFields, Integer page, Integer pageSize, List<VcfFilterForm.OrderBy> orderBy) throws IOException { if (CollectionUtils.isEmpty(files)) { return new IndexSearchResult<>(Collections.emptyList(), false, 0); } List<FeatureIndexEntry> entries; int totalHits = 0; SimpleFSDirectory[] indexes = fileManager.getIndexesForFiles(files); try (MultiReader reader = openMultiReader(indexes)) { if (reader.numDocs() == 0) { return new IndexSearchResult<>(Collections.emptyList(), false, 0); } IndexSearcher searcher = new IndexSearcher(reader); GroupingSearch groupingSearch = new GroupingSearch(FeatureIndexFields.UID.fieldName); setSorting(orderBy, groupingSearch, files); TopGroups<String> topGroups = groupingSearch.search(searcher, query, page == null ? 0 : (page - 1) * pageSize, page == null ? 
reader.numDocs() : pageSize); final ScoreDoc[] hits = new ScoreDoc[topGroups.groups.length]; for (int i = 0; i < topGroups.groups.length; i++) { hits[i] = topGroups.groups[i].scoreDocs[0]; } entries = new ArrayList<>(hits.length); for (ScoreDoc hit : hits) { entries.add(createIndexEntry(hit, new HashMap<>(), searcher, vcfInfoFields)); } } finally { for (SimpleFSDirectory index : indexes) { IOUtils.closeQuietly(index); } } return new IndexSearchResult<>((List<T>) entries, false, totalHits); }
From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java
License:Open Source License
private void handleGroupByQueryTaskPatch(Operation op, QueryTask task) throws IOException { QuerySpecification qs = task.querySpec; IndexSearcher s = (IndexSearcher) qs.context.nativeSearcher; LuceneQueryPage page = (LuceneQueryPage) qs.context.nativePage; Query tq = (Query) qs.context.nativeQuery; Sort sort = (Sort) qs.context.nativeSort; if (sort == null && qs.sortTerm != null) { sort = LuceneQueryConverter.convertToLuceneSort(qs, false); }/* ww w . ja va 2s . co m*/ Sort groupSort = null; if (qs.groupSortTerm != null) { groupSort = LuceneQueryConverter.convertToLuceneSort(qs, true); } GroupingSearch groupingSearch = new GroupingSearch(qs.groupByTerm.propertyName); groupingSearch.setGroupSort(groupSort); groupingSearch.setSortWithinGroup(sort); super.adjustStat(STAT_NAME_GROUP_QUERY_COUNT, 1); int groupOffset = page != null ? page.groupOffset : 0; int groupLimit = qs.groupResultLimit != null ? qs.groupResultLimit : 10000; if (s == null && qs.groupResultLimit != null) { s = createPaginatedQuerySearcher(task.documentExpirationTimeMicros, this.writer); } if (s == null) { // If DO_NOT_REFRESH is set use the existing searcher. 
s = this.searcher; if (!qs.options.contains(QueryOption.DO_NOT_REFRESH) || s == null) { s = updateSearcher(null, Integer.MAX_VALUE, this.writer); } } ServiceDocumentQueryResult rsp = new ServiceDocumentQueryResult(); rsp.nextPageLinksPerGroup = new TreeMap<>(); long startTimeMicros = Utils.getNowMicrosUtc(); // perform the actual search TopGroups<?> groups = groupingSearch.search(s, tq, groupOffset, groupLimit); long endTimeMicros = Utils.getNowMicrosUtc(); String statName = STAT_NAME_GROUP_QUERY_DURATION_MICROS; ServiceStat st = getHistogramStat(statName); setStat(st, endTimeMicros - startTimeMicros); // generate page links for each grouped result for (GroupDocs<?> groupDocs : groups.groups) { if (groupDocs.totalHits == 0) { continue; } QueryTask.Query perGroupQuery = Utils.clone(qs.query); String groupValue = ((BytesRef) groupDocs.groupValue).utf8ToString(); // we need to modify the query to include a top level clause that restricts scope // to documents with the groupBy field and value QueryTask.Query clause = new QueryTask.Query().setTermPropertyName(qs.groupByTerm.propertyName) .setTermMatchValue(groupValue).setTermMatchType(MatchType.TERM); clause.occurance = QueryTask.Query.Occurance.MUST_OCCUR; if (perGroupQuery.booleanClauses == null) { QueryTask.Query topLevelClause = perGroupQuery; perGroupQuery.addBooleanClause(topLevelClause); } perGroupQuery.addBooleanClause(clause); Query lucenePerGroupQuery = LuceneQueryConverter.convertToLuceneQuery(perGroupQuery); // for each group generate a query page link String pageLink = createNextPage(op, s, qs, lucenePerGroupQuery, sort, null, null, task.documentExpirationTimeMicros, task.indexLink, false); rsp.nextPageLinksPerGroup.put(groupValue, pageLink); } if (qs.groupResultLimit != null && groups.groups.length >= groupLimit) { // check if we need to generate a next page for the next set of group results groups = groupingSearch.search(s, tq, groupLimit + groupOffset, groupLimit); if (groups.totalGroupedHitCount > 0) { 
rsp.nextPageLink = createNextPage(op, s, qs, tq, sort, null, groupLimit + groupOffset, task.documentExpirationTimeMicros, task.indexLink, page != null); } } op.setBodyNoCloning(rsp).complete(); }
From source file:com.xiaomi.linden.core.search.LindenCoreImpl.java
License:Apache License
public LindenResult search(LindenSearchRequest request) throws IOException { SearcherTaxonomyManager.SearcherAndTaxonomy searcherAndTaxonomy = lindenNRTSearcherManager.acquire(); try {//from w w w . ja v a 2 s .co m IndexSearcher indexSearcher = searcherAndTaxonomy.searcher; Filter filter = FilterConstructor.constructFilter(request.getFilter(), config); Sort sort = SortConstructor.constructSort(request, indexSearcher, config); indexSearcher.setSimilarity(config.getSearchSimilarityInstance()); Query query = QueryConstructor.constructQuery(request.getQuery(), config); if (filter != null) { query = new FilteredQuery(query, filter); } int from = request.getOffset(); int size = request.getLength(); LindenResultParser resultParser = new LindenResultParser(config, request, indexSearcher, snippetGenerator, query, filter, sort); // very common search, no group, no facet, no early termination, no search time limit if (!request.isSetGroupParam() && !request.isSetFacet() && !request.isSetEarlyParam() && config.getSearchTimeLimit() <= 0) { TopDocs docs; if (sort != null) { docs = indexSearcher.search(query, from + size, sort); } else { docs = indexSearcher.search(query, from + size); } return resultParser.parse(docs, null, null, null); } // group param will suppress facet, group, early termination and search time limit parameters if (request.isSetGroupParam()) { String groupField = request.getGroupParam().getGroupField(); GroupingSearch groupingSearch = new GroupingSearch(groupField); groupingSearch.setGroupDocsLimit(request.getGroupParam().getGroupInnerLimit()); if (sort != null) { groupingSearch.setGroupSort(sort); groupingSearch.setSortWithinGroup(sort); groupingSearch.setFillSortFields(true); } groupingSearch.setCachingInMB(8.0, true); groupingSearch.setAllGroups(true); TopGroups<TopDocs> topGroupedDocs = groupingSearch.search(indexSearcher, query, 0, from + size); return resultParser.parse(null, topGroupedDocs, null, null); } TopDocsCollector topDocsCollector; if (sort != 
null) { topDocsCollector = TopFieldCollector.create(sort, from + size, null, true, false, false, false); } else { topDocsCollector = TopScoreDocCollector.create(from + size, false); } LindenDocsCollector lindenDocsCollector; if (request.isSetEarlyParam()) { MergePolicy mergePolicy = indexWriter.getConfig().getMergePolicy(); Sort mergePolicySort = null; if (mergePolicy instanceof SortingMergePolicyDecorator) { mergePolicySort = ((SortingMergePolicyDecorator) mergePolicy).getSort(); } EarlyTerminationCollector earlyTerminationCollector = new EarlyTerminationCollector( topDocsCollector, mergePolicySort, request.getEarlyParam().getMaxNum()); lindenDocsCollector = new LindenDocsCollector(earlyTerminationCollector); } else { lindenDocsCollector = new LindenDocsCollector(topDocsCollector); } Collector collector = lindenDocsCollector; if (config.getSearchTimeLimit() > 0) { collector = new TimeLimitingCollector(lindenDocsCollector, TimeLimitingCollector.getGlobalCounter(), config.getSearchTimeLimit()); } // no facet param if (!request.isSetFacet()) { indexSearcher.search(query, collector); return resultParser.parse(lindenDocsCollector.topDocs(), null, null, null); } // facet search LindenFacet facetRequest = request.getFacet(); FacetsCollector facetsCollector = new FacetsCollector(); lindenDocsCollector.wrap(facetsCollector); Facets facets = null; if (facetRequest.isSetDrillDownDimAndPaths()) { // drillDown or drillSideways DrillDownQuery drillDownQuery = new DrillDownQuery(facetsConfig, query); List<LindenFacetDimAndPath> drillDownDimAndPaths = facetRequest.getDrillDownDimAndPaths(); for (int i = 0; i < drillDownDimAndPaths.size(); ++i) { String fieldName = drillDownDimAndPaths.get(i).dim; if (drillDownDimAndPaths.get(i).path != null) { drillDownQuery.add(fieldName, drillDownDimAndPaths.get(i).path.split("/")); } else { drillDownQuery.add(fieldName); } } // drillSideways if (facetRequest.getFacetDrillingType() == FacetDrillingType.DRILLSIDEWAYS) { DrillSideways 
dillSideways = new DrillSideways(indexSearcher, facetsConfig, searcherAndTaxonomy.taxonomyReader); DrillSideways.DrillSidewaysResult drillSidewaysResult = dillSideways.search(drillDownQuery, collector); facets = drillSidewaysResult.facets; } else { // drillDown indexSearcher.search(drillDownQuery, collector); facets = new FastTaxonomyFacetCounts(searcherAndTaxonomy.taxonomyReader, facetsConfig, facetsCollector); } } else { indexSearcher.search(query, collector); // Simple facet browsing if (facetRequest.isSetFacetParams()) { facets = new FastTaxonomyFacetCounts(searcherAndTaxonomy.taxonomyReader, facetsConfig, facetsCollector); } } return resultParser.parse(lindenDocsCollector.topDocs(), null, facets, facetsCollector); } catch (Exception e) { throw new IOException(Throwables.getStackTraceAsString(e)); } finally { lindenNRTSearcherManager.release(searcherAndTaxonomy); } }