Example usage for org.apache.lucene.search.grouping GroupingSearch search

List of usage examples for org.apache.lucene.search.grouping GroupingSearch search

Introduction

In this page you can find the example usage for org.apache.lucene.search.grouping GroupingSearch search.

Prototype

@SuppressWarnings("unchecked")
public <T> TopGroups<T> search(IndexSearcher searcher, Query query, int groupOffset, int groupLimit)
        throws IOException 

Source Link

Document

Executes a grouped search.

Usage

From source file:com.difference.historybook.index.lucene.LuceneIndex.java

License:Apache License

@Override
public SearchResultWrapper search(String collection, String query, int offset, int size, boolean includeDebug)
        throws IndexException {
    try {// www  .j  a va  2  s  .  c om
        //TODO: make age be a component in the ranking?
        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
        queryBuilder.add(parser.parse(query), Occur.MUST);
        queryBuilder.add(new TermQuery(new Term(IndexDocumentAdapter.FIELD_COLLECTION, collection)),
                Occur.FILTER);
        Query baseQuery = queryBuilder.build();

        FunctionQuery boostQuery = new FunctionQuery(
                new ReciprocalFloatFunction(new DurationValueSource(new Date().getTime() / 1000,
                        new LongFieldSource(IndexDocumentAdapter.FIELD_TIMESTAMP)), RECIP, 1F, 1F));

        Query q = new CustomScoreQuery(baseQuery, boostQuery);

        QueryScorer queryScorer = new QueryScorer(q, IndexDocumentAdapter.FIELD_SEARCH);
        Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer);
        Highlighter highlighter = new Highlighter(queryScorer);
        highlighter.setTextFragmenter(fragmenter);

        GroupingSearch gsearch = new GroupingSearch(IndexDocumentAdapter.FIELD_URL_GROUP).setGroupDocsLimit(1)
                .setAllGroups(true).setIncludeMaxScore(true);
        TopGroups<?> groups = gsearch.search(searcher, q, offset, size);

        ArrayList<SearchResult> results = new ArrayList<>(size);
        for (int i = offset; i < offset + size && i < groups.groups.length; i++) {
            ScoreDoc scoreDoc = groups.groups[i].scoreDocs[0];
            Document luceneDoc = searcher.doc(scoreDoc.doc);
            IndexDocumentAdapter doc = new IndexDocumentAdapter(luceneDoc);

            TokenStream tokenStream = TokenSources.getTokenStream(IndexDocumentAdapter.FIELD_SEARCH,
                    reader.getTermVectors(scoreDoc.doc), luceneDoc.get(IndexDocumentAdapter.FIELD_SEARCH),
                    analyzer, highlighter.getMaxDocCharsToAnalyze() - 1);

            String[] snippets = highlighter.getBestFragments(tokenStream,
                    luceneDoc.get(IndexDocumentAdapter.FIELD_SEARCH), 3);
            String snippet = Arrays.asList(snippets).stream().collect(Collectors.joining("\n"));
            snippet = Jsoup.clean(snippet, Whitelist.simpleText());

            String debugInfo = null;
            if (includeDebug) {
                Explanation explanation = searcher.explain(q, scoreDoc.doc);
                debugInfo = explanation.toString();
            }

            results.add(new SearchResult(doc.getKey(), doc.getCollection(), doc.getTitle(), doc.getUrl(),
                    doc.getDomain(), doc.getTimestampText(), snippet, debugInfo, scoreDoc.score));
        }

        SearchResultWrapper wrapper = new SearchResultWrapper().setQuery(query).setOffset(offset)
                .setMaxResultsRequested(size)
                .setResultCount(groups.totalGroupCount != null ? groups.totalGroupCount : 0)
                .setResults(results);

        if (includeDebug) {
            wrapper.setDebugInfo(q.toString());
        }

        return wrapper;

    } catch (IOException | ParseException | InvalidTokenOffsetsException e) {
        LOG.error(e.getLocalizedMessage());
        throw new IndexException(e);
    }
}

From source file:com.dreamerpartner.codereview.lucene.SearchHelper.java

License:Apache License

/**
 *  /*from   w w w.j  a  v a  2s. c om*/
 * @param groupField 
 * @param searchField 
 * @param searchStr 
 * @param pageNo
 * @param pageSize
 * @param orderField ?
 * @param orderFieldType ?
 * @param desc ? ??
 * @return
 */
@SuppressWarnings("deprecation")
public static Map<String, List<Document>> group(String module, String groupField, String searchField,
        String searchStr, int pageNo, int pageSize, String orderField, Type orderFieldType, boolean desc) {
    Map<String, List<Document>> result = new LinkedHashMap<String, List<Document>>(10);
    IndexReader reader = null;
    try {
        reader = DirectoryReader.open(FSDirectory.open(new File(LuceneUtil.getIndexPath(module))));
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        GroupingSearch groupingSearch = new GroupingSearch(groupField);
        Sort sort = new Sort(new SortField(orderField, orderFieldType, desc));
        groupingSearch.setGroupSort(sort);
        groupingSearch.setSortWithinGroup(sort);
        groupingSearch.setFillSortFields(true);
        groupingSearch.setCachingInMB(4.0, true);
        groupingSearch.setAllGroups(true);
        //groupingSearch.setAllGroupHeads(true);
        groupingSearch.setGroupDocsLimit(pageSize);

        QueryParser parser = new QueryParser(Version.LUCENE_4_10_0, searchField,
                new StandardAnalyzer(Version.LUCENE_4_10_0));
        Query query = parser.parse(searchStr);

        TopGroups<BytesRef> groupResult = groupingSearch.search(indexSearcher, query, (pageNo - 1) * pageSize,
                pageSize);
        System.out.println("?" + groupResult.totalHitCount + ", ?"
                + groupResult.groups.length);

        List<Document> groupData = null;
        for (GroupDocs<BytesRef> groupDocs : groupResult.groups) {
            groupData = new ArrayList<Document>(pageSize);
            String groupName = groupDocs.groupValue.utf8ToString();
            for (ScoreDoc scoreDoc : groupDocs.scoreDocs) {
                groupData.add(indexSearcher.doc(scoreDoc.doc));
            }
            result.put(groupName, groupData);
            groupData = null;
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        try {
            if (reader != null)
                reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return result;
}

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Queries a feature index of a list of files, returning specified page of specified size.
 * If no paging parameters are passed, returns all results
 *
 * @param files a {@link List} of {@link FeatureFile}, which indexes to search
 * @param query a query to search in index
 * @param vcfInfoFields list of info fields to retrieve
 * @param page number of a page to display
 * @param pageSize number of entries per page
 * @param orderBy object, that specifies sorting
 * @return a {List} of {@code FeatureIndexEntry} objects that satisfy index query
 * @throws IOException if something is wrong in the filesystem
 *//* w  ww  .ja v a 2 s  .c o m*/
public <T extends FeatureIndexEntry> IndexSearchResult<T> searchFileIndexesPaging(
        List<? extends FeatureFile> files, Query query, List<String> vcfInfoFields, Integer page,
        Integer pageSize, List<VcfFilterForm.OrderBy> orderBy) throws IOException {

    if (CollectionUtils.isEmpty(files)) {
        return new IndexSearchResult<>(Collections.emptyList(), false, 0);
    }

    List<FeatureIndexEntry> entries;

    int totalHits = 0;
    SimpleFSDirectory[] indexes = fileManager.getIndexesForFiles(files);

    try (MultiReader reader = openMultiReader(indexes)) {
        if (reader.numDocs() == 0) {
            return new IndexSearchResult<>(Collections.emptyList(), false, 0);
        }

        IndexSearcher searcher = new IndexSearcher(reader);
        GroupingSearch groupingSearch = new GroupingSearch(FeatureIndexFields.UID.fieldName);
        setSorting(orderBy, groupingSearch, files);

        TopGroups<String> topGroups = groupingSearch.search(searcher, query,
                page == null ? 0 : (page - 1) * pageSize, page == null ? reader.numDocs() : pageSize);

        final ScoreDoc[] hits = new ScoreDoc[topGroups.groups.length];
        for (int i = 0; i < topGroups.groups.length; i++) {
            hits[i] = topGroups.groups[i].scoreDocs[0];
        }

        entries = new ArrayList<>(hits.length);
        for (ScoreDoc hit : hits) {
            entries.add(createIndexEntry(hit, new HashMap<>(), searcher, vcfInfoFields));
        }
    } finally {
        for (SimpleFSDirectory index : indexes) {
            IOUtils.closeQuietly(index);
        }
    }

    return new IndexSearchResult<>((List<T>) entries, false, totalHits);
}

From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java

License:Open Source License

private void handleGroupByQueryTaskPatch(Operation op, QueryTask task) throws IOException {
    QuerySpecification qs = task.querySpec;
    IndexSearcher s = (IndexSearcher) qs.context.nativeSearcher;
    LuceneQueryPage page = (LuceneQueryPage) qs.context.nativePage;
    Query tq = (Query) qs.context.nativeQuery;
    Sort sort = (Sort) qs.context.nativeSort;
    if (sort == null && qs.sortTerm != null) {
        sort = LuceneQueryConverter.convertToLuceneSort(qs, false);
    }// w w w .java 2  s  . c om

    Sort groupSort = null;
    if (qs.groupSortTerm != null) {
        groupSort = LuceneQueryConverter.convertToLuceneSort(qs, true);
    }

    GroupingSearch groupingSearch = new GroupingSearch(qs.groupByTerm.propertyName);
    groupingSearch.setGroupSort(groupSort);
    groupingSearch.setSortWithinGroup(sort);

    super.adjustStat(STAT_NAME_GROUP_QUERY_COUNT, 1);

    int groupOffset = page != null ? page.groupOffset : 0;
    int groupLimit = qs.groupResultLimit != null ? qs.groupResultLimit : 10000;

    if (s == null && qs.groupResultLimit != null) {
        s = createPaginatedQuerySearcher(task.documentExpirationTimeMicros, this.writer);
    }

    if (s == null) {
        // If DO_NOT_REFRESH is set use the existing searcher.
        s = this.searcher;
        if (!qs.options.contains(QueryOption.DO_NOT_REFRESH) || s == null) {
            s = updateSearcher(null, Integer.MAX_VALUE, this.writer);
        }
    }

    ServiceDocumentQueryResult rsp = new ServiceDocumentQueryResult();
    rsp.nextPageLinksPerGroup = new TreeMap<>();
    long startTimeMicros = Utils.getNowMicrosUtc();
    // perform the actual search
    TopGroups<?> groups = groupingSearch.search(s, tq, groupOffset, groupLimit);
    long endTimeMicros = Utils.getNowMicrosUtc();
    String statName = STAT_NAME_GROUP_QUERY_DURATION_MICROS;
    ServiceStat st = getHistogramStat(statName);
    setStat(st, endTimeMicros - startTimeMicros);
    // generate page links for each grouped result
    for (GroupDocs<?> groupDocs : groups.groups) {
        if (groupDocs.totalHits == 0) {
            continue;
        }
        QueryTask.Query perGroupQuery = Utils.clone(qs.query);
        String groupValue = ((BytesRef) groupDocs.groupValue).utf8ToString();

        // we need to modify the query to include a top level clause that restricts scope
        // to documents with the groupBy field and value
        QueryTask.Query clause = new QueryTask.Query().setTermPropertyName(qs.groupByTerm.propertyName)
                .setTermMatchValue(groupValue).setTermMatchType(MatchType.TERM);
        clause.occurance = QueryTask.Query.Occurance.MUST_OCCUR;
        if (perGroupQuery.booleanClauses == null) {
            QueryTask.Query topLevelClause = perGroupQuery;
            perGroupQuery.addBooleanClause(topLevelClause);
        }
        perGroupQuery.addBooleanClause(clause);
        Query lucenePerGroupQuery = LuceneQueryConverter.convertToLuceneQuery(perGroupQuery);

        // for each group generate a query page link
        String pageLink = createNextPage(op, s, qs, lucenePerGroupQuery, sort, null, null,
                task.documentExpirationTimeMicros, task.indexLink, false);

        rsp.nextPageLinksPerGroup.put(groupValue, pageLink);
    }

    if (qs.groupResultLimit != null && groups.groups.length >= groupLimit) {
        // check if we need to generate a next page for the next set of group results
        groups = groupingSearch.search(s, tq, groupLimit + groupOffset, groupLimit);
        if (groups.totalGroupedHitCount > 0) {
            rsp.nextPageLink = createNextPage(op, s, qs, tq, sort, null, groupLimit + groupOffset,
                    task.documentExpirationTimeMicros, task.indexLink, page != null);
        }
    }

    op.setBodyNoCloning(rsp).complete();
}

From source file:com.xiaomi.linden.core.search.LindenCoreImpl.java

License:Apache License

public LindenResult search(LindenSearchRequest request) throws IOException {
    SearcherTaxonomyManager.SearcherAndTaxonomy searcherAndTaxonomy = lindenNRTSearcherManager.acquire();
    try {/*from   w  ww .  ja  v a2  s.c o m*/
        IndexSearcher indexSearcher = searcherAndTaxonomy.searcher;
        Filter filter = FilterConstructor.constructFilter(request.getFilter(), config);
        Sort sort = SortConstructor.constructSort(request, indexSearcher, config);
        indexSearcher.setSimilarity(config.getSearchSimilarityInstance());

        Query query = QueryConstructor.constructQuery(request.getQuery(), config);
        if (filter != null) {
            query = new FilteredQuery(query, filter);
        }

        int from = request.getOffset();
        int size = request.getLength();
        LindenResultParser resultParser = new LindenResultParser(config, request, indexSearcher,
                snippetGenerator, query, filter, sort);
        // very common search, no group, no facet, no early termination, no search time limit
        if (!request.isSetGroupParam() && !request.isSetFacet() && !request.isSetEarlyParam()
                && config.getSearchTimeLimit() <= 0) {
            TopDocs docs;
            if (sort != null) {
                docs = indexSearcher.search(query, from + size, sort);
            } else {
                docs = indexSearcher.search(query, from + size);
            }
            return resultParser.parse(docs, null, null, null);
        }

        // group param will suppress facet, group, early termination and search time limit parameters
        if (request.isSetGroupParam()) {
            String groupField = request.getGroupParam().getGroupField();
            GroupingSearch groupingSearch = new GroupingSearch(groupField);
            groupingSearch.setGroupDocsLimit(request.getGroupParam().getGroupInnerLimit());
            if (sort != null) {
                groupingSearch.setGroupSort(sort);
                groupingSearch.setSortWithinGroup(sort);
                groupingSearch.setFillSortFields(true);
            }
            groupingSearch.setCachingInMB(8.0, true);
            groupingSearch.setAllGroups(true);
            TopGroups<TopDocs> topGroupedDocs = groupingSearch.search(indexSearcher, query, 0, from + size);
            return resultParser.parse(null, topGroupedDocs, null, null);
        }

        TopDocsCollector topDocsCollector;
        if (sort != null) {
            topDocsCollector = TopFieldCollector.create(sort, from + size, null, true, false, false, false);
        } else {
            topDocsCollector = TopScoreDocCollector.create(from + size, false);
        }

        LindenDocsCollector lindenDocsCollector;
        if (request.isSetEarlyParam()) {
            MergePolicy mergePolicy = indexWriter.getConfig().getMergePolicy();
            Sort mergePolicySort = null;
            if (mergePolicy instanceof SortingMergePolicyDecorator) {
                mergePolicySort = ((SortingMergePolicyDecorator) mergePolicy).getSort();
            }
            EarlyTerminationCollector earlyTerminationCollector = new EarlyTerminationCollector(
                    topDocsCollector, mergePolicySort, request.getEarlyParam().getMaxNum());
            lindenDocsCollector = new LindenDocsCollector(earlyTerminationCollector);
        } else {
            lindenDocsCollector = new LindenDocsCollector(topDocsCollector);
        }

        Collector collector = lindenDocsCollector;
        if (config.getSearchTimeLimit() > 0) {
            collector = new TimeLimitingCollector(lindenDocsCollector, TimeLimitingCollector.getGlobalCounter(),
                    config.getSearchTimeLimit());
        }

        // no facet param
        if (!request.isSetFacet()) {
            indexSearcher.search(query, collector);
            return resultParser.parse(lindenDocsCollector.topDocs(), null, null, null);
        }

        // facet search
        LindenFacet facetRequest = request.getFacet();
        FacetsCollector facetsCollector = new FacetsCollector();
        lindenDocsCollector.wrap(facetsCollector);

        Facets facets = null;
        if (facetRequest.isSetDrillDownDimAndPaths()) {
            // drillDown or drillSideways
            DrillDownQuery drillDownQuery = new DrillDownQuery(facetsConfig, query);
            List<LindenFacetDimAndPath> drillDownDimAndPaths = facetRequest.getDrillDownDimAndPaths();
            for (int i = 0; i < drillDownDimAndPaths.size(); ++i) {
                String fieldName = drillDownDimAndPaths.get(i).dim;
                if (drillDownDimAndPaths.get(i).path != null) {
                    drillDownQuery.add(fieldName, drillDownDimAndPaths.get(i).path.split("/"));
                } else {
                    drillDownQuery.add(fieldName);
                }
            }

            // drillSideways
            if (facetRequest.getFacetDrillingType() == FacetDrillingType.DRILLSIDEWAYS) {
                DrillSideways dillSideways = new DrillSideways(indexSearcher, facetsConfig,
                        searcherAndTaxonomy.taxonomyReader);
                DrillSideways.DrillSidewaysResult drillSidewaysResult = dillSideways.search(drillDownQuery,
                        collector);
                facets = drillSidewaysResult.facets;
            } else {
                // drillDown
                indexSearcher.search(drillDownQuery, collector);
                facets = new FastTaxonomyFacetCounts(searcherAndTaxonomy.taxonomyReader, facetsConfig,
                        facetsCollector);
            }
        } else {
            indexSearcher.search(query, collector);
            // Simple facet browsing
            if (facetRequest.isSetFacetParams()) {
                facets = new FastTaxonomyFacetCounts(searcherAndTaxonomy.taxonomyReader, facetsConfig,
                        facetsCollector);
            }
        }
        return resultParser.parse(lindenDocsCollector.topDocs(), null, facets, facetsCollector);
    } catch (Exception e) {
        throw new IOException(Throwables.getStackTraceAsString(e));
    } finally {
        lindenNRTSearcherManager.release(searcherAndTaxonomy);
    }
}