Example usage for org.apache.lucene.search.grouping GroupingSearch GroupingSearch

List of usage examples for org.apache.lucene.search.grouping GroupingSearch GroupingSearch

Introduction

On this page you can find example usages of the GroupingSearch constructor of org.apache.lucene.search.grouping.GroupingSearch.

Prototype

public GroupingSearch(Query groupEndDocs) 

Source Link

Document

Constructor for grouping documents by doc block.

Usage

From source file:com.difference.historybook.index.lucene.LuceneIndex.java

License:Apache License

/**
 * Searches one collection and returns a single page of results in which every
 * {@code FIELD_URL_GROUP} group is represented by its top document only.
 *
 * <p>The parsed user query is combined with a collection filter and wrapped in a
 * {@link CustomScoreQuery} whose boost is a reciprocal function over a
 * {@code DurationValueSource} built from the current time (in seconds) and the
 * timestamp field. Up to three highlighted fragments of the search field are joined
 * with newlines and sanitized to simple text to form the snippet.
 *
 * @param collection   value the collection field must match (applied as a filter clause)
 * @param query        user query string, parsed with the index's query parser
 * @param offset       number of leading groups to skip
 * @param size         maximum number of groups (results) to return
 * @param includeDebug when {@code true}, attaches the score explanation to every result
 *                     and the final query string to the wrapper
 * @return the requested page of results together with the total group count
 *         (0 when Lucene did not report one)
 * @throws IndexException if reading the index, parsing the query or highlighting fails
 */
@Override
public SearchResultWrapper search(String collection, String query, int offset, int size, boolean includeDebug)
        throws IndexException {
    try {
        //TODO: make age be a component in the ranking?
        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
        queryBuilder.add(parser.parse(query), Occur.MUST);
        queryBuilder.add(new TermQuery(new Term(IndexDocumentAdapter.FIELD_COLLECTION, collection)),
                Occur.FILTER);
        Query baseQuery = queryBuilder.build();

        FunctionQuery boostQuery = new FunctionQuery(
                new ReciprocalFloatFunction(new DurationValueSource(new Date().getTime() / 1000,
                        new LongFieldSource(IndexDocumentAdapter.FIELD_TIMESTAMP)), RECIP, 1F, 1F));

        Query q = new CustomScoreQuery(baseQuery, boostQuery);

        QueryScorer queryScorer = new QueryScorer(q, IndexDocumentAdapter.FIELD_SEARCH);
        Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer);
        Highlighter highlighter = new Highlighter(queryScorer);
        highlighter.setTextFragmenter(fragmenter);

        GroupingSearch gsearch = new GroupingSearch(IndexDocumentAdapter.FIELD_URL_GROUP).setGroupDocsLimit(1)
                .setAllGroups(true).setIncludeMaxScore(true);
        TopGroups<?> groups = gsearch.search(searcher, q, offset, size);

        ArrayList<SearchResult> results = new ArrayList<>(size);
        // GroupingSearch has already skipped the first `offset` groups, so the returned
        // array IS the requested page. It must be walked from index 0; starting at
        // `offset` would apply the offset twice and drop results on every page but the first.
        for (int i = 0; i < groups.groups.length; i++) {
            // Only the top document of each group is kept (group docs limit is 1).
            ScoreDoc scoreDoc = groups.groups[i].scoreDocs[0];
            Document luceneDoc = searcher.doc(scoreDoc.doc);
            IndexDocumentAdapter doc = new IndexDocumentAdapter(luceneDoc);

            String searchText = luceneDoc.get(IndexDocumentAdapter.FIELD_SEARCH);
            TokenStream tokenStream = TokenSources.getTokenStream(IndexDocumentAdapter.FIELD_SEARCH,
                    reader.getTermVectors(scoreDoc.doc), searchText,
                    analyzer, highlighter.getMaxDocCharsToAnalyze() - 1);

            String[] snippets = highlighter.getBestFragments(tokenStream, searchText, 3);
            String snippet = String.join("\n", snippets);
            // Strip everything except simple inline text formatting from the snippet.
            snippet = Jsoup.clean(snippet, Whitelist.simpleText());

            String debugInfo = null;
            if (includeDebug) {
                Explanation explanation = searcher.explain(q, scoreDoc.doc);
                debugInfo = explanation.toString();
            }

            results.add(new SearchResult(doc.getKey(), doc.getCollection(), doc.getTitle(), doc.getUrl(),
                    doc.getDomain(), doc.getTimestampText(), snippet, debugInfo, scoreDoc.score));
        }

        SearchResultWrapper wrapper = new SearchResultWrapper().setQuery(query).setOffset(offset)
                .setMaxResultsRequested(size)
                .setResultCount(groups.totalGroupCount != null ? groups.totalGroupCount : 0)
                .setResults(results);

        if (includeDebug) {
            wrapper.setDebugInfo(q.toString());
        }

        return wrapper;

    } catch (IOException | ParseException | InvalidTokenOffsetsException e) {
        LOG.error(e.getLocalizedMessage());
        throw new IndexException(e);
    }
}

From source file:com.dreamerpartner.codereview.lucene.SearchHelper.java

License:Apache License

/**
 * Runs a grouped search against a module's index and returns the matching documents
 * keyed by group value, in the order Lucene returned the groups.
 *
 * <p>Failures are not propagated: any exception is printed to standard error and
 * whatever was collected so far (possibly an empty map) is returned.
 *
 * @param module         module name; resolved to an index directory via
 *                       {@code LuceneUtil.getIndexPath(module)}
 * @param groupField     field whose value defines the groups
 * @param searchField    default field the query string is parsed against
 * @param searchStr      query string in Lucene query-parser syntax
 * @param pageNo         page number; the group offset is {@code (pageNo - 1) * pageSize}
 * @param pageSize       number of groups per page, also used as the per-group document limit
 * @param orderField     field used to sort both the groups and the documents inside a group
 * @param orderFieldType sort type of {@code orderField}
 * @param desc           passed as the {@code reverse} flag of the {@link SortField}
 * @return an insertion-ordered map from group value to the documents of that group; documents
 *         that have no value for {@code groupField} are collected under the {@code null} key
 */
@SuppressWarnings("deprecation")
public static Map<String, List<Document>> group(String module, String groupField, String searchField,
        String searchStr, int pageNo, int pageSize, String orderField, Type orderFieldType, boolean desc) {
    Map<String, List<Document>> result = new LinkedHashMap<String, List<Document>>(10);
    // The directory is a resource of its own: closing the reader does not close it,
    // so both are managed here (closed in reverse order of declaration).
    try (FSDirectory directory = FSDirectory.open(new File(LuceneUtil.getIndexPath(module)));
            IndexReader reader = DirectoryReader.open(directory)) {
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        GroupingSearch groupingSearch = new GroupingSearch(groupField);
        Sort sort = new Sort(new SortField(orderField, orderFieldType, desc));
        groupingSearch.setGroupSort(sort);
        groupingSearch.setSortWithinGroup(sort);
        groupingSearch.setFillSortFields(true);
        groupingSearch.setCachingInMB(4.0, true);
        groupingSearch.setAllGroups(true);
        groupingSearch.setGroupDocsLimit(pageSize);

        QueryParser parser = new QueryParser(Version.LUCENE_4_10_0, searchField,
                new StandardAnalyzer(Version.LUCENE_4_10_0));
        Query query = parser.parse(searchStr);

        TopGroups<BytesRef> groupResult = groupingSearch.search(indexSearcher, query, (pageNo - 1) * pageSize,
                pageSize);
        System.out.println("?" + groupResult.totalHitCount + ", ?"
                + groupResult.groups.length);

        for (GroupDocs<BytesRef> groupDocs : groupResult.groups) {
            // Documents without the group field come back with a null group value;
            // dereferencing it used to abort the loop and silently truncate the result.
            String groupName = groupDocs.groupValue == null ? null : groupDocs.groupValue.utf8ToString();
            List<Document> groupData = new ArrayList<Document>(pageSize);
            for (ScoreDoc scoreDoc : groupDocs.scoreDocs) {
                groupData.add(indexSearcher.doc(scoreDoc.doc));
            }
            result.put(groupName, groupData);
        }
    } catch (Exception e) {
        // Deliberately best-effort: report the failure and return what was gathered.
        e.printStackTrace();
    }
    return result;
}

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Searches the feature indexes of the given files and returns one page of entries,
 * one entry per distinct {@code FeatureIndexFields.UID} group (the first document of
 * each group is used).
 *
 * <p>When {@code page} is {@code null} no paging is applied: the search starts at
 * offset 0 and the group limit is the number of documents in the combined reader.
 *
 * @param files a {@link List} of {@link FeatureFile} whose indexes are searched
 * @param query the query to run against the indexes
 * @param vcfInfoFields info fields to retrieve for each entry
 * @param page page number to return; page 1 maps to offset 0, {@code null} disables paging
 * @param pageSize number of entries per page; read only when {@code page} is not {@code null}
 * @param orderBy sorting specification, applied through {@code setSorting}
 * @return an {@link IndexSearchResult} holding the entries that satisfy the query
 * @throws IOException if something is wrong in the filesystem
 */
public <T extends FeatureIndexEntry> IndexSearchResult<T> searchFileIndexesPaging(
        List<? extends FeatureFile> files, Query query, List<String> vcfInfoFields, Integer page,
        Integer pageSize, List<VcfFilterForm.OrderBy> orderBy) throws IOException {

    if (CollectionUtils.isEmpty(files)) {
        return new IndexSearchResult<>(Collections.emptyList(), false, 0);
    }

    // NOTE(review): the total hit count is never computed in this method, so the
    // result always reports 0 - confirm callers do not rely on it.
    final int reportedTotal = 0;
    final SimpleFSDirectory[] directories = fileManager.getIndexesForFiles(files);
    final List<FeatureIndexEntry> found;

    try (MultiReader multiReader = openMultiReader(directories)) {
        if (multiReader.numDocs() == 0) {
            return new IndexSearchResult<>(Collections.emptyList(), false, 0);
        }

        final IndexSearcher indexSearcher = new IndexSearcher(multiReader);
        final GroupingSearch uidGrouping = new GroupingSearch(FeatureIndexFields.UID.fieldName);
        setSorting(orderBy, uidGrouping, files);

        // Without a page number, ask for every group starting from the beginning.
        final int groupOffset;
        final int groupLimit;
        if (page == null) {
            groupOffset = 0;
            groupLimit = multiReader.numDocs();
        } else {
            groupOffset = (page - 1) * pageSize;
            groupLimit = pageSize;
        }
        final TopGroups<String> grouped = uidGrouping.search(indexSearcher, query, groupOffset, groupLimit);

        // First pick the leading document of every group...
        final int groupCount = grouped.groups.length;
        final List<ScoreDoc> representatives = new ArrayList<>(groupCount);
        for (int idx = 0; idx < groupCount; idx++) {
            representatives.add(grouped.groups[idx].scoreDocs[0]);
        }

        // ...then turn each of them into an index entry.
        found = new ArrayList<>(groupCount);
        for (ScoreDoc representative : representatives) {
            found.add(createIndexEntry(representative, new HashMap<>(), indexSearcher, vcfInfoFields));
        }
    } finally {
        // The directories are opened by the file manager and are not owned by the reader.
        for (SimpleFSDirectory directory : directories) {
            IOUtils.closeQuietly(directory);
        }
    }

    return new IndexSearchResult<>((List<T>) found, false, reportedTotal);
}

From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Executes a group-by query task: runs a Lucene grouping search on
 * {@code qs.groupByTerm.propertyName}, creates one result-page link per non-empty
 * group (each link scoped to that group's value), optionally creates a link to the
 * next page of groups, and completes {@code op} with the assembled
 * {@link ServiceDocumentQueryResult}.
 *
 * @param op   operation that is completed with the query result as its body
 * @param task query task whose {@code querySpec} carries the query, sort terms,
 *             grouping term and native (Lucene) context objects
 * @throws IOException declared by this method; presumably raised by the index
 *                     access it performs
 */
private void handleGroupByQueryTaskPatch(Operation op, QueryTask task) throws IOException {
    QuerySpecification qs = task.querySpec;
    IndexSearcher s = (IndexSearcher) qs.context.nativeSearcher;
    LuceneQueryPage page = (LuceneQueryPage) qs.context.nativePage;
    Query tq = (Query) qs.context.nativeQuery;
    Sort sort = (Sort) qs.context.nativeSort;
    // Fall back to converting the declared sort term when no native sort was supplied.
    if (sort == null && qs.sortTerm != null) {
        sort = LuceneQueryConverter.convertToLuceneSort(qs, false);
    }

    Sort groupSort = null;
    if (qs.groupSortTerm != null) {
        groupSort = LuceneQueryConverter.convertToLuceneSort(qs, true);
    }

    // NOTE(review): groupSort and sort can both still be null at this point;
    // confirm that GroupingSearch accepts null sorts in the Lucene version in use.
    GroupingSearch groupingSearch = new GroupingSearch(qs.groupByTerm.propertyName);
    groupingSearch.setGroupSort(groupSort);
    groupingSearch.setSortWithinGroup(sort);

    super.adjustStat(STAT_NAME_GROUP_QUERY_COUNT, 1);

    // Resume from the offset stored in the page, if this is a continuation request.
    int groupOffset = page != null ? page.groupOffset : 0;
    // Without an explicit limit, cap the number of groups at 10000.
    int groupLimit = qs.groupResultLimit != null ? qs.groupResultLimit : 10000;

    // A limited (paginated) group query with no searcher yet gets a dedicated one.
    if (s == null && qs.groupResultLimit != null) {
        s = createPaginatedQuerySearcher(task.documentExpirationTimeMicros, this.writer);
    }

    if (s == null) {
        // If DO_NOT_REFRESH is set use the existing searcher.
        s = this.searcher;
        if (!qs.options.contains(QueryOption.DO_NOT_REFRESH) || s == null) {
            s = updateSearcher(null, Integer.MAX_VALUE, this.writer);
        }
    }

    ServiceDocumentQueryResult rsp = new ServiceDocumentQueryResult();
    rsp.nextPageLinksPerGroup = new TreeMap<>();
    long startTimeMicros = Utils.getNowMicrosUtc();
    // perform the actual search
    TopGroups<?> groups = groupingSearch.search(s, tq, groupOffset, groupLimit);
    long endTimeMicros = Utils.getNowMicrosUtc();
    // record how long the grouping search took
    String statName = STAT_NAME_GROUP_QUERY_DURATION_MICROS;
    ServiceStat st = getHistogramStat(statName);
    setStat(st, endTimeMicros - startTimeMicros);
    // generate page links for each grouped result
    for (GroupDocs<?> groupDocs : groups.groups) {
        if (groupDocs.totalHits == 0) {
            continue;
        }
        QueryTask.Query perGroupQuery = Utils.clone(qs.query);
        // NOTE(review): assumes the group value is a non-null BytesRef; a group of
        // documents lacking the group-by field would presumably fail here - confirm.
        String groupValue = ((BytesRef) groupDocs.groupValue).utf8ToString();

        // we need to modify the query to include a top level clause that restricts scope
        // to documents with the groupBy field and value
        QueryTask.Query clause = new QueryTask.Query().setTermPropertyName(qs.groupByTerm.propertyName)
                .setTermMatchValue(groupValue).setTermMatchType(MatchType.TERM);
        clause.occurance = QueryTask.Query.Occurance.MUST_OCCUR;
        if (perGroupQuery.booleanClauses == null) {
            // NOTE(review): topLevelClause is the same object as perGroupQuery, so the
            // query is added to itself as a clause; verify against addBooleanClause
            // that this is intended and does not create a self-reference.
            QueryTask.Query topLevelClause = perGroupQuery;
            perGroupQuery.addBooleanClause(topLevelClause);
        }
        perGroupQuery.addBooleanClause(clause);
        Query lucenePerGroupQuery = LuceneQueryConverter.convertToLuceneQuery(perGroupQuery);

        // for each group generate a query page link
        String pageLink = createNextPage(op, s, qs, lucenePerGroupQuery, sort, null, null,
                task.documentExpirationTimeMicros, task.indexLink, false);

        rsp.nextPageLinksPerGroup.put(groupValue, pageLink);
    }

    // A full page of groups under an explicit limit means more groups may follow.
    if (qs.groupResultLimit != null && groups.groups.length >= groupLimit) {
        // check if we need to generate a next page for the next set of group results
        groups = groupingSearch.search(s, tq, groupLimit + groupOffset, groupLimit);
        if (groups.totalGroupedHitCount > 0) {
            rsp.nextPageLink = createNextPage(op, s, qs, tq, sort, null, groupLimit + groupOffset,
                    task.documentExpirationTimeMicros, task.indexLink, page != null);
        }
    }

    op.setBodyNoCloning(rsp).complete();
}

From source file:com.xiaomi.linden.core.search.LindenCoreImpl.java

License:Apache License

/**
 * Executes a search request against the current near-real-time searcher.
 *
 * <p>The request is served by the first matching path:
 * <ol>
 *   <li>plain search - no group, facet or early-termination parameter and no
 *       configured time limit;</li>
 *   <li>grouped search - whenever a group parameter is set (facet, early termination
 *       and time limit are then ignored);</li>
 *   <li>collector-based search - optionally wrapped for early termination and/or a
 *       time limit, with optional facet counting (simple, drill-down or
 *       drill-sideways).</li>
 * </ol>
 * The acquired searcher is always released before returning.
 *
 * @param request the search request (query, filter, paging, group/facet/early parameters)
 * @return the parsed search result
 * @throws IOException on any failure; the message is the stack trace of the underlying
 *                     exception, which is also attached as the cause
 */
public LindenResult search(LindenSearchRequest request) throws IOException {
    SearcherTaxonomyManager.SearcherAndTaxonomy searcherAndTaxonomy = lindenNRTSearcherManager.acquire();
    try {
        IndexSearcher indexSearcher = searcherAndTaxonomy.searcher;
        Filter filter = FilterConstructor.constructFilter(request.getFilter(), config);
        Sort sort = SortConstructor.constructSort(request, indexSearcher, config);
        indexSearcher.setSimilarity(config.getSearchSimilarityInstance());

        Query query = QueryConstructor.constructQuery(request.getQuery(), config);
        if (filter != null) {
            query = new FilteredQuery(query, filter);
        }

        int from = request.getOffset();
        int size = request.getLength();
        LindenResultParser resultParser = new LindenResultParser(config, request, indexSearcher,
                snippetGenerator, query, filter, sort);
        // very common search, no group, no facet, no early termination, no search time limit
        if (!request.isSetGroupParam() && !request.isSetFacet() && !request.isSetEarlyParam()
                && config.getSearchTimeLimit() <= 0) {
            TopDocs docs;
            if (sort != null) {
                docs = indexSearcher.search(query, from + size, sort);
            } else {
                docs = indexSearcher.search(query, from + size);
            }
            return resultParser.parse(docs, null, null, null);
        }

        // group param will suppress facet, early termination and search time limit parameters
        if (request.isSetGroupParam()) {
            String groupField = request.getGroupParam().getGroupField();
            GroupingSearch groupingSearch = new GroupingSearch(groupField);
            groupingSearch.setGroupDocsLimit(request.getGroupParam().getGroupInnerLimit());
            if (sort != null) {
                groupingSearch.setGroupSort(sort);
                groupingSearch.setSortWithinGroup(sort);
                groupingSearch.setFillSortFields(true);
            }
            groupingSearch.setCachingInMB(8.0, true);
            groupingSearch.setAllGroups(true);
            // Groups are fetched from offset 0 up to from + size; the offset itself is
            // not applied here (presumably the result parser handles paging - confirm).
            TopGroups<TopDocs> topGroupedDocs = groupingSearch.search(indexSearcher, query, 0, from + size);
            return resultParser.parse(null, topGroupedDocs, null, null);
        }

        TopDocsCollector topDocsCollector;
        if (sort != null) {
            topDocsCollector = TopFieldCollector.create(sort, from + size, null, true, false, false, false);
        } else {
            topDocsCollector = TopScoreDocCollector.create(from + size, false);
        }

        LindenDocsCollector lindenDocsCollector;
        if (request.isSetEarlyParam()) {
            // Early termination is given the merge policy's sort when the index
            // uses a sorting merge policy; otherwise it receives a null sort.
            MergePolicy mergePolicy = indexWriter.getConfig().getMergePolicy();
            Sort mergePolicySort = null;
            if (mergePolicy instanceof SortingMergePolicyDecorator) {
                mergePolicySort = ((SortingMergePolicyDecorator) mergePolicy).getSort();
            }
            EarlyTerminationCollector earlyTerminationCollector = new EarlyTerminationCollector(
                    topDocsCollector, mergePolicySort, request.getEarlyParam().getMaxNum());
            lindenDocsCollector = new LindenDocsCollector(earlyTerminationCollector);
        } else {
            lindenDocsCollector = new LindenDocsCollector(topDocsCollector);
        }

        Collector collector = lindenDocsCollector;
        if (config.getSearchTimeLimit() > 0) {
            collector = new TimeLimitingCollector(lindenDocsCollector, TimeLimitingCollector.getGlobalCounter(),
                    config.getSearchTimeLimit());
        }

        // no facet param
        if (!request.isSetFacet()) {
            indexSearcher.search(query, collector);
            return resultParser.parse(lindenDocsCollector.topDocs(), null, null, null);
        }

        // facet search
        LindenFacet facetRequest = request.getFacet();
        FacetsCollector facetsCollector = new FacetsCollector();
        lindenDocsCollector.wrap(facetsCollector);

        Facets facets = null;
        if (facetRequest.isSetDrillDownDimAndPaths()) {
            // drillDown or drillSideways
            DrillDownQuery drillDownQuery = new DrillDownQuery(facetsConfig, query);
            for (LindenFacetDimAndPath dimAndPath : facetRequest.getDrillDownDimAndPaths()) {
                if (dimAndPath.path != null) {
                    drillDownQuery.add(dimAndPath.dim, dimAndPath.path.split("/"));
                } else {
                    drillDownQuery.add(dimAndPath.dim);
                }
            }

            // drillSideways
            if (facetRequest.getFacetDrillingType() == FacetDrillingType.DRILLSIDEWAYS) {
                DrillSideways drillSideways = new DrillSideways(indexSearcher, facetsConfig,
                        searcherAndTaxonomy.taxonomyReader);
                DrillSideways.DrillSidewaysResult drillSidewaysResult = drillSideways.search(drillDownQuery,
                        collector);
                facets = drillSidewaysResult.facets;
            } else {
                // drillDown
                indexSearcher.search(drillDownQuery, collector);
                facets = new FastTaxonomyFacetCounts(searcherAndTaxonomy.taxonomyReader, facetsConfig,
                        facetsCollector);
            }
        } else {
            indexSearcher.search(query, collector);
            // Simple facet browsing
            if (facetRequest.isSetFacetParams()) {
                facets = new FastTaxonomyFacetCounts(searcherAndTaxonomy.taxonomyReader, facetsConfig,
                        facetsCollector);
            }
        }
        return resultParser.parse(lindenDocsCollector.topDocs(), null, facets, facetsCollector);
    } catch (Exception e) {
        // Keep the historical message (full stack trace text) but also chain the
        // original exception so callers and loggers can inspect the real cause.
        throw new IOException(Throwables.getStackTraceAsString(e), e);
    } finally {
        lindenNRTSearcherManager.release(searcherAndTaxonomy);
    }
}