Example usage for org.apache.lucene.facet FacetsCollector search

List of usage examples for org.apache.lucene.facet FacetsCollector search

Introduction

On this page you can find example usages of org.apache.lucene.facet FacetsCollector search.

Prototype

public static TopFieldDocs search(IndexSearcher searcher, Query q, int n, Sort sort, Collector fc)
        throws IOException 

Source Link

Document

Utility method to search and also collect all hits into the provided Collector.

Usage

From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java

License:Open Source License

/**
 * Runs a full-text search for the given query string and assembles the hits, optional
 * similar-document hints and facet dimensions into a {@link QueryResult}.
 *
 * <p>When {@code aQueryString} is empty (per {@code StringUtils.isEmpty}) no search is executed
 * and the result carries no documents and no dimensions.
 *
 * @param aQueryString the user query; the search is skipped when it is empty
 * @param aBacklink passed through unchanged to the returned {@link QueryResult}
 * @param aBasePath prefix used to build the drill-down link of every facet
 * @param aConfiguration supplies the maximum number of hits and the similar-documents switch
 * @param aDrilldownFields dimension/value pairs that are added to the drill-down query
 * @return the assembled query result
 * @throws IOException if refreshing or acquiring the searcher, or opening the facet state, fails
 * @throws RuntimeException wrapping any exception raised while the query itself is processed
 */
public QueryResult performQuery(String aQueryString, String aBacklink, String aBasePath,
        Configuration aConfiguration, Map<String, String> aDrilldownFields) throws IOException {

    searcherManager.maybeRefreshBlocking();
    IndexSearcher theSearcher = searcherManager.acquire();

    // Everything after acquire() runs inside try/finally so the searcher is released even when
    // opening the facet state fails.
    try {
        SortedSetDocValuesReaderState theSortedSetState = new DefaultSortedSetDocValuesReaderState(
                theSearcher.getIndexReader());

        List<QueryResultDocument> theResultDocuments = new ArrayList<>();

        long theStartTime = System.currentTimeMillis();

        LOGGER.info("Querying for " + aQueryString);

        // SimpleDateFormat is not thread-safe: it is only used on this thread, never inside the
        // highlighter tasks submitted to the executor pool.
        DateFormat theDateFormat = new SimpleDateFormat("dd.MMMM.yyyy", Locale.ENGLISH);

        try {

            List<FacetDimension> theDimensions = new ArrayList<>();

            // Search only if a search query is given
            if (!StringUtils.isEmpty(aQueryString)) {

                Query theQuery = computeBooleanQueryFor(aQueryString);

                LOGGER.info(" query is " + theQuery);

                theQuery = theQuery.rewrite(theSearcher.getIndexReader());

                LOGGER.info(" rewritten query is " + theQuery);

                // The highlighter lambdas need an effectively final reference to the rewritten query
                final Query theFinalQuery = theQuery;

                DrillDownQuery theDrilldownQuery = new DrillDownQuery(facetsConfig, theQuery);
                aDrilldownFields.forEach((aDimension, aValue) -> {
                    LOGGER.info(" with Drilldown " + aDimension + " for " + aValue);
                    theDrilldownQuery.add(aDimension, aValue);
                });

                FacetsCollector theFacetCollector = new FacetsCollector();

                TopDocs theDocs = FacetsCollector.search(theSearcher, theDrilldownQuery, null,
                        aConfiguration.getNumberOfSearchResults(), theFacetCollector);
                SortedSetDocValuesFacetCounts theFacetCounts = new SortedSetDocValuesFacetCounts(theSortedSetState,
                        theFacetCollector);

                List<Facet> theAuthorFacets = new ArrayList<>();
                List<Facet> theFileTypesFacets = new ArrayList<>();
                List<Facet> theLastModifiedYearFacet = new ArrayList<>();
                List<Facet> theLanguageFacet = new ArrayList<>();

                LOGGER.info("Found " + theDocs.scoreDocs.length + " documents");

                // We need this cache to detect duplicate documents while searching for similarities
                Set<Integer> theUniqueDocumentsFound = new HashSet<>();

                // Hits with identical content hash are merged into one result document
                Map<String, QueryResultDocument> theDocumentsByHash = new HashMap<>();

                for (ScoreDoc theScoreDoc : theDocs.scoreDocs) {
                    int theDocumentID = theScoreDoc.doc;
                    theUniqueDocumentsFound.add(theDocumentID);
                    Document theDocument = theSearcher.doc(theDocumentID);

                    String theUniqueID = theDocument.getField(IndexFields.UNIQUEID).stringValue();
                    String theFoundFileName = theDocument.getField(IndexFields.FILENAME).stringValue();
                    String theHash = theDocument.getField(IndexFields.CONTENTMD5).stringValue();
                    QueryResultDocument theExistingDocument = theDocumentsByHash.get(theHash);
                    if (theExistingDocument != null) {
                        theExistingDocument.addFileName(theFoundFileName);
                    } else {
                        long theLastModifiedMillis = theDocument.getField(IndexFields.LASTMODIFIED).numericValue()
                                .longValue();
                        // Format on the calling thread; the task below only sees the finished string
                        final String theFormattedLastModified = theDateFormat
                                .format(new Date(theLastModifiedMillis));

                        SupportedLanguage theLanguage = SupportedLanguage
                                .valueOf(theDocument.getField(IndexFields.LANGUAGESTORED).stringValue());
                        String theFieldName;
                        if (analyzerCache.supportsLanguage(theLanguage)) {
                            theFieldName = analyzerCache.getFieldNameFor(theLanguage);
                        } else {
                            theFieldName = IndexFields.CONTENT;
                        }

                        String theOriginalContent = theDocument.getField(theFieldName).stringValue();

                        ForkJoinTask<String> theHighligherResult = executorPool.submit(() -> {
                            StringBuilder theResult = new StringBuilder(theFormattedLastModified);
                            theResult.append("&nbsp;-&nbsp;");
                            Highlighter theHighlighter = new Highlighter(new SimpleHTMLFormatter(),
                                    new QueryScorer(theFinalQuery));
                            for (String theFragment : theHighlighter.getBestFragments(analyzer, theFieldName,
                                    theOriginalContent, NUMBER_OF_FRAGMENTS)) {
                                // NOTE(review): the builder is never empty at this point, so "..." is
                                // also put in front of the first fragment - confirm this is intended.
                                if (theResult.length() > 0) {
                                    theResult.append("...");
                                }
                                theResult.append(theFragment);
                            }
                            return theResult.toString();
                        });

                        // Scale the score relative to the best hit into the range 0..5
                        int theNormalizedScore = (int) (theScoreDoc.score / theDocs.getMaxScore() * 5);

                        // Only files that still exist on disk are reported
                        File theFileOnDisk = new File(theFoundFileName);
                        if (theFileOnDisk.exists()) {

                            boolean thePreviewAvailable = previewProcessor.previewAvailableFor(theFileOnDisk);

                            theExistingDocument = new QueryResultDocument(theDocumentID, theFoundFileName,
                                    theHighligherResult, theLastModifiedMillis, theNormalizedScore, theUniqueID,
                                    thePreviewAvailable);
                            theDocumentsByHash.put(theHash, theExistingDocument);
                            theResultDocuments.add(theExistingDocument);
                        }
                    }
                }

                if (aConfiguration.isShowSimilarDocuments()) {

                    MoreLikeThis theMoreLikeThis = new MoreLikeThis(theSearcher.getIndexReader());
                    theMoreLikeThis.setAnalyzer(analyzer);
                    theMoreLikeThis.setMinTermFreq(1);
                    theMoreLikeThis.setMinDocFreq(1);
                    theMoreLikeThis.setFieldNames(analyzerCache.getAllFieldNames());

                    for (QueryResultDocument theDocument : theResultDocuments) {
                        Query theMoreLikeThisQuery = theMoreLikeThis.like(theDocument.getDocumentID());
                        TopDocs theMoreLikeThisTopDocs = theSearcher.search(theMoreLikeThisQuery, 5);
                        for (ScoreDoc theMoreLikeThisScoreDoc : theMoreLikeThisTopDocs.scoreDocs) {
                            int theSimilarDocument = theMoreLikeThisScoreDoc.doc;
                            // add() returns false for documents that were already reported
                            if (theUniqueDocumentsFound.add(theSimilarDocument)) {
                                Document theMoreLikeThisDocument = theSearcher.doc(theSimilarDocument);
                                String theFilename = theMoreLikeThisDocument.getField(IndexFields.FILENAME)
                                        .stringValue();
                                theDocument.addSimilarFile(theFilename);
                            }
                        }
                    }
                }

                LOGGER.info("Got Dimensions");
                for (FacetResult theResult : theFacetCounts.getAllDims(20000)) {
                    String theDimension = theResult.dim;
                    if ("author".equals(theDimension)) {
                        for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                            if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                                theAuthorFacets.add(new Facet(theLabelAndValue.label,
                                        theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                                FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                            }
                        }
                    }
                    if ("extension".equals(theDimension)) {
                        for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                            if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                                theFileTypesFacets.add(new Facet(theLabelAndValue.label,
                                        theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                                FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                            }
                        }
                    }
                    if ("last-modified-year".equals(theDimension)) {
                        for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                            if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                                theLastModifiedYearFacet.add(new Facet(theLabelAndValue.label,
                                        theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                                FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                            }
                        }
                    }
                    if (IndexFields.LANGUAGEFACET.equals(theDimension)) {
                        for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                            if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                                // The facet label is used as a language code; show its English name
                                Locale theLocale = new Locale(theLabelAndValue.label);
                                theLanguageFacet.add(new Facet(theLocale.getDisplayLanguage(Locale.ENGLISH),
                                        theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                                FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                            }
                        }
                    }

                    LOGGER.info(" " + theDimension);
                }

                if (!theAuthorFacets.isEmpty()) {
                    theDimensions.add(new FacetDimension("Author", theAuthorFacets));
                }
                if (!theLastModifiedYearFacet.isEmpty()) {
                    theDimensions.add(new FacetDimension("Last modified", theLastModifiedYearFacet));
                }
                if (!theFileTypesFacets.isEmpty()) {
                    theDimensions.add(new FacetDimension("File types", theFileTypesFacets));
                }
                if (!theLanguageFacet.isEmpty()) {
                    theDimensions.add(new FacetDimension("Language", theLanguageFacet));
                }

                // Wait for all Tasks to complete for the search result highlighter
                ForkJoinTask.helpQuiesce();
            }

            long theDuration = System.currentTimeMillis() - theStartTime;

            LOGGER.info("Total amount of time : " + theDuration + "ms");

            // Report the same duration that was logged
            return new QueryResult(theDuration, theResultDocuments, theDimensions,
                    theSearcher.getIndexReader().numDocs(), aBacklink);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    } finally {
        searcherManager.release(theSearcher);
    }
}

From source file:org.efaps.admin.index.Searcher.java

License:Apache License

/**
 * Search./*  w  w w. j  a va  2  s . c  om*/
 *
 * @param _search the search
 * @return the search result
 * @throws EFapsException on error
 */
protected SearchResult executeSearch(final ISearch _search) throws EFapsException {
    final SearchResult ret = new SearchResult();
    try {
        LOG.debug("Starting search with: {}", _search.getQuery());
        final StandardQueryParser queryParser = new StandardQueryParser(Index.getAnalyzer());
        queryParser.setAllowLeadingWildcard(true);
        if (EFapsSystemConfiguration.get().containsAttributeValue(KernelSettings.INDEXDEFAULTOP)) {
            queryParser.setDefaultOperator(EnumUtils.getEnum(StandardQueryConfigHandler.Operator.class,
                    EFapsSystemConfiguration.get().getAttributeValue(KernelSettings.INDEXDEFAULTOP)));
        } else {
            queryParser.setDefaultOperator(StandardQueryConfigHandler.Operator.AND);
        }
        final Query query = queryParser.parse(_search.getQuery(), "ALL");

        final IndexReader reader = DirectoryReader.open(Index.getDirectory());
        Sort sort = _search.getSort();
        if (sort == null) {
            sort = new Sort(new SortField(Key.CREATED.name(), SortField.Type.LONG, true));
        }

        final FacetsConfig facetConfig = Index.getFacetsConfig();
        final DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(Index.getTaxonomyDirectory());

        final IndexSearcher searcher = new IndexSearcher(reader);
        final FacetsCollector fc = new FacetsCollector();

        final TopFieldDocs topFieldDocs = FacetsCollector.search(searcher, query, _search.getNumHits(), sort,
                fc);

        if (_search.getConfigs().contains(SearchConfig.ACTIVATE_DIMENSION)) {
            final Facets facets = new FastTaxonomyFacetCounts(taxoReader, facetConfig, fc);

            for (final FacetResult result : facets.getAllDims(1000)) {
                LOG.debug("FacetResult {}.", result);
                final DimConfig dimConfig = facetConfig.getDimConfig(result.dim);
                final Dimension retDim = new Dimension().setKey(result.dim);
                ret.getDimensions().add(retDim);
                for (final LabelAndValue labelValue : result.labelValues) {
                    final DimValue dimValue = new DimValue().setLabel(labelValue.label)
                            .setValue(labelValue.value.intValue());
                    dimValue.setPath(new String[] { retDim.getKey() });
                    retDim.getValues().add(dimValue);
                    if (dimConfig.hierarchical) {
                        addSubDimension(facets, dimValue, result.dim, labelValue.label);
                    }
                }
            }
        }
        ret.setHitCount(topFieldDocs.totalHits);
        if (ret.getHitCount() > 0) {
            final ScoreDoc[] hits = topFieldDocs.scoreDocs;

            LOG.debug("Found {} hits.", hits.length);
            for (int i = 0; i < hits.length; ++i) {
                final Document doc = searcher.doc(hits[i].doc);
                final String oid = doc.get(Key.OID.name());
                final String text = doc.get(Key.MSGPHRASE.name());
                LOG.debug("{}. {}\t {}", i + 1, oid, text);
                final Instance instance = Instance.get(oid);
                final List<Instance> list;
                if (this.typeMapping.containsKey(instance.getType())) {
                    list = this.typeMapping.get(instance.getType());
                } else {
                    list = new ArrayList<Instance>();
                    this.typeMapping.put(instance.getType(), list);
                }
                list.add(instance);
                final Element element = new Element().setOid(oid).setText(text);
                for (final Entry<String, Collection<String>> entry : _search.getResultFields().entrySet()) {
                    for (final String name : entry.getValue()) {
                        final String value = doc.get(name);
                        if (value != null) {
                            element.addField(name, value);
                        }
                    }
                }
                this.elements.put(instance, element);
            }
        }
        reader.close();
        checkAccess();
        ret.getElements().addAll(this.elements.values());
    } catch (final IOException | QueryNodeException e) {
        LOG.error("Catched Exception", e);
    }
    return ret;
}