List of usage examples for org.apache.lucene.facet FacetsCollector search
public static TopFieldDocs search(IndexSearcher searcher, Query q, int n, Sort sort, Collector fc) throws IOException
From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java
License:Open Source License
public QueryResult performQuery(String aQueryString, String aBacklink, String aBasePath, Configuration aConfiguration, Map<String, String> aDrilldownFields) throws IOException { searcherManager.maybeRefreshBlocking(); IndexSearcher theSearcher = searcherManager.acquire(); SortedSetDocValuesReaderState theSortedSetState = new DefaultSortedSetDocValuesReaderState( theSearcher.getIndexReader()); List<QueryResultDocument> theResultDocuments = new ArrayList<>(); long theStartTime = System.currentTimeMillis(); LOGGER.info("Querying for " + aQueryString); DateFormat theDateFormat = new SimpleDateFormat("dd.MMMM.yyyy", Locale.ENGLISH); try {//from w ww . j a v a 2s. co m List<FacetDimension> theDimensions = new ArrayList<>(); // Search only if a search query is given if (!StringUtils.isEmpty(aQueryString)) { Query theQuery = computeBooleanQueryFor(aQueryString); LOGGER.info(" query is " + theQuery); theQuery = theQuery.rewrite(theSearcher.getIndexReader()); LOGGER.info(" rewritten query is " + theQuery); DrillDownQuery theDrilldownQuery = new DrillDownQuery(facetsConfig, theQuery); aDrilldownFields.entrySet().stream().forEach(aEntry -> { LOGGER.info(" with Drilldown " + aEntry.getKey() + " for " + aEntry.getValue()); theDrilldownQuery.add(aEntry.getKey(), aEntry.getValue()); }); FacetsCollector theFacetCollector = new FacetsCollector(); TopDocs theDocs = FacetsCollector.search(theSearcher, theDrilldownQuery, null, aConfiguration.getNumberOfSearchResults(), theFacetCollector); SortedSetDocValuesFacetCounts theFacetCounts = new SortedSetDocValuesFacetCounts(theSortedSetState, theFacetCollector); List<Facet> theAuthorFacets = new ArrayList<>(); List<Facet> theFileTypesFacets = new ArrayList<>(); List<Facet> theLastModifiedYearFacet = new ArrayList<>(); List<Facet> theLanguageFacet = new ArrayList<>(); LOGGER.info("Found " + theDocs.scoreDocs.length + " documents"); // We need this cache to detect duplicate documents while searching for similarities Set<Integer> theUniqueDocumentsFound = new HashSet<>(); Map<String, QueryResultDocument> theDocumentsByHash = new HashMap<>(); for (int i = 0; i < theDocs.scoreDocs.length; i++) { int theDocumentID = theDocs.scoreDocs[i].doc; theUniqueDocumentsFound.add(theDocumentID); Document theDocument = theSearcher.doc(theDocumentID); String theUniqueID = theDocument.getField(IndexFields.UNIQUEID).stringValue(); String theFoundFileName = theDocument.getField(IndexFields.FILENAME).stringValue(); String theHash = theDocument.getField(IndexFields.CONTENTMD5).stringValue(); QueryResultDocument theExistingDocument = theDocumentsByHash.get(theHash); if (theExistingDocument != null) { theExistingDocument.addFileName(theFoundFileName); } else { Date theLastModified = new Date( theDocument.getField(IndexFields.LASTMODIFIED).numericValue().longValue()); SupportedLanguage theLanguage = SupportedLanguage .valueOf(theDocument.getField(IndexFields.LANGUAGESTORED).stringValue()); String theFieldName; if (analyzerCache.supportsLanguage(theLanguage)) { theFieldName = analyzerCache.getFieldNameFor(theLanguage); } else { theFieldName = IndexFields.CONTENT; } String theOriginalContent = theDocument.getField(theFieldName).stringValue(); final Query theFinalQuery = theQuery; ForkJoinTask<String> theHighligherResult = executorPool.submit(() -> { StringBuilder theResult = new StringBuilder(theDateFormat.format(theLastModified)); theResult.append(" - "); Highlighter theHighlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(theFinalQuery)); for (String theFragment : theHighlighter.getBestFragments(analyzer, theFieldName, theOriginalContent, NUMBER_OF_FRAGMENTS)) { if (theResult.length() > 0) { theResult = theResult.append("..."); } theResult = theResult.append(theFragment); } return theResult.toString(); }); int theNormalizedScore = (int) (theDocs.scoreDocs[i].score / theDocs.getMaxScore() * 5); File theFileOnDisk = new File(theFoundFileName); if (theFileOnDisk.exists()) { boolean thePreviewAvailable = previewProcessor.previewAvailableFor(theFileOnDisk); theExistingDocument = new QueryResultDocument(theDocumentID, theFoundFileName, theHighligherResult, Long.parseLong(theDocument.getField(IndexFields.LASTMODIFIED).stringValue()), theNormalizedScore, theUniqueID, thePreviewAvailable); theDocumentsByHash.put(theHash, theExistingDocument); theResultDocuments.add(theExistingDocument); } } } if (aConfiguration.isShowSimilarDocuments()) { MoreLikeThis theMoreLikeThis = new MoreLikeThis(theSearcher.getIndexReader()); theMoreLikeThis.setAnalyzer(analyzer); theMoreLikeThis.setMinTermFreq(1); theMoreLikeThis.setMinDocFreq(1); theMoreLikeThis.setFieldNames(analyzerCache.getAllFieldNames()); for (QueryResultDocument theDocument : theResultDocuments) { Query theMoreLikeThisQuery = theMoreLikeThis.like(theDocument.getDocumentID()); TopDocs theMoreLikeThisTopDocs = theSearcher.search(theMoreLikeThisQuery, 5); for (ScoreDoc theMoreLikeThisScoreDoc : theMoreLikeThisTopDocs.scoreDocs) { int theSimilarDocument = theMoreLikeThisScoreDoc.doc; if (theUniqueDocumentsFound.add(theSimilarDocument)) { Document theMoreLikeThisDocument = theSearcher.doc(theSimilarDocument); String theFilename = theMoreLikeThisDocument.getField(IndexFields.FILENAME) .stringValue(); theDocument.addSimilarFile(theFilename); } } } } LOGGER.info("Got Dimensions"); for (FacetResult theResult : theFacetCounts.getAllDims(20000)) { String theDimension = theResult.dim; if ("author".equals(theDimension)) { for (LabelAndValue theLabelAndValue : theResult.labelValues) { if (!StringUtils.isEmpty(theLabelAndValue.label)) { theAuthorFacets.add(new Facet(theLabelAndValue.label, theLabelAndValue.value.intValue(), aBasePath + "/" + encode( FacetSearchUtils.encode(theDimension, theLabelAndValue.label)))); } } } if ("extension".equals(theDimension)) { for (LabelAndValue theLabelAndValue : theResult.labelValues) { if (!StringUtils.isEmpty(theLabelAndValue.label)) { theFileTypesFacets.add(new Facet(theLabelAndValue.label, theLabelAndValue.value.intValue(), aBasePath + "/" + encode( FacetSearchUtils.encode(theDimension, theLabelAndValue.label)))); } } } if ("last-modified-year".equals(theDimension)) { for (LabelAndValue theLabelAndValue : theResult.labelValues) { if (!StringUtils.isEmpty(theLabelAndValue.label)) { theLastModifiedYearFacet.add(new Facet(theLabelAndValue.label, theLabelAndValue.value.intValue(), aBasePath + "/" + encode( FacetSearchUtils.encode(theDimension, theLabelAndValue.label)))); } } } if (IndexFields.LANGUAGEFACET.equals(theDimension)) { for (LabelAndValue theLabelAndValue : theResult.labelValues) { if (!StringUtils.isEmpty(theLabelAndValue.label)) { Locale theLocale = new Locale(theLabelAndValue.label); theLanguageFacet.add(new Facet(theLocale.getDisplayLanguage(Locale.ENGLISH), theLabelAndValue.value.intValue(), aBasePath + "/" + encode( FacetSearchUtils.encode(theDimension, theLabelAndValue.label)))); } } } LOGGER.info(" " + theDimension); } if (!theAuthorFacets.isEmpty()) { theDimensions.add(new FacetDimension("Author", theAuthorFacets)); } if (!theLastModifiedYearFacet.isEmpty()) { theDimensions.add(new FacetDimension("Last modified", theLastModifiedYearFacet)); } if (!theFileTypesFacets.isEmpty()) { theDimensions.add(new FacetDimension("File types", theFileTypesFacets)); } if (!theLanguageFacet.isEmpty()) { theDimensions.add(new FacetDimension("Language", theLanguageFacet)); } // Wait for all Tasks to complete for the search result highlighter ForkJoinTask.helpQuiesce(); } long theDuration = System.currentTimeMillis() - theStartTime; LOGGER.info("Total amount of time : " + theDuration + "ms"); return new QueryResult(System.currentTimeMillis() - theStartTime, theResultDocuments, theDimensions, theSearcher.getIndexReader().numDocs(), aBacklink); } catch (Exception e) { throw new RuntimeException(e); } finally { searcherManager.release(theSearcher); } }
From source file:org.efaps.admin.index.Searcher.java
License:Apache License
/** * Search./* w w w. j a va 2 s . c om*/ * * @param _search the search * @return the search result * @throws EFapsException on error */ protected SearchResult executeSearch(final ISearch _search) throws EFapsException { final SearchResult ret = new SearchResult(); try { LOG.debug("Starting search with: {}", _search.getQuery()); final StandardQueryParser queryParser = new StandardQueryParser(Index.getAnalyzer()); queryParser.setAllowLeadingWildcard(true); if (EFapsSystemConfiguration.get().containsAttributeValue(KernelSettings.INDEXDEFAULTOP)) { queryParser.setDefaultOperator(EnumUtils.getEnum(StandardQueryConfigHandler.Operator.class, EFapsSystemConfiguration.get().getAttributeValue(KernelSettings.INDEXDEFAULTOP))); } else { queryParser.setDefaultOperator(StandardQueryConfigHandler.Operator.AND); } final Query query = queryParser.parse(_search.getQuery(), "ALL"); final IndexReader reader = DirectoryReader.open(Index.getDirectory()); Sort sort = _search.getSort(); if (sort == null) { sort = new Sort(new SortField(Key.CREATED.name(), SortField.Type.LONG, true)); } final FacetsConfig facetConfig = Index.getFacetsConfig(); final DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(Index.getTaxonomyDirectory()); final IndexSearcher searcher = new IndexSearcher(reader); final FacetsCollector fc = new FacetsCollector(); final TopFieldDocs topFieldDocs = FacetsCollector.search(searcher, query, _search.getNumHits(), sort, fc); if (_search.getConfigs().contains(SearchConfig.ACTIVATE_DIMENSION)) { final Facets facets = new FastTaxonomyFacetCounts(taxoReader, facetConfig, fc); for (final FacetResult result : facets.getAllDims(1000)) { LOG.debug("FacetResult {}.", result); final DimConfig dimConfig = facetConfig.getDimConfig(result.dim); final Dimension retDim = new Dimension().setKey(result.dim); ret.getDimensions().add(retDim); for (final LabelAndValue labelValue : result.labelValues) { final DimValue dimValue = new DimValue().setLabel(labelValue.label) .setValue(labelValue.value.intValue()); dimValue.setPath(new String[] { retDim.getKey() }); retDim.getValues().add(dimValue); if (dimConfig.hierarchical) { addSubDimension(facets, dimValue, result.dim, labelValue.label); } } } } ret.setHitCount(topFieldDocs.totalHits); if (ret.getHitCount() > 0) { final ScoreDoc[] hits = topFieldDocs.scoreDocs; LOG.debug("Found {} hits.", hits.length); for (int i = 0; i < hits.length; ++i) { final Document doc = searcher.doc(hits[i].doc); final String oid = doc.get(Key.OID.name()); final String text = doc.get(Key.MSGPHRASE.name()); LOG.debug("{}. {}\t {}", i + 1, oid, text); final Instance instance = Instance.get(oid); final List<Instance> list; if (this.typeMapping.containsKey(instance.getType())) { list = this.typeMapping.get(instance.getType()); } else { list = new ArrayList<Instance>(); this.typeMapping.put(instance.getType(), list); } list.add(instance); final Element element = new Element().setOid(oid).setText(text); for (final Entry<String, Collection<String>> entry : _search.getResultFields().entrySet()) { for (final String name : entry.getValue()) { final String value = doc.get(name); if (value != null) { element.addField(name, value); } } } this.elements.put(instance, element); } } reader.close(); checkAccess(); ret.getElements().addAll(this.elements.values()); } catch (final IOException | QueryNodeException e) { LOG.error("Catched Exception", e); } return ret; }