List of usage examples for org.apache.lucene.search.IndexSearcher#getIndexReader()
public IndexReader getIndexReader()
From source file:de.blizzy.documentr.search.PageIndex.java
License:Open Source License
/**
 * Builds a "did you mean" style suggestion for the given search text.
 * <p>
 * Tokenizes the text, asks a {@link DirectSpellChecker} for a more popular spelling of
 * each token, and — if at least one token was corrected and the corrected query yields
 * hits — returns the corrected text plus an HTML version with corrections highlighted.
 *
 * @param searchText     raw user search text
 * @param authentication used to restrict the trial search to visible pages
 * @param searcher       searcher whose reader feeds the spell checker
 * @return a suggestion with hit count, or {@code null} if nothing better was found
 */
private SearchTextSuggestion getSearchTextSuggestion(String searchText, Authentication authentication,
        IndexSearcher searcher) throws IOException, ParseException, TimeoutException {

    // Collect each token together with its character offsets in the original text.
    List<WordPosition> words = Lists.newArrayList();
    TokenStream tokenStream = null;
    try {
        tokenStream = analyzer.tokenStream(ALL_TEXT_SUGGESTIONS, new StringReader(searchText));
        tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.addAttribute(OffsetAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            CharTermAttribute charTerm = tokenStream.getAttribute(CharTermAttribute.class);
            String text = charTerm.toString();
            if (StringUtils.isNotBlank(text)) {
                OffsetAttribute offset = tokenStream.getAttribute(OffsetAttribute.class);
                WordPosition word = new WordPosition(text, offset.startOffset(), offset.endOffset());
                words.add(word);
            }
        }
        tokenStream.end();
    } finally {
        Closeables.closeQuietly(tokenStream);
    }

    // Process words back-to-front so earlier offsets stay valid while we splice
    // replacements into the StringBuilders below.
    Collections.reverse(words);

    StringBuilder suggestedSearchText = new StringBuilder(searchText);
    StringBuilder suggestedSearchTextHtml = new StringBuilder(searchText);
    boolean foundSuggestions = false;
    // Timestamped markers make accidental collisions with user input unlikely.
    String now = String.valueOf(System.currentTimeMillis());
    String startMarker = "__SUGGESTION-" + now + "__"; //$NON-NLS-1$ //$NON-NLS-2$
    String endMarker = "__/SUGGESTION-" + now + "__"; //$NON-NLS-1$ //$NON-NLS-2$
    DirectSpellChecker spellChecker = new DirectSpellChecker();
    IndexReader reader = searcher.getIndexReader();
    for (WordPosition word : words) {
        Term term = new Term(ALL_TEXT_SUGGESTIONS, word.getWord());
        // Only the single best, more-popular suggestion per word is considered.
        SuggestWord[] suggestions = spellChecker.suggestSimilar(term, 1, reader, SuggestMode.SUGGEST_MORE_POPULAR);
        if (suggestions.length > 0) {
            String suggestedWord = suggestions[0].string;
            int start = word.getStart();
            int end = word.getEnd();
            suggestedSearchText.replace(start, end, suggestedWord);
            suggestedSearchTextHtml.replace(start, end,
                    startMarker + StringEscapeUtils.escapeHtml4(suggestedWord) + endMarker);
            foundSuggestions = true;
        }
    }

    if (foundSuggestions) {
        String suggestion = suggestedSearchText.toString();
        // Trial search: only offer the suggestion if it actually produces hits.
        SearchResult suggestionResult = findPages(suggestion, 1, authentication, searcher);
        int suggestionTotalHits = suggestionResult.getTotalHits();
        if (suggestionTotalHits > 0) {
            // NOTE(review): suggested words were escapeHtml4'd when inserted above and the
            // whole buffer is escaped again here — corrected words appear double-escaped
            // while untouched text is escaped once. Confirm whether this is intended.
            String html = StringEscapeUtils.escapeHtml4(suggestedSearchTextHtml.toString())
                    .replaceAll(startMarker + "(.*?)" + endMarker, "<strong><em>$1</em></strong>"); //$NON-NLS-1$ //$NON-NLS-2$
            return new SearchTextSuggestion(suggestedSearchText.toString(), html, suggestionTotalHits);
        }
    }
    return null;
}
From source file:de.blizzy.documentr.search.PageIndex.java
License:Open Source License
/**
 * Returns all tags occurring in at least one document visible to the given user.
 * <p>
 * Walks the terms of the {@code TAG} field and keeps every term that matches at least
 * one visible document.
 *
 * @param authentication user whose visibility restricts the result
 * @return set of visible tags; empty if the user can see no documents
 */
public Set<String> getAllTags(Authentication authentication) throws IOException, TimeoutException {
    IndexSearcher searcher = null;
    try {
        searcher = searcherManager.acquire();
        Bits visibleDocs = getVisibleDocIds(searcher, authentication);
        Set<String> tags = Sets.newHashSet();
        if (visibleDocs.length() > 0) {
            // Reader is owned by the searcher; scope it locally instead of the
            // method-wide null-initialized variable the original carried around.
            IndexReader reader = searcher.getIndexReader();
            Terms terms = MultiFields.getTerms(reader, TAG);
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator(null);
                BytesRef ref;
                while ((ref = termsEnum.next()) != null) {
                    // Postings restricted to visible docs; a single hit is enough.
                    DocsEnum docsEnum = termsEnum.docs(visibleDocs, null, 0);
                    if (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                        tags.add(ref.utf8ToString());
                    }
                }
            }
        }
        return tags;
    } finally {
        if (searcher != null) {
            searcherManager.release(searcher);
        }
    }
}
From source file:de.blizzy.documentr.search.TagFinder.java
License:Open Source License
public Set<String> getAllTags(Authentication authentication) throws IOException, TimeoutException { IndexReader reader = null;//w w w . ja v a 2 s . com IndexSearcher searcher = null; try { searcher = searcherManager.acquire(); // no point in running the task asynchronously here GetVisibleDocIdsTask visibleDocIdsTask = new GetVisibleDocIdsTask(searcher, authentication, userStore, permissionEvaluator, taskExecutor); Bits visibleDocIds = visibleDocIdsTask.call(); Set<String> tags = Sets.newHashSet(); if (visibleDocIds.length() > 0) { reader = searcher.getIndexReader(); Terms terms = MultiFields.getTerms(reader, PageIndex.TAG); if (terms != null) { TermsEnum termsEnum = terms.iterator(null); BytesRef ref; while ((ref = termsEnum.next()) != null) { DocsEnum docsEnum = termsEnum.docs(visibleDocIds, null, 0); if (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { tags.add(ref.utf8ToString()); } } } } return tags; } finally { if (searcher != null) { searcherManager.release(searcher); } } }
From source file:de.ingrid.search.utils.facet.FacetClassProducerTest.java
License:EUPL
@Before public void setup() { IndexSearcher searcher = null; indexDir = DummyIndex.getTestIndex(); try {/*from w w w.j a v a2 s. c o m*/ searcher = new IndexSearcher(IndexReader.open(indexDir)); } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } DummyQueryParsers qp = new DummyQueryParsers(); List<IQueryParser> parsers = new ArrayList<IQueryParser>(); parsers.add(new DummyTermQueryParser("content", null)); qp.setQueryParsers(parsers); fcp = new FacetClassProducer(); fcp.setIndexReaderWrapper(new LuceneIndexReaderWrapper(new IndexReader[] { searcher.getIndexReader() })); fcp.setQueryParsers(qp); }
From source file:de.ingrid.search.utils.facet.FacetManagerTest.java
License:EUPL
/**
 * Wires up a {@link FacetManager} with producer, registry and counter against a dummy
 * index.
 * <p>
 * The method already declares {@code throws Exception}, so the original
 * catch-and-printStackTrace blocks (which continued with a {@code null} searcher and
 * NPE'd at {@code searcher.getIndexReader()}) are removed — failures now propagate and
 * fail the test setup directly.
 */
@Before
public void setUp() throws Exception {
    indexDir = DummyIndex.getTestIndex();
    IndexSearcher searcher = new IndexSearcher(IndexReader.open(indexDir));
    qps = new DummyQueryParsers();
    List<IQueryParser> parsers = new ArrayList<IQueryParser>();
    parsers.add(new DummyTermQueryParser("content", null));
    qps.setQueryParsers(parsers);
    indexReader = searcher.getIndexReader();
    FacetClassProducer fp = new FacetClassProducer();
    fp.setIndexReaderWrapper(new LuceneIndexReaderWrapper(new IndexReader[] { indexReader }));
    fp.setQueryParsers(qps);
    FacetClassRegistry fr = new FacetClassRegistry();
    fr.setFacetClassProducer(fp);
    IndexFacetCounter fc = new IndexFacetCounter();
    fc.setFacetClassRegistry(fr);
    fm = new FacetManager();
    fm.setIndexReaderWrapper(new LuceneIndexReaderWrapper(new IndexReader[] { indexReader }));
    fm.setQueryParsers(qps);
    fm.setFacetCounters(Arrays.asList(new IFacetCounter[] { fc }));
}
From source file:de.innovationgate.wgpublisher.lucene.LuceneManager.java
License:Open Source License
/**
 * Rewrites the given query against the current index reader while holding the
 * index-searcher semaphore.
 *
 * @param query query to rewrite
 * @return the rewritten query
 */
private Query rewrite(Query query) throws CorruptIndexException, IOException, InterruptedException {
    // NOTE(review): the searcher is obtained BEFORE the semaphore is acquired. If the
    // semaphore guards the searcher's lifecycle, a concurrent swap could hand us a stale
    // searcher here — confirm whether getIndexSearcher() should move inside the guarded
    // section.
    IndexSearcher searcher = getIndexSearcher();
    _indexSearcherSemaphore.acquire();
    try {
        return query.rewrite(searcher.getIndexReader());
    } finally {
        _indexSearcherSemaphore.release();
    }
}
From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java
License:Open Source License
/**
 * Executes a faceted full-text query and assembles the complete result page.
 * <p>
 * Steps: refresh/acquire a searcher, run the (drilldown-augmented) query with facet
 * collection, build one {@link QueryResultDocument} per unique content hash (duplicate
 * hashes become alternate file names), optionally attach similar documents via
 * {@code MoreLikeThis}, and convert facet counts into UI dimensions. Highlighting runs
 * asynchronously in {@code executorPool} and is awaited via
 * {@link ForkJoinTask#helpQuiesce()} before returning.
 *
 * @param aQueryString     raw user query; blank string skips searching entirely
 * @param aBacklink        backlink passed through to the result
 * @param aBasePath        base path used to build facet drilldown links
 * @param aConfiguration   result-count and similar-documents settings
 * @param aDrilldownFields facet dimension/value pairs to drill into
 * @return the populated query result (never {@code null})
 */
public QueryResult performQuery(String aQueryString, String aBacklink, String aBasePath,
        Configuration aConfiguration, Map<String, String> aDrilldownFields) throws IOException {
    searcherManager.maybeRefreshBlocking();
    IndexSearcher theSearcher = searcherManager.acquire();
    SortedSetDocValuesReaderState theSortedSetState = new DefaultSortedSetDocValuesReaderState(
            theSearcher.getIndexReader());
    List<QueryResultDocument> theResultDocuments = new ArrayList<>();
    long theStartTime = System.currentTimeMillis();
    LOGGER.info("Querying for " + aQueryString);
    DateFormat theDateFormat = new SimpleDateFormat("dd.MMMM.yyyy", Locale.ENGLISH);
    try {
        List<FacetDimension> theDimensions = new ArrayList<>();
        // Search only if a search query is given
        if (!StringUtils.isEmpty(aQueryString)) {
            Query theQuery = computeBooleanQueryFor(aQueryString);
            LOGGER.info(" query is " + theQuery);
            theQuery = theQuery.rewrite(theSearcher.getIndexReader());
            LOGGER.info(" rewritten query is " + theQuery);
            // Wrap the query so selected facet values further narrow the result.
            DrillDownQuery theDrilldownQuery = new DrillDownQuery(facetsConfig, theQuery);
            aDrilldownFields.entrySet().stream().forEach(aEntry -> {
                LOGGER.info(" with Drilldown " + aEntry.getKey() + " for " + aEntry.getValue());
                theDrilldownQuery.add(aEntry.getKey(), aEntry.getValue());
            });
            FacetsCollector theFacetCollector = new FacetsCollector();
            TopDocs theDocs = FacetsCollector.search(theSearcher, theDrilldownQuery, null,
                    aConfiguration.getNumberOfSearchResults(), theFacetCollector);
            SortedSetDocValuesFacetCounts theFacetCounts = new SortedSetDocValuesFacetCounts(theSortedSetState,
                    theFacetCollector);
            List<Facet> theAuthorFacets = new ArrayList<>();
            List<Facet> theFileTypesFacets = new ArrayList<>();
            List<Facet> theLastModifiedYearFacet = new ArrayList<>();
            List<Facet> theLanguageFacet = new ArrayList<>();
            LOGGER.info("Found " + theDocs.scoreDocs.length + " documents");
            // We need this cache to detect duplicate documents while searching for similarities
            Set<Integer> theUniqueDocumentsFound = new HashSet<>();
            // Documents sharing a content hash collapse into one result entry.
            Map<String, QueryResultDocument> theDocumentsByHash = new HashMap<>();
            for (int i = 0; i < theDocs.scoreDocs.length; i++) {
                int theDocumentID = theDocs.scoreDocs[i].doc;
                theUniqueDocumentsFound.add(theDocumentID);
                Document theDocument = theSearcher.doc(theDocumentID);
                String theUniqueID = theDocument.getField(IndexFields.UNIQUEID).stringValue();
                String theFoundFileName = theDocument.getField(IndexFields.FILENAME).stringValue();
                String theHash = theDocument.getField(IndexFields.CONTENTMD5).stringValue();
                QueryResultDocument theExistingDocument = theDocumentsByHash.get(theHash);
                if (theExistingDocument != null) {
                    // Same content seen before: record this path as an alternate name only.
                    theExistingDocument.addFileName(theFoundFileName);
                } else {
                    Date theLastModified = new Date(
                            theDocument.getField(IndexFields.LASTMODIFIED).numericValue().longValue());
                    SupportedLanguage theLanguage = SupportedLanguage
                            .valueOf(theDocument.getField(IndexFields.LANGUAGESTORED).stringValue());
                    // Highlight against the language-specific field when an analyzer exists.
                    String theFieldName;
                    if (analyzerCache.supportsLanguage(theLanguage)) {
                        theFieldName = analyzerCache.getFieldNameFor(theLanguage);
                    } else {
                        theFieldName = IndexFields.CONTENT;
                    }
                    String theOriginalContent = theDocument.getField(theFieldName).stringValue();
                    final Query theFinalQuery = theQuery;
                    // Highlighting is expensive; run it asynchronously per document.
                    ForkJoinTask<String> theHighligherResult = executorPool.submit(() -> {
                        StringBuilder theResult = new StringBuilder(theDateFormat.format(theLastModified));
                        theResult.append(" - ");
                        Highlighter theHighlighter = new Highlighter(new SimpleHTMLFormatter(),
                                new QueryScorer(theFinalQuery));
                        for (String theFragment : theHighlighter.getBestFragments(analyzer, theFieldName,
                                theOriginalContent, NUMBER_OF_FRAGMENTS)) {
                            if (theResult.length() > 0) {
                                theResult = theResult.append("...");
                            }
                            theResult = theResult.append(theFragment);
                        }
                        return theResult.toString();
                    });
                    // Score normalized to a 0..5 "stars" range relative to the best hit.
                    int theNormalizedScore = (int) (theDocs.scoreDocs[i].score / theDocs.getMaxScore() * 5);
                    File theFileOnDisk = new File(theFoundFileName);
                    // Skip index entries whose backing file has vanished from disk.
                    if (theFileOnDisk.exists()) {
                        boolean thePreviewAvailable = previewProcessor.previewAvailableFor(theFileOnDisk);
                        theExistingDocument = new QueryResultDocument(theDocumentID, theFoundFileName,
                                theHighligherResult,
                                Long.parseLong(theDocument.getField(IndexFields.LASTMODIFIED).stringValue()),
                                theNormalizedScore, theUniqueID, thePreviewAvailable);
                        theDocumentsByHash.put(theHash, theExistingDocument);
                        theResultDocuments.add(theExistingDocument);
                    }
                }
            }
            if (aConfiguration.isShowSimilarDocuments()) {
                // Attach up to 5 similar files per hit, skipping documents already shown.
                MoreLikeThis theMoreLikeThis = new MoreLikeThis(theSearcher.getIndexReader());
                theMoreLikeThis.setAnalyzer(analyzer);
                theMoreLikeThis.setMinTermFreq(1);
                theMoreLikeThis.setMinDocFreq(1);
                theMoreLikeThis.setFieldNames(analyzerCache.getAllFieldNames());
                for (QueryResultDocument theDocument : theResultDocuments) {
                    Query theMoreLikeThisQuery = theMoreLikeThis.like(theDocument.getDocumentID());
                    TopDocs theMoreLikeThisTopDocs = theSearcher.search(theMoreLikeThisQuery, 5);
                    for (ScoreDoc theMoreLikeThisScoreDoc : theMoreLikeThisTopDocs.scoreDocs) {
                        int theSimilarDocument = theMoreLikeThisScoreDoc.doc;
                        if (theUniqueDocumentsFound.add(theSimilarDocument)) {
                            Document theMoreLikeThisDocument = theSearcher.doc(theSimilarDocument);
                            String theFilename = theMoreLikeThisDocument.getField(IndexFields.FILENAME)
                                    .stringValue();
                            theDocument.addSimilarFile(theFilename);
                        }
                    }
                }
            }
            LOGGER.info("Got Dimensions");
            // Translate facet counts into UI facets with drilldown links.
            for (FacetResult theResult : theFacetCounts.getAllDims(20000)) {
                String theDimension = theResult.dim;
                if ("author".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theAuthorFacets.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if ("extension".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theFileTypesFacets.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if ("last-modified-year".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theLastModifiedYearFacet.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if (IndexFields.LANGUAGEFACET.equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            // Facet stores a language code; display its English name.
                            Locale theLocale = new Locale(theLabelAndValue.label);
                            theLanguageFacet.add(new Facet(theLocale.getDisplayLanguage(Locale.ENGLISH),
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                LOGGER.info(" " + theDimension);
            }
            if (!theAuthorFacets.isEmpty()) {
                theDimensions.add(new FacetDimension("Author", theAuthorFacets));
            }
            if (!theLastModifiedYearFacet.isEmpty()) {
                theDimensions.add(new FacetDimension("Last modified", theLastModifiedYearFacet));
            }
            if (!theFileTypesFacets.isEmpty()) {
                theDimensions.add(new FacetDimension("File types", theFileTypesFacets));
            }
            if (!theLanguageFacet.isEmpty()) {
                theDimensions.add(new FacetDimension("Language", theLanguageFacet));
            }
            // Wait for all Tasks to complete for the search result highlighter
            ForkJoinTask.helpQuiesce();
        }
        long theDuration = System.currentTimeMillis() - theStartTime;
        LOGGER.info("Total amount of time : " + theDuration + "ms");
        return new QueryResult(System.currentTimeMillis() - theStartTime, theResultDocuments, theDimensions,
                theSearcher.getIndexReader().numDocs(), aBacklink);
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        searcherManager.release(theSearcher);
    }
}
From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java
License:Open Source License
/**
 * Computes search-phrase suggestions for the given partial term.
 * <p>
 * Refreshes the index view, then delegates to a {@link SearchPhraseSuggester} running
 * over the non-stemmed content field. The acquired searcher is always released.
 *
 * @param aTerm partial search phrase to complete
 * @return suggested phrases, possibly empty
 */
public Suggestion[] findSuggestionTermsFor(String aTerm) throws IOException {
    searcherManager.maybeRefreshBlocking();
    IndexSearcher acquiredSearcher = searcherManager.acquire();
    try {
        SearchPhraseSuggester phraseSuggester = new SearchPhraseSuggester(acquiredSearcher.getIndexReader(),
                analyzer, configuration);
        List<Suggestion> suggestions = phraseSuggester.suggestSearchPhrase(IndexFields.CONTENT_NOT_STEMMED,
                aTerm);
        Suggestion[] asArray = new Suggestion[suggestions.size()];
        return suggestions.toArray(asArray);
    } finally {
        searcherManager.release(acquiredSearcher);
    }
}
From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java
License:Open Source License
/**
 * Removes index entries whose backing file no longer exists on disk.
 * <p>
 * Scans every document id in the reader, checks the stored file name against the file
 * system, and queues a delete-by-unique-id for each missing file.
 */
public void cleanupDeadContent() throws IOException {
    searcherManager.maybeRefreshBlocking();
    IndexSearcher theSearcher = searcherManager.acquire();
    try {
        IndexReader theReader = theSearcher.getIndexReader();
        // NOTE(review): iterates all ids up to maxDoc() without consulting live-docs, so
        // ids of already-deleted documents may be visited — confirm this reader is
        // guaranteed to have no deletions, or add a liveness check.
        for (int i = 0; i < theReader.maxDoc(); i++) {
            Document theDocument = theReader.document(i);
            File theFile = new File(theDocument.getField(IndexFields.FILENAME).stringValue());
            if (!theFile.exists()) {
                LOGGER.info("Removing file " + theFile + " from index as it does not exist anymore.");
                String theUniqueID = theDocument.getField(IndexFields.UNIQUEID).stringValue();
                // Delete by unique id so every segment's copy of the document goes away.
                indexWriter.deleteDocuments(new Term(IndexFields.UNIQUEID, theUniqueID));
            }
        }
    } finally {
        searcherManager.release(theSearcher);
    }
}
From source file:de.unihildesheim.iw.lucene.document.FeedbackQuery.java
License:Open Source License
/**
 * Tries to get the minimum number of documents without {@link RelaxableQuery#relax()
 * relaxing} the query. If the minimum number of documents is not reached without
 * relaxing, at most the maximum number of documents is returned while relaxing the
 * query.
 *
 * @param searcher Searcher to issue queries
 * @param query Relaxable query to get matching documents
 * @param minDocs Minimum number of documents to get. Must be greater than zero.
 * @param maxDocCount Maximum number of documents to get. {@code -1} for unlimited or
 * greater than zero.
 * @return List of documents matching the (relaxed) query. Ranking order is not
 * preserved!
 * @throws IOException Thrown on low-level I/O errors
 * @throws IllegalArgumentException if {@code minDocs <= 0}, {@code maxDocCount} is
 * negative but not {@code -1}, or {@code maxDocCount < minDocs}
 */
public static DocIdSet getMinMax(@NotNull final IndexSearcher searcher, @NotNull final RelaxableQuery query,
        final int minDocs, final int maxDocCount) throws IOException {
    // Normalize the max: -1 means unlimited; other negatives are rejected.
    final int maxDocs;
    if (maxDocCount == -1) {
        maxDocs = Integer.MAX_VALUE;
    } else if (maxDocCount < 0) {
        throw new IllegalArgumentException(
                "Maximum number of documents must " + "be -1 (unlimited) or greater than zero.");
    } else if (maxDocCount < minDocs) {
        throw new IllegalArgumentException(
                "Maximum number of documents must " + "be greater than minimum value.");
    } else {
        maxDocs = maxDocCount;
    }
    if (minDocs <= 0) {
        throw new IllegalArgumentException("Minimum number of documents must be" + " greater than zero.");
    }
    final int maxRetDocs = getMaxDocs(searcher.getIndexReader(), maxDocs);
    // Matched doc ids are accumulated in a bitset so repeated (relaxed) passes dedupe.
    final FixedBitSet bits = new FixedBitSet(searcher.getIndexReader().maxDoc());
    bits.or(BitsUtils.arrayToBits(getDocs(searcher, query.getQueryObj(), maxRetDocs)));
    // build a log-info string
    final String logInfo = "Got {} matching feedback documents. " + "Relaxing query to "
            + (maxDocCount > 0 ? "get additional" : "reach the minimum of") + " {} feedback documents...";
    int docsToGet;
    int bitsCount;
    // Keep relaxing until we reach minDocs or the query cannot relax further.
    while ((bitsCount = bits.cardinality()) < minDocs && query.relax()) {
        // docsToGet is informational only; each pass still requests maxRetDocs and
        // relies on the bitset to discard duplicates.
        docsToGet = maxRetDocs - bitsCount;
        LOG.info(logInfo, bitsCount, docsToGet);
        final int[] docs = getDocs(searcher, query.getQueryObj(), maxRetDocs);
        // Cap additions so the total never exceeds maxDocs; getAndSet only counts
        // bits that were not already present.
        int maxAdd = maxDocs - bitsCount;
        for (int i = docs.length - 1; i >= 0 && maxAdd > 0; i--) {
            if (!bits.getAndSet(docs[i])) {
                maxAdd--;
            }
        }
    }
    LOG.info("Returning {} documents.", bits.cardinality());
    return new BitDocIdSet(bits);
}