Example usage for org.apache.lucene.search IndexSearcher getIndexReader

List of usage examples for org.apache.lucene.search IndexSearcher getIndexReader

Introduction

On this page you can find example usages of org.apache.lucene.search.IndexSearcher.getIndexReader().

Prototype

public IndexReader getIndexReader() 

Source Link

Document

Returns the IndexReader that this IndexSearcher searches.

Usage

From source file:de.blizzy.documentr.search.PageIndex.java

License:Open Source License

/**
 * Builds a spell-checked alternative for the given search text.
 *
 * <p>The text is tokenized with the analyzer of the {@code ALL_TEXT_SUGGESTIONS} field. For
 * every non-blank token the spell checker is asked for one replacement; replacements are
 * spliced into copies of the original text. A suggestion is only returned when at least one
 * word was replaced and {@code findPages} reports hits for the corrected text.
 *
 * @param searchText the text the user searched for
 * @param authentication passed through to {@code findPages} for the verification search
 * @param searcher searcher whose index reader backs the spell checker and the verification
 * search
 * @return the suggestion (plain text, HTML with the replaced words emphasized, and the hit
 * count of the corrected text), or {@code null} if there is nothing to suggest
 * @throws IOException if tokenizing or reading the index fails
 * @throws ParseException declared because {@code findPages} is called
 * @throws TimeoutException declared because {@code findPages} is called
 */
private SearchTextSuggestion getSearchTextSuggestion(String searchText, Authentication authentication,
        IndexSearcher searcher) throws IOException, ParseException, TimeoutException {

    List<WordPosition> words = Lists.newArrayList();

    TokenStream tokenStream = null;
    try {
        tokenStream = analyzer.tokenStream(ALL_TEXT_SUGGESTIONS, new StringReader(searchText));
        // addAttribute returns the (possibly already registered) attribute instance, which is
        // updated in place on every incrementToken() call
        CharTermAttribute charTerm = tokenStream.addAttribute(CharTermAttribute.class);
        OffsetAttribute offset = tokenStream.addAttribute(OffsetAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            String text = charTerm.toString();
            if (StringUtils.isNotBlank(text)) {
                words.add(new WordPosition(text, offset.startOffset(), offset.endOffset()));
            }
        }
        tokenStream.end();
    } finally {
        Closeables.closeQuietly(tokenStream);
    }

    // replace from the last word to the first so that the offsets of the words that are
    // still to be processed stay valid while the builders change length
    Collections.reverse(words);

    StringBuilder suggestedSearchText = new StringBuilder(searchText);
    StringBuilder suggestedSearchTextHtml = new StringBuilder(searchText);
    boolean foundSuggestions = false;
    // time-stamped markers survive HTML escaping unchanged and contain no regex meta characters
    String now = String.valueOf(System.currentTimeMillis());
    String startMarker = "__SUGGESTION-" + now + "__"; //$NON-NLS-1$ //$NON-NLS-2$
    String endMarker = "__/SUGGESTION-" + now + "__"; //$NON-NLS-1$ //$NON-NLS-2$
    DirectSpellChecker spellChecker = new DirectSpellChecker();
    IndexReader reader = searcher.getIndexReader();
    for (WordPosition word : words) {
        Term term = new Term(ALL_TEXT_SUGGESTIONS, word.getWord());
        SuggestWord[] suggestions = spellChecker.suggestSimilar(term, 1, reader,
                SuggestMode.SUGGEST_MORE_POPULAR);
        if (suggestions.length > 0) {
            String suggestedWord = suggestions[0].string;
            int start = word.getStart();
            int end = word.getEnd();
            suggestedSearchText.replace(start, end, suggestedWord);
            // insert the raw word here: the complete HTML text is escaped exactly once below,
            // escaping the word at this point as well would double-escape it
            suggestedSearchTextHtml.replace(start, end, startMarker + suggestedWord + endMarker);

            foundSuggestions = true;
        }
    }

    if (foundSuggestions) {
        String suggestion = suggestedSearchText.toString();
        // only offer the corrected text if searching for it actually finds something
        SearchResult suggestionResult = findPages(suggestion, 1, authentication, searcher);
        int suggestionTotalHits = suggestionResult.getTotalHits();
        if (suggestionTotalHits > 0) {
            String html = StringEscapeUtils.escapeHtml4(suggestedSearchTextHtml.toString())
                    .replaceAll(startMarker + "(.*?)" + endMarker, "<strong><em>$1</em></strong>"); //$NON-NLS-1$ //$NON-NLS-2$
            return new SearchTextSuggestion(suggestion, html, suggestionTotalHits);
        }
    }

    return null;
}

From source file:de.blizzy.documentr.search.PageIndex.java

License:Open Source License

/**
 * Collects the tags that are used by documents visible to the given authentication.
 *
 * <p>Every term of the {@code TAG} field is checked against the visible-document bits; a tag
 * is included as soon as its postings yield at least one document under that filter.
 *
 * @param authentication authentication used to determine the visible documents
 * @return the visible tags; empty if no documents are visible or the field has no terms
 * @throws IOException if reading the index fails
 * @throws TimeoutException declared because the visible documents are looked up
 */
public Set<String> getAllTags(Authentication authentication) throws IOException, TimeoutException {
    IndexSearcher indexSearcher = null;
    try {
        indexSearcher = searcherManager.acquire();
        Bits visible = getVisibleDocIds(indexSearcher, authentication);
        Set<String> result = Sets.newHashSet();
        if (visible.length() <= 0) {
            return result;
        }

        Terms tagTerms = MultiFields.getTerms(indexSearcher.getIndexReader(), TAG);
        if (tagTerms == null) {
            return result;
        }

        TermsEnum tagEnum = tagTerms.iterator(null);
        for (BytesRef tag = tagEnum.next(); tag != null; tag = tagEnum.next()) {
            // a single matching document is enough to know that the tag is visible
            DocsEnum docs = tagEnum.docs(visible, null, 0);
            if (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                result.add(tag.utf8ToString());
            }
        }
        return result;
    } finally {
        // the searcher must go back to the manager on every path
        if (indexSearcher != null) {
            searcherManager.release(indexSearcher);
        }
    }
}

From source file:de.blizzy.documentr.search.TagFinder.java

License:Open Source License

/**
 * Returns all tags that occur on at least one document visible to the given authentication.
 *
 * <p>The visible documents are computed by calling a {@code GetVisibleDocIdsTask} directly
 * on the current thread. Each term of the {@code PageIndex.TAG} field is then kept if its
 * postings, filtered by the visible-document bits, contain a document.
 *
 * @param authentication authentication used to determine the visible documents
 * @return the visible tags; empty if no documents are visible or the field has no terms
 * @throws IOException if reading the index fails
 * @throws TimeoutException declared because the visible documents are looked up
 */
public Set<String> getAllTags(Authentication authentication) throws IOException, TimeoutException {
    IndexSearcher acquired = null;
    try {
        acquired = searcherManager.acquire();

        // call() is invoked synchronously: running the task asynchronously would gain nothing here
        Bits visibleBits = new GetVisibleDocIdsTask(acquired, authentication, userStore,
                permissionEvaluator, taskExecutor).call();

        Set<String> visibleTags = Sets.newHashSet();
        boolean anythingVisible = visibleBits.length() > 0;
        Terms tagTerms = anythingVisible
                ? MultiFields.getTerms(acquired.getIndexReader(), PageIndex.TAG)
                : null;
        if (tagTerms != null) {
            TermsEnum allTags = tagTerms.iterator(null);
            BytesRef current = allTags.next();
            while (current != null) {
                DocsEnum matches = allTags.docs(visibleBits, null, 0);
                boolean usedByVisibleDoc = matches.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
                if (usedByVisibleDoc) {
                    visibleTags.add(current.utf8ToString());
                }
                current = allTags.next();
            }
        }
        return visibleTags;
    } finally {
        // hand the searcher back to the manager regardless of the outcome
        if (acquired != null) {
            searcherManager.release(acquired);
        }
    }
}

From source file:de.ingrid.search.utils.facet.FacetClassProducerTest.java

License:EUPL

/**
 * Opens the dummy test index and wires a {@code FacetClassProducer} with an index reader
 * wrapper and a single dummy term query parser for the {@code "content"} field.
 *
 * @throws IllegalStateException if the test index cannot be opened
 */
@Before
public void setup() {
    indexDir = DummyIndex.getTestIndex();
    IndexSearcher searcher;
    try {
        searcher = new IndexSearcher(IndexReader.open(indexDir));
    } catch (IOException e) {
        // Fail fast with the real cause (CorruptIndexException is an IOException as well).
        // Merely printing the stack trace would leave the searcher null and make the
        // fixture die a few lines later with an unrelated NullPointerException.
        throw new IllegalStateException("Unable to open test index " + indexDir, e);
    }

    DummyQueryParsers qp = new DummyQueryParsers();
    List<IQueryParser> parsers = new ArrayList<IQueryParser>();
    parsers.add(new DummyTermQueryParser("content", null));
    qp.setQueryParsers(parsers);

    fcp = new FacetClassProducer();
    fcp.setIndexReaderWrapper(new LuceneIndexReaderWrapper(new IndexReader[] { searcher.getIndexReader() }));
    fcp.setQueryParsers(qp);
}

From source file:de.ingrid.search.utils.facet.FacetManagerTest.java

License:EUPL

/**
 * Opens the dummy test index and builds the {@code FacetManager} under test: a facet class
 * producer feeds a facet class registry, which feeds the index facet counter that is
 * registered with the manager.
 *
 * @throws Exception if the test index cannot be opened
 */
@Before
public void setUp() throws Exception {
    indexDir = DummyIndex.getTestIndex();
    // Let a failure to open the index propagate: the method already declares Exception, and
    // swallowing it would only defer the failure to a NullPointerException further down.
    IndexSearcher searcher = new IndexSearcher(IndexReader.open(indexDir));

    qps = new DummyQueryParsers();
    List<IQueryParser> parsers = new ArrayList<IQueryParser>();
    parsers.add(new DummyTermQueryParser("content", null));

    qps.setQueryParsers(parsers);
    indexReader = searcher.getIndexReader();

    FacetClassProducer fp = new FacetClassProducer();
    fp.setIndexReaderWrapper(new LuceneIndexReaderWrapper(new IndexReader[] { indexReader }));
    fp.setQueryParsers(qps);

    FacetClassRegistry fr = new FacetClassRegistry();
    fr.setFacetClassProducer(fp);

    IndexFacetCounter fc = new IndexFacetCounter();
    fc.setFacetClassRegistry(fr);

    fm = new FacetManager();
    fm.setIndexReaderWrapper(new LuceneIndexReaderWrapper(new IndexReader[] { indexReader }));
    fm.setQueryParsers(qps);
    fm.setFacetCounters(Arrays.asList(new IFacetCounter[] { fc }));
}

From source file:de.innovationgate.wgpublisher.lucene.LuceneManager.java

License:Open Source License

/**
 * Rewrites the given query against the reader of the current index searcher.
 *
 * <p>The searcher is fetched first; the rewrite itself runs while a permit of the index
 * searcher semaphore is held, and the permit is returned even if the rewrite fails.
 *
 * @param query the query to rewrite
 * @return the rewritten query
 * @throws CorruptIndexException declared by the underlying index access
 * @throws IOException if reading the index fails
 * @throws InterruptedException if the thread is interrupted while waiting for the semaphore
 */
private Query rewrite(Query query) throws CorruptIndexException, IOException, InterruptedException {
    final IndexSearcher currentSearcher = getIndexSearcher();

    _indexSearcherSemaphore.acquire();
    try {
        final Query rewritten = query.rewrite(currentSearcher.getIndexReader());
        return rewritten;
    } finally {
        _indexSearcherSemaphore.release();
    }
}

From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java

License:Open Source License

/**
 * Executes a search and assembles result documents, similar files and facet dimensions.
 *
 * <p>If the query string is empty no search is run and the result contains no documents and
 * no dimensions. Otherwise the query is rewritten, combined with the given drilldown fields
 * and executed together with a facets collector. Hits with the same content hash are merged
 * into one result document; hits whose file no longer exists on disk are left out. The
 * highlighted snippets are computed asynchronously on the executor pool.
 *
 * @param aQueryString the query entered by the user, may be empty
 * @param aBacklink passed through unchanged to the returned {@code QueryResult}
 * @param aBasePath prefix of the links generated for the facets
 * @param aConfiguration supplies the number of search results and whether similar documents
 * are looked up
 * @param aDrilldownFields dimension to value pairs that are added to the drilldown query
 * @return the query result including the elapsed time and the number of documents in the
 * index
 * @throws IOException if refreshing or acquiring the searcher, or creating the facet reader
 * state fails
 * @throws RuntimeException wrapping any exception raised while the query is processed
 */
public QueryResult performQuery(String aQueryString, String aBacklink, String aBasePath,
        Configuration aConfiguration, Map<String, String> aDrilldownFields) throws IOException {

    searcherManager.maybeRefreshBlocking();
    IndexSearcher theSearcher = searcherManager.acquire();
    try {
        // Created inside the try block: the constructor reads the index and can fail, and
        // the acquired searcher has to be released in that case as well.
        SortedSetDocValuesReaderState theSortedSetState = new DefaultSortedSetDocValuesReaderState(
                theSearcher.getIndexReader());

        List<QueryResultDocument> theResultDocuments = new ArrayList<>();

        long theStartTime = System.currentTimeMillis();

        LOGGER.info("Querying for " + aQueryString);

        // SimpleDateFormat is not thread-safe, so it is only used on this thread and never
        // handed to the highlighter tasks.
        DateFormat theDateFormat = new SimpleDateFormat("dd.MMMM.yyyy", Locale.ENGLISH);

        try {

            List<FacetDimension> theDimensions = new ArrayList<>();

            // Search only if a search query is given
            if (!StringUtils.isEmpty(aQueryString)) {

                Query theQuery = computeBooleanQueryFor(aQueryString);

                LOGGER.info(" query is " + theQuery);

                theQuery = theQuery.rewrite(theSearcher.getIndexReader());

                LOGGER.info(" rewritten query is " + theQuery);

                // final copy for use inside the highlighter lambdas
                final Query theFinalQuery = theQuery;

                DrillDownQuery theDrilldownQuery = new DrillDownQuery(facetsConfig, theQuery);
                aDrilldownFields.forEach((theKey, theValue) -> {
                    LOGGER.info(" with Drilldown " + theKey + " for " + theValue);
                    theDrilldownQuery.add(theKey, theValue);
                });

                FacetsCollector theFacetCollector = new FacetsCollector();

                TopDocs theDocs = FacetsCollector.search(theSearcher, theDrilldownQuery, null,
                        aConfiguration.getNumberOfSearchResults(), theFacetCollector);
                SortedSetDocValuesFacetCounts theFacetCounts = new SortedSetDocValuesFacetCounts(theSortedSetState,
                        theFacetCollector);

                List<Facet> theAuthorFacets = new ArrayList<>();
                List<Facet> theFileTypesFacets = new ArrayList<>();
                List<Facet> theLastModifiedYearFacet = new ArrayList<>();
                List<Facet> theLanguageFacet = new ArrayList<>();

                LOGGER.info("Found " + theDocs.scoreDocs.length + " documents");

                // We need this cache to detect duplicate documents while searching for similarities
                Set<Integer> theUniqueDocumentsFound = new HashSet<>();

                Map<String, QueryResultDocument> theDocumentsByHash = new HashMap<>();

                for (int i = 0; i < theDocs.scoreDocs.length; i++) {
                    int theDocumentID = theDocs.scoreDocs[i].doc;
                    theUniqueDocumentsFound.add(theDocumentID);
                    Document theDocument = theSearcher.doc(theDocumentID);

                    String theUniqueID = theDocument.getField(IndexFields.UNIQUEID).stringValue();
                    String theFoundFileName = theDocument.getField(IndexFields.FILENAME).stringValue();
                    String theHash = theDocument.getField(IndexFields.CONTENTMD5).stringValue();
                    QueryResultDocument theExistingDocument = theDocumentsByHash.get(theHash);
                    if (theExistingDocument != null) {
                        // same content as an earlier hit: only remember the additional file name
                        theExistingDocument.addFileName(theFoundFileName);
                    } else {
                        Date theLastModified = new Date(
                                theDocument.getField(IndexFields.LASTMODIFIED).numericValue().longValue());
                        SupportedLanguage theLanguage = SupportedLanguage
                                .valueOf(theDocument.getField(IndexFields.LANGUAGESTORED).stringValue());
                        String theFieldName;
                        if (analyzerCache.supportsLanguage(theLanguage)) {
                            theFieldName = analyzerCache.getFieldNameFor(theLanguage);
                        } else {
                            theFieldName = IndexFields.CONTENT;
                        }

                        String theOriginalContent = theDocument.getField(theFieldName).stringValue();

                        // Format the date here, on the calling thread; the tasks below may run
                        // concurrently and must not share the date format.
                        final String theFormattedLastModified = theDateFormat.format(theLastModified);

                        ForkJoinTask<String> theHighligherResult = executorPool.submit(() -> {
                            StringBuilder theResult = new StringBuilder(theFormattedLastModified);
                            theResult.append("&nbsp;-&nbsp;");
                            Highlighter theHighlighter = new Highlighter(new SimpleHTMLFormatter(),
                                    new QueryScorer(theFinalQuery));
                            for (String theFragment : theHighlighter.getBestFragments(analyzer, theFieldName,
                                    theOriginalContent, NUMBER_OF_FRAGMENTS)) {
                                // the builder already holds the date prefix, so every fragment
                                // is preceded by the separator
                                theResult.append("...");
                                theResult.append(theFragment);
                            }
                            return theResult.toString();
                        });

                        // scale the score relative to the best hit into the range 0..5
                        int theNormalizedScore = (int) (theDocs.scoreDocs[i].score / theDocs.getMaxScore() * 5);

                        File theFileOnDisk = new File(theFoundFileName);
                        if (theFileOnDisk.exists()) {

                            boolean thePreviewAvailable = previewProcessor.previewAvailableFor(theFileOnDisk);

                            theExistingDocument = new QueryResultDocument(theDocumentID, theFoundFileName,
                                    theHighligherResult,
                                    Long.parseLong(theDocument.getField(IndexFields.LASTMODIFIED).stringValue()),
                                    theNormalizedScore, theUniqueID, thePreviewAvailable);
                            theDocumentsByHash.put(theHash, theExistingDocument);
                            theResultDocuments.add(theExistingDocument);
                        }
                    }
                }

                if (aConfiguration.isShowSimilarDocuments()) {

                    MoreLikeThis theMoreLikeThis = new MoreLikeThis(theSearcher.getIndexReader());
                    theMoreLikeThis.setAnalyzer(analyzer);
                    theMoreLikeThis.setMinTermFreq(1);
                    theMoreLikeThis.setMinDocFreq(1);
                    theMoreLikeThis.setFieldNames(analyzerCache.getAllFieldNames());

                    for (QueryResultDocument theDocument : theResultDocuments) {
                        Query theMoreLikeThisQuery = theMoreLikeThis.like(theDocument.getDocumentID());
                        TopDocs theMoreLikeThisTopDocs = theSearcher.search(theMoreLikeThisQuery, 5);
                        for (ScoreDoc theMoreLikeThisScoreDoc : theMoreLikeThisTopDocs.scoreDocs) {
                            int theSimilarDocument = theMoreLikeThisScoreDoc.doc;
                            // add() returns false for documents that were already reported
                            if (theUniqueDocumentsFound.add(theSimilarDocument)) {
                                Document theMoreLikeThisDocument = theSearcher.doc(theSimilarDocument);
                                String theFilename = theMoreLikeThisDocument.getField(IndexFields.FILENAME)
                                        .stringValue();
                                theDocument.addSimilarFile(theFilename);
                            }
                        }
                    }
                }

                LOGGER.info("Got Dimensions");
                for (FacetResult theResult : theFacetCounts.getAllDims(20000)) {
                    String theDimension = theResult.dim;
                    if ("author".equals(theDimension)) {
                        for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                            if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                                theAuthorFacets.add(new Facet(theLabelAndValue.label,
                                        theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                                FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                            }
                        }
                    }
                    if ("extension".equals(theDimension)) {
                        for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                            if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                                theFileTypesFacets.add(new Facet(theLabelAndValue.label,
                                        theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                                FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                            }
                        }
                    }
                    if ("last-modified-year".equals(theDimension)) {
                        for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                            if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                                theLastModifiedYearFacet.add(new Facet(theLabelAndValue.label,
                                        theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                                FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                            }
                        }
                    }
                    if (IndexFields.LANGUAGEFACET.equals(theDimension)) {
                        for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                            if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                                // the label is used as a language code; show its English name
                                Locale theLocale = new Locale(theLabelAndValue.label);
                                theLanguageFacet.add(new Facet(theLocale.getDisplayLanguage(Locale.ENGLISH),
                                        theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                                FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                            }
                        }
                    }

                    LOGGER.info(" " + theDimension);
                }

                if (!theAuthorFacets.isEmpty()) {
                    theDimensions.add(new FacetDimension("Author", theAuthorFacets));
                }
                if (!theLastModifiedYearFacet.isEmpty()) {
                    theDimensions.add(new FacetDimension("Last modified", theLastModifiedYearFacet));
                }
                if (!theFileTypesFacets.isEmpty()) {
                    theDimensions.add(new FacetDimension("File types", theFileTypesFacets));
                }
                if (!theLanguageFacet.isEmpty()) {
                    theDimensions.add(new FacetDimension("Language", theLanguageFacet));
                }

                // Wait for all Tasks to complete for the search result highlighter
                ForkJoinTask.helpQuiesce();
            }

            // measured once so that the logged and the returned duration agree
            long theDuration = System.currentTimeMillis() - theStartTime;

            LOGGER.info("Total amount of time : " + theDuration + "ms");

            return new QueryResult(theDuration, theResultDocuments, theDimensions,
                    theSearcher.getIndexReader().numDocs(), aBacklink);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    } finally {
        searcherManager.release(theSearcher);
    }
}

From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java

License:Open Source License

/**
 * Computes search phrase suggestions for the given term.
 *
 * <p>The searcher manager is refreshed first, then a {@code SearchPhraseSuggester} on the
 * acquired searcher's reader is asked for suggestions on the
 * {@code IndexFields.CONTENT_NOT_STEMMED} field.
 *
 * @param aTerm the term to compute suggestions for
 * @return the suggestions as an array
 * @throws IOException if accessing the index fails
 */
public Suggestion[] findSuggestionTermsFor(String aTerm) throws IOException {
    searcherManager.maybeRefreshBlocking();
    final IndexSearcher acquiredSearcher = searcherManager.acquire();
    try {
        final SearchPhraseSuggester phraseSuggester = new SearchPhraseSuggester(
                acquiredSearcher.getIndexReader(), analyzer, configuration);
        final List<Suggestion> suggestions = phraseSuggester
                .suggestSearchPhrase(IndexFields.CONTENT_NOT_STEMMED, aTerm);

        final Suggestion[] asArray = new Suggestion[suggestions.size()];
        return suggestions.toArray(asArray);
    } finally {
        // always hand the searcher back to the manager
        searcherManager.release(acquiredSearcher);
    }
}

From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java

License:Open Source License

/**
 * Removes index entries whose file no longer exists on disk.
 *
 * <p>Every document id below {@code maxDoc()} is loaded and the file named by its
 * {@code IndexFields.FILENAME} field is checked; for missing files a delete by
 * {@code IndexFields.UNIQUEID} is issued on the index writer.
 *
 * <p>NOTE(review): the loop does not consult live docs, so it presumably also visits ids of
 * documents that are already deleted - confirm whether that is intended.
 *
 * @throws IOException if reading from or writing to the index fails
 */
public void cleanupDeadContent() throws IOException {
    searcherManager.maybeRefreshBlocking();
    final IndexSearcher acquiredSearcher = searcherManager.acquire();
    try {
        final IndexReader reader = acquiredSearcher.getIndexReader();
        int docId = 0;
        while (docId < reader.maxDoc()) {
            final Document indexed = reader.document(docId);
            docId++;

            final File onDisk = new File(indexed.getField(IndexFields.FILENAME).stringValue());
            if (onDisk.exists()) {
                continue;
            }

            LOGGER.info("Removing file " + onDisk + " from index as it does not exist anymore.");
            final String uniqueId = indexed.getField(IndexFields.UNIQUEID).stringValue();
            indexWriter.deleteDocuments(new Term(IndexFields.UNIQUEID, uniqueId));
        }
    } finally {
        // always hand the searcher back to the manager
        searcherManager.release(acquiredSearcher);
    }
}

From source file:de.unihildesheim.iw.lucene.document.FeedbackQuery.java

License:Open Source License

/**
 * Collects feedback documents for a query, relaxing the query only as far as needed.
 *
 * <p>The unrelaxed query is run first. As long as fewer than {@code minDocs} documents have
 * been collected and the query can still be {@link RelaxableQuery#relax() relaxed}, the
 * relaxed query is run again and its results are added until the maximum is reached.
 *
 * @param searcher Searcher to issue queries
 * @param query Relaxable query to get matching documents
 * @param minDocs Minimum number of documents to get. Must be greater than zero.
 * @param maxDocCount Maximum number of documents to get. {@code -1} for unlimited or greater
 * than zero.
 * @return Set of documents matching the (relaxed) query. Ranking order is not preserved!
 * @throws IOException Thrown on low-level I/O errors
 * @throws IllegalArgumentException if the minimum or maximum value is out of range
 */
public static DocIdSet getMinMax(@NotNull final IndexSearcher searcher, @NotNull final RelaxableQuery query,
        final int minDocs, final int maxDocCount) throws IOException {
    final boolean unlimited = maxDocCount == -1;

    // the maximum is validated before the minimum
    if (!unlimited) {
        if (maxDocCount < 0) {
            throw new IllegalArgumentException(
                    "Maximum number of documents must be -1 (unlimited) or greater than zero.");
        }
        if (maxDocCount < minDocs) {
            throw new IllegalArgumentException(
                    "Maximum number of documents must be greater than minimum value.");
        }
    }
    if (minDocs <= 0) {
        throw new IllegalArgumentException("Minimum number of documents must be greater than zero.");
    }

    final int maxDocs = unlimited ? Integer.MAX_VALUE : maxDocCount;

    final int maxRetDocs = getMaxDocs(searcher.getIndexReader(), maxDocs);
    final FixedBitSet bits = new FixedBitSet(searcher.getIndexReader().maxDoc());
    // seed the result with the matches of the unrelaxed query
    bits.or(BitsUtils.arrayToBits(getDocs(searcher, query.getQueryObj(), maxRetDocs)));

    // build a log-info string
    final String relaxGoal = maxDocCount > 0 ? "get additional" : "reach the minimum of";
    final String logInfo = "Got {} matching feedback documents. Relaxing query to " + relaxGoal
            + " {} feedback documents...";

    int collected = bits.cardinality();
    while (collected < minDocs && query.relax()) {
        LOG.info(logInfo, collected, maxRetDocs - collected);

        final int[] relaxedDocs = getDocs(searcher, query.getQueryObj(), maxRetDocs);
        int remaining = maxDocs - collected;

        // walk the result array from its end; only ids that were not set before count
        // against the remaining budget
        int idx = relaxedDocs.length - 1;
        while (idx >= 0 && remaining > 0) {
            final boolean alreadyKnown = bits.getAndSet(relaxedDocs[idx]);
            if (!alreadyKnown) {
                remaining--;
            }
            idx--;
        }

        collected = bits.cardinality();
    }

    LOG.info("Returning {} documents.", bits.cardinality());
    return new BitDocIdSet(bits);
}