Example usage for org.apache.lucene.queries.mlt MoreLikeThis setStopWords

List of usage examples for org.apache.lucene.queries.mlt MoreLikeThis setStopWords

Introduction

On this page you can find example usages of org.apache.lucene.queries.mlt.MoreLikeThis.setStopWords.

Prototype

public void setStopWords(Set<?> stopWords) 

Source Link

Document

Sets the set of stop words. Any word in this set is considered "uninteresting" and is ignored by MoreLikeThis.

Usage

From source file:com.qwazr.search.query.MoreLikeThisQuery.java

License:Apache License

/**
 * Builds a Lucene "more like this" query for the document identified by {@code doc_num}.
 *
 * <p>A {@link MoreLikeThis} instance is created on the index reader of the searcher held by
 * the query context. Every optional setting is forwarded to it only when the corresponding
 * field is non-null; a null field leaves the MoreLikeThis value untouched. The analyzer is
 * always taken from the query context.
 *
 * @param queryContext supplies the index searcher (for its reader) and the analyzer
 * @return the query returned by {@code MoreLikeThis.like(doc_num)}
 * @throws NullPointerException if {@code doc_num} is null
 * @throws IOException if building the query fails while reading the index
 * @throws ParseException declared in the signature (presumably required by the overridden
 *         method; nothing in this body visibly throws it)
 */
@Override
final public Query getQuery(QueryContext queryContext) throws IOException, ParseException {
    Objects.requireNonNull(doc_num, "The doc_num field is missing");

    final MoreLikeThis moreLikeThis = new MoreLikeThis(queryContext.indexSearcher.getIndexReader());

    // Term boosting.
    if (is_boost != null) {
        moreLikeThis.setBoost(is_boost);
    }
    if (boost_factor != null) {
        moreLikeThis.setBoostFactor(boost_factor);
    }

    // Fields the similar terms are taken from.
    if (fieldnames != null) {
        moreLikeThis.setFieldNames(fieldnames);
    }

    // Upper limits. The percentage limit is deliberately applied after the absolute one:
    // NOTE(review): in Lucene's MoreLikeThis the percentage setter appears to overwrite the
    // absolute maximum, so it wins when both are configured - confirm for the Lucene version in use.
    if (max_doc_freq != null) {
        moreLikeThis.setMaxDocFreq(max_doc_freq);
    }
    if (max_doc_freq_pct != null) {
        moreLikeThis.setMaxDocFreqPct(max_doc_freq_pct);
    }
    if (max_num_tokens_parsed != null) {
        moreLikeThis.setMaxNumTokensParsed(max_num_tokens_parsed);
    }
    if (max_query_terms != null) {
        moreLikeThis.setMaxQueryTerms(max_query_terms);
    }
    if (max_word_len != null) {
        moreLikeThis.setMaxWordLen(max_word_len);
    }

    // Lower limits.
    if (min_doc_freq != null) {
        moreLikeThis.setMinDocFreq(min_doc_freq);
    }
    if (min_term_freq != null) {
        moreLikeThis.setMinTermFreq(min_term_freq);
    }
    if (min_word_len != null) {
        moreLikeThis.setMinWordLen(min_word_len);
    }

    // Words to ignore.
    if (stop_words != null) {
        moreLikeThis.setStopWords(stop_words);
    }

    moreLikeThis.setAnalyzer(queryContext.analyzer);
    return moreLikeThis.like(doc_num);
}

From source file:org.elasticsearch.common.lucene.search.MoreLikeThisQuery.java

License:Apache License

/**
 * Rewrites this query into the boolean query that Lucene's MoreLikeThis generates for
 * {@code likeText}.
 *
 * <p>MoreLikeThis is created with the configured similarity, or a new
 * {@code DefaultSimilarity} when none is set, and then receives all of this query's
 * settings (fields, analyzer, frequency and word-length limits, stop words, boosting).
 * The generated query must match at least {@code clauseCount * percentTermsToMatch}
 * optional clauses (truncated to an int) and carries this query's boost.
 *
 * @param reader the index reader MoreLikeThis works against
 * @return the generated boolean query
 * @throws IOException if MoreLikeThis fails while building the query
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
    final MoreLikeThis moreLikeThis;
    if (similarity == null) {
        moreLikeThis = new MoreLikeThis(reader, new DefaultSimilarity());
    } else {
        moreLikeThis = new MoreLikeThis(reader, similarity);
    }

    moreLikeThis.setFieldNames(moreLikeFields);
    moreLikeThis.setAnalyzer(analyzer);
    moreLikeThis.setMinTermFreq(minTermFrequency);
    moreLikeThis.setMinDocFreq(minDocFreq);
    moreLikeThis.setMaxDocFreq(maxDocFreq);
    moreLikeThis.setMaxQueryTerms(maxQueryTerms);
    moreLikeThis.setMinWordLen(minWordLen);
    moreLikeThis.setMaxWordLen(maxWordLen);
    moreLikeThis.setStopWords(stopWords);
    moreLikeThis.setBoost(boostTerms);
    moreLikeThis.setBoostFactor(boostTermsFactor);

    // LUCENE 4 UPGRADE: this maps the 3.6 behavior (only the first field is used).
    final BooleanQuery likeQuery = (BooleanQuery) moreLikeThis.like(new FastStringReader(likeText),
            moreLikeFields[0]);

    // The cast truncates toward zero, so a fractional threshold is rounded down.
    final int clauseCount = likeQuery.getClauses().length;
    likeQuery.setMinimumNumberShouldMatch((int) (clauseCount * percentTermsToMatch));

    likeQuery.setBoost(getBoost());
    return likeQuery;
}

From source file:org.ohdsi.usagi.UsagiSearchEngine.java

License:Apache License

/**
 * Searches the index for concepts matching a term and returns them with their scores.
 *
 * <p>The term is turned into a text query either through Lucene's MoreLikeThis on the
 * {@code "TERM"} field or through {@code keywordsQueryParser}. That text query is added as an
 * optional (SHOULD) clause next to the mandatory {@code conceptQuery}; every active filter
 * adds another mandatory (MUST) clause. The top 100 hits are requested, handed to
 * {@code recomputeScores} together with the text query, converted to {@code ScoredConcept}s,
 * and finally passed through {@code reorderTies} and {@code removeDuplicateConcepts}.
 *
 * <p>This method is best effort: if the keyword parser rejects {@code searchTerm} an empty
 * list is returned, and any other exception is printed to {@code System.err} and whatever was
 * collected up to that point is returned.
 *
 * @param searchTerm         the text to search for
 * @param useMlt             {@code true} to build the text query with MoreLikeThis,
 *                           {@code false} to parse it with the keyword query parser
 * @param filterConceptIds   concept ids to restrict to (joined with {@code " OR "});
 *                           ignored when null or empty
 * @param filterDomain       domain to restrict to (parsed as a quoted phrase), or null
 * @param filterConceptClass concept class to restrict to (parsed as a quoted phrase), or null
 * @param filterVocabulary   vocabulary to restrict to (parsed as a quoted phrase), or null
 * @param filterInvalid      when true, adds a mandatory clause parsed from {@code ""} by
 *                           {@code invalidQueryParser} (presumably excluding invalid concepts
 *                           - verify against the index schema)
 * @return the matching concepts; never null, possibly empty or partial after an error
 */
public List<ScoredConcept> search(String searchTerm, boolean useMlt, Collection<Integer> filterConceptIds,
        String filterDomain, String filterConceptClass, String filterVocabulary, boolean filterInvalid) {
    final List<ScoredConcept> results = new ArrayList<>();
    try {
        Query textQuery;
        if (!useMlt) {
            try {
                textQuery = keywordsQueryParser.parse(searchTerm);
            } catch (ParseException unparseable) {
                // A term the keyword parser cannot handle yields no matches instead of an error.
                return results;
            }
        } else {
            // All thresholds are set permissively (minimums of 1, maximums of 9999 / 100%)
            // and no stop-word set is used.
            final MoreLikeThis moreLikeThis = new MoreLikeThis(searcher.getIndexReader());
            moreLikeThis.setMinTermFreq(1);
            moreLikeThis.setMinDocFreq(1);
            moreLikeThis.setMaxDocFreq(9999);
            moreLikeThis.setMinWordLen(1);
            moreLikeThis.setMaxWordLen(9999);
            moreLikeThis.setMaxDocFreqPct(100);
            moreLikeThis.setMaxNumTokensParsed(9999);
            moreLikeThis.setMaxQueryTerms(9999);
            moreLikeThis.setStopWords(null);
            moreLikeThis.setFieldNames(new String[] { "TERM" });
            moreLikeThis.setAnalyzer(analyzer);

            textQuery = moreLikeThis.like("TERM", new StringReader(searchTerm));
        }

        // The text match is optional; the concept restriction and every filter are mandatory.
        final BooleanQuery combined = new BooleanQuery();
        combined.add(textQuery, Occur.SHOULD);
        combined.add(conceptQuery, Occur.MUST);

        if (filterConceptIds != null && !filterConceptIds.isEmpty()) {
            final String idExpression = StringUtilities.join(filterConceptIds, " OR ");
            combined.add(conceptIdQueryParser.parse(idExpression), Occur.MUST);
        }
        if (filterDomain != null) {
            combined.add(domainQueryParser.parse("\"" + filterDomain + "\""), Occur.MUST);
        }
        if (filterConceptClass != null) {
            combined.add(conceptClassQueryParser.parse("\"" + filterConceptClass + "\""), Occur.MUST);
        }
        if (filterVocabulary != null) {
            combined.add(vocabularyQueryParser.parse("\"" + filterVocabulary + "\""), Occur.MUST);
        }
        if (filterInvalid) {
            combined.add(invalidQueryParser.parse("\"\""), Occur.MUST);
        }

        final TopDocs hits = searcher.search(combined, 100);
        recomputeScores(hits.scoreDocs, textQuery);

        for (ScoreDoc hit : hits.scoreDocs) {
            final Document stored = reader.document(hit.doc);
            final int conceptId = Integer.parseInt(stored.get("CONCEPT_ID"));

            // A hit with a match score of 0 is still allowed when it is the one concept
            // that was explicitly selected through the id filter.
            final boolean keep = hit.score > 0 || (filterConceptIds != null && filterConceptIds.size() == 1
                    && filterConceptIds.contains(conceptId));
            if (!keep) {
                continue;
            }

            final TargetConcept concept = new TargetConcept();
            concept.term = stored.get("TERM");
            concept.conceptId = conceptId;
            concept.conceptName = stored.get("CONCEPT_NAME");
            concept.conceptClass = stored.get("CONCEPT_CLASS");
            concept.vocabulary = stored.get("VOCABULARY");
            concept.conceptCode = stored.get("CONCEPT_CODE");
            concept.validStartDate = stored.get("VALID_START_DATE");
            concept.validEndDate = stored.get("VALID_END_DATE");
            concept.invalidReason = stored.get("INVALID_REASON");
            // The DOMAINS field holds one domain per line.
            for (String domain : stored.get("DOMAINS").split("\n")) {
                concept.domains.add(domain);
            }
            concept.additionalInformation = stored.get("ADDITIONAL_INFORMATION");

            results.add(new ScoredConcept(hit.score, concept));
        }

        reorderTies(results);
        removeDuplicateConcepts(results);
    } catch (Exception e) {
        // Best effort: report the failure and fall through to return what was collected.
        System.err.println(e.getMessage());
        e.printStackTrace();
    }

    return results;
}