Example usage for the org.apache.lucene.queries.mlt.MoreLikeThis constructor MoreLikeThis(IndexReader)

Introduction

This page collects example usages of the MoreLikeThis(IndexReader) constructor of org.apache.lucene.queries.mlt.MoreLikeThis.

Prototype

public MoreLikeThis(IndexReader ir)

Source Link

Document

Constructor requiring an IndexReader.

Usage

From source file:KNearestNeighborClassifier.java

License:Apache License

/**
 * Builds a k-nearest-neighbour classifier that uses {@link MoreLikeThis} to find the training
 * documents closest to an unseen text.
 *
 * @param leafReader     the reader on the index to be used for classification
 * @param similarity     the {@link Similarity} installed on the underlying {@link IndexSearcher}; when
 *                       {@code null} a {@link org.apache.lucene.search.similarities.ClassicSimilarity} is used
 * @param analyzer       the {@link Analyzer} handed to {@link MoreLikeThis} to analyze unseen text
 * @param query          a {@link Query} to eventually filter the docs used for training the classifier, or
 *                       {@code null} if all the indexed docs should be used
 * @param k              the no. of docs to select in the MLT results to find the nearest neighbor
 * @param minDocsFreq    minimum document frequency; forwarded to {@link MoreLikeThis} only when greater than zero
 * @param minTermFreq    minimum term frequency; forwarded to {@link MoreLikeThis} only when greater than zero
 * @param classFieldName the name of the field used as the output for the classifier
 * @param textFieldNames the name of the fields used as the inputs for the classifier, they can contain boosting
 *                       indication e.g. title^10
 */
public KNearestNeighborClassifier(IndexReader leafReader, Similarity similarity, Analyzer analyzer, Query query,
        int k, int minDocsFreq, int minTermFreq, String classFieldName, String... textFieldNames) {
    // Plain configuration values.
    this.classFieldName = classFieldName;
    this.textFieldNames = textFieldNames;
    this.query = query;
    this.k = k;

    // Query generator used to look up neighbours of an unseen text.
    this.mlt = new MoreLikeThis(leafReader);
    this.mlt.setAnalyzer(analyzer);
    this.mlt.setFieldNames(textFieldNames);

    // Searcher over the same reader; fall back to ClassicSimilarity when none was supplied.
    this.indexSearcher = new IndexSearcher(leafReader);
    this.indexSearcher.setSimilarity(similarity != null ? similarity : new ClassicSimilarity());

    // Non-positive thresholds mean "keep the MoreLikeThis defaults".
    if (minDocsFreq > 0) {
        this.mlt.setMinDocFreq(minDocsFreq);
    }
    if (minTermFreq > 0) {
        this.mlt.setMinTermFreq(minTermFreq);
    }
}

From source file:aos.lucene.tools.BooksMoreLikeThis.java

License:Apache License

/**
 * Walks over every document id of the index and logs the documents that {@link MoreLikeThis}
 * considers similar, based on the "title" and "author" fields.
 *
 * <p>The index location is taken from the {@code index.dir} system property. The directory and
 * the reader are closed even when searching fails part-way through.
 *
 * @param args unused
 * @throws IllegalArgumentException if the {@code index.dir} system property is not set
 * @throws Throwable                if the index cannot be opened, read or searched
 */
public static void main(String[] args) throws Throwable {

    String indexDir = System.getProperty("index.dir");
    if (indexDir == null) {
        throw new IllegalArgumentException("System property 'index.dir' must point to the index directory");
    }

    // try-with-resources closes the reader first, then the directory, on every exit path.
    try (FSDirectory directory = FSDirectory.open(new File(indexDir));
            IndexReader reader = DirectoryReader.open(directory)) {

        IndexSearcher searcher = new IndexSearcher(reader);

        int numDocs = reader.maxDoc();

        MoreLikeThis mlt = new MoreLikeThis(reader);
        mlt.setFieldNames(new String[] { "title", "author" });
        // Thresholds of 1 so that every term of the source document is eligible.
        mlt.setMinTermFreq(1);
        mlt.setMinDocFreq(1);

        for (int docID = 0; docID < numDocs; docID++) {
            // Blank separator line between documents.
            LOGGER.info("");
            Document doc = reader.document(docID);
            LOGGER.info(doc.get("title"));

            Query query = mlt.like(docID);
            LOGGER.info("  query=" + query);

            TopDocs similarDocs = searcher.search(query, 10);
            if (similarDocs.totalHits == 0) {
                LOGGER.info("  None like this");
            }
            for (int i = 0; i < similarDocs.scoreDocs.length; i++) {
                int similarDocID = similarDocs.scoreDocs[i].doc;
                // The source document always matches its own query; skip it.
                if (similarDocID != docID) {
                    doc = reader.document(similarDocID);
                    // doc.get(...) yields null instead of throwing when the title is absent.
                    LOGGER.info("  -> " + doc.get("title"));
                }
            }
        }
    }
}

From source file:com.mathworks.xzheng.tools.BooksMoreLikeThis.java

License:Apache License

/**
 * Walks over every document id of the index and prints the documents that {@link MoreLikeThis}
 * considers similar, based on the "title" and "author" fields.
 *
 * <p>The index location is taken from the {@code index.dir} system property. The reader and the
 * directory are closed even when searching fails part-way through.
 *
 * @param args unused
 * @throws IllegalArgumentException if the {@code index.dir} system property is not set
 * @throws Throwable                if the index cannot be opened, read or searched
 */
public static void main(String[] args) throws Throwable {

    String indexDir = System.getProperty("index.dir");
    if (indexDir == null) {
        throw new IllegalArgumentException("System property 'index.dir' must point to the index directory");
    }

    FSDirectory directory = FSDirectory.open(new File(indexDir));
    try {
        IndexReader reader = IndexReader.open(directory);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);

            int numDocs = reader.maxDoc();

            // Similarity query generator working on the title and author fields.
            MoreLikeThis mlt = new MoreLikeThis(reader);
            mlt.setFieldNames(new String[] { "title", "author" });
            // Thresholds of 1 so that every term of the source document is eligible.
            mlt.setMinTermFreq(1);
            mlt.setMinDocFreq(1);

            // Iterate over all document ids of the index.
            for (int docID = 0; docID < numDocs; docID++) {
                System.out.println();
                Document doc = reader.document(docID);
                System.out.println(doc.get("title"));

                // Build the "more like this" query for the current document.
                Query query = mlt.like(docID);
                System.out.println("  query=" + query);

                TopDocs similarDocs = searcher.search(query, 10);
                if (similarDocs.totalHits == 0) {
                    System.out.println("  None like this");
                }
                for (int i = 0; i < similarDocs.scoreDocs.length; i++) {
                    int similarDocID = similarDocs.scoreDocs[i].doc;
                    // The source document always matches its own query; skip it.
                    if (similarDocID != docID) {
                        doc = reader.document(similarDocID);
                        // doc.get(...) yields null instead of throwing when the title is absent.
                        System.out.println("  -> " + doc.get("title"));
                    }
                }
            }
        } finally {
            reader.close();
        }
    } finally {
        directory.close();
    }
}

From source file:com.meizu.nlp.classification.KNearestNeighborClassifier.java

License:Apache License

/**
 * {@inheritDoc}
 */
@Override
public void train(LeafReader leafReader, String[] textFieldNames, String classFieldName, Analyzer analyzer,
        Query query) throws IOException {
    this.textFieldNames = textFieldNames;
    this.classFieldName = classFieldName;

    // Similarity query generator over the training index.
    final MoreLikeThis similarityQueryBuilder = new MoreLikeThis(leafReader);
    this.mlt = similarityQueryBuilder;
    similarityQueryBuilder.setAnalyzer(analyzer);
    similarityQueryBuilder.setFieldNames(textFieldNames);

    this.indexSearcher = new IndexSearcher(leafReader);

    // Only override the MoreLikeThis defaults when a positive threshold was configured.
    if (minDocsFreq > 0) {
        similarityQueryBuilder.setMinDocFreq(minDocsFreq);
    }
    if (minTermFreq > 0) {
        similarityQueryBuilder.setMinTermFreq(minTermFreq);
    }

    this.query = query;
}

From source file:com.qwazr.search.query.MoreLikeThisQuery.java

License:Apache License

/**
 * Builds a Lucene "more like this" query for the document identified by {@code doc_num}.
 * Every optional setting that is non-null on this object is copied onto the
 * {@link MoreLikeThis} instance before the query is generated; the analyzer always comes
 * from the query context.
 *
 * @throws NullPointerException if {@code doc_num} is not set
 */
@Override
final public Query getQuery(QueryContext queryContext) throws IOException, ParseException {
    Objects.requireNonNull(doc_num, "The doc_num field is missing");

    final MoreLikeThis moreLikeThis = new MoreLikeThis(queryContext.indexSearcher.getIndexReader());

    // Boosting
    if (is_boost != null) {
        moreLikeThis.setBoost(is_boost);
    }
    if (boost_factor != null) {
        moreLikeThis.setBoostFactor(boost_factor);
    }

    // Fields to inspect
    if (fieldnames != null) {
        moreLikeThis.setFieldNames(fieldnames);
    }

    // Upper limits
    if (max_doc_freq != null) {
        moreLikeThis.setMaxDocFreq(max_doc_freq);
    }
    if (max_doc_freq_pct != null) {
        moreLikeThis.setMaxDocFreqPct(max_doc_freq_pct);
    }
    if (max_num_tokens_parsed != null) {
        moreLikeThis.setMaxNumTokensParsed(max_num_tokens_parsed);
    }
    if (max_query_terms != null) {
        moreLikeThis.setMaxQueryTerms(max_query_terms);
    }
    if (max_word_len != null) {
        moreLikeThis.setMaxWordLen(max_word_len);
    }

    // Lower limits
    if (min_doc_freq != null) {
        moreLikeThis.setMinDocFreq(min_doc_freq);
    }
    if (min_term_freq != null) {
        moreLikeThis.setMinTermFreq(min_term_freq);
    }
    if (min_word_len != null) {
        moreLikeThis.setMinWordLen(min_word_len);
    }

    if (stop_words != null) {
        moreLikeThis.setStopWords(stop_words);
    }

    moreLikeThis.setAnalyzer(queryContext.analyzer);
    return moreLikeThis.like(doc_num);
}

From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java

License:Open Source License

/**
 * Runs a full-text search and assembles the result model: matching documents with highlighted
 * fragments, optional similar files, and facet dimensions for drill-down.
 *
 * <p>When {@code aQueryString} is empty no search is executed and the result contains no
 * documents and no dimensions. Hits that share the same content hash are merged into a single
 * result entry, and hits whose file no longer exists on disk are left out.
 *
 * @param aQueryString     the user query; may be empty
 * @param aBacklink        passed through unchanged to the {@link QueryResult}
 * @param aBasePath        prefix used when building the drill-down link of every facet
 * @param aConfiguration   supplies the maximum number of hits and whether similar documents are looked up
 * @param aDrilldownFields facet dimension to value pairs that restrict the search
 * @return the assembled query result
 * @throws IOException      if refreshing, acquiring or releasing the searcher fails, or if the facet
 *                          reader state cannot be created
 * @throws RuntimeException wrapping any exception raised while searching or building the result
 */
public QueryResult performQuery(String aQueryString, String aBacklink, String aBasePath,
        Configuration aConfiguration, Map<String, String> aDrilldownFields) throws IOException {

    searcherManager.maybeRefreshBlocking();
    IndexSearcher theSearcher = searcherManager.acquire();

    // Creating the facet reader state may throw. The searcher is already acquired at this point,
    // so it has to be handed back on failure; the exception itself propagates unchanged.
    SortedSetDocValuesReaderState theSortedSetState;
    boolean theStateCreated = false;
    try {
        theSortedSetState = new DefaultSortedSetDocValuesReaderState(theSearcher.getIndexReader());
        theStateCreated = true;
    } finally {
        if (!theStateCreated) {
            searcherManager.release(theSearcher);
        }
    }

    List<QueryResultDocument> theResultDocuments = new ArrayList<>();

    long theStartTime = System.currentTimeMillis();

    LOGGER.info("Querying for " + aQueryString);

    DateFormat theDateFormat = new SimpleDateFormat("dd.MMMM.yyyy", Locale.ENGLISH);

    try {

        List<FacetDimension> theDimensions = new ArrayList<>();

        // Search only if a search query is given
        if (!StringUtils.isEmpty(aQueryString)) {

            Query theQuery = computeBooleanQueryFor(aQueryString);

            LOGGER.info(" query is " + theQuery);

            theQuery = theQuery.rewrite(theSearcher.getIndexReader());

            LOGGER.info(" rewritten query is " + theQuery);

            DrillDownQuery theDrilldownQuery = new DrillDownQuery(facetsConfig, theQuery);
            aDrilldownFields.forEach((aDimension, aValue) -> {
                LOGGER.info(" with Drilldown " + aDimension + " for " + aValue);
                theDrilldownQuery.add(aDimension, aValue);
            });

            FacetsCollector theFacetCollector = new FacetsCollector();

            TopDocs theDocs = FacetsCollector.search(theSearcher, theDrilldownQuery, null,
                    aConfiguration.getNumberOfSearchResults(), theFacetCollector);
            SortedSetDocValuesFacetCounts theFacetCounts = new SortedSetDocValuesFacetCounts(theSortedSetState,
                    theFacetCollector);

            List<Facet> theAuthorFacets = new ArrayList<>();
            List<Facet> theFileTypesFacets = new ArrayList<>();
            List<Facet> theLastModifiedYearFacet = new ArrayList<>();
            List<Facet> theLanguageFacet = new ArrayList<>();

            LOGGER.info("Found " + theDocs.scoreDocs.length + " documents");

            // We need this cache to detect duplicate documents while searching for similarities
            Set<Integer> theUniqueDocumentsFound = new HashSet<>();

            // Result entries keyed by content hash, used to merge files with identical content
            Map<String, QueryResultDocument> theDocumentsByHash = new HashMap<>();

            for (int i = 0; i < theDocs.scoreDocs.length; i++) {
                int theDocumentID = theDocs.scoreDocs[i].doc;
                theUniqueDocumentsFound.add(theDocumentID);
                Document theDocument = theSearcher.doc(theDocumentID);

                String theUniqueID = theDocument.getField(IndexFields.UNIQUEID).stringValue();
                String theFoundFileName = theDocument.getField(IndexFields.FILENAME).stringValue();
                String theHash = theDocument.getField(IndexFields.CONTENTMD5).stringValue();
                QueryResultDocument theExistingDocument = theDocumentsByHash.get(theHash);
                if (theExistingDocument != null) {
                    // Same content was already reported; only remember the additional file name
                    theExistingDocument.addFileName(theFoundFileName);
                } else {
                    Date theLastModified = new Date(
                            theDocument.getField(IndexFields.LASTMODIFIED).numericValue().longValue());
                    SupportedLanguage theLanguage = SupportedLanguage
                            .valueOf(theDocument.getField(IndexFields.LANGUAGESTORED).stringValue());
                    String theFieldName;
                    if (analyzerCache.supportsLanguage(theLanguage)) {
                        theFieldName = analyzerCache.getFieldNameFor(theLanguage);
                    } else {
                        theFieldName = IndexFields.CONTENT;
                    }

                    String theOriginalContent = theDocument.getField(theFieldName).stringValue();

                    final Query theFinalQuery = theQuery;

                    // SimpleDateFormat is not thread-safe and the highlighter tasks run concurrently
                    // on the pool, so the date is formatted here on the calling thread.
                    final String theFormattedLastModified = theDateFormat.format(theLastModified);

                    ForkJoinTask<String> theHighligherResult = executorPool.submit(() -> {
                        StringBuilder theResult = new StringBuilder(theFormattedLastModified);
                        theResult.append("&nbsp;-&nbsp;");
                        Highlighter theHighlighter = new Highlighter(new SimpleHTMLFormatter(),
                                new QueryScorer(theFinalQuery));
                        for (String theFragment : theHighlighter.getBestFragments(analyzer, theFieldName,
                                theOriginalContent, NUMBER_OF_FRAGMENTS)) {
                            if (theResult.length() > 0) {
                                theResult.append("...");
                            }
                            theResult.append(theFragment);
                        }
                        return theResult.toString();
                    });

                    // Score relative to the best hit, scaled to the range 0..5
                    int theNormalizedScore = (int) (theDocs.scoreDocs[i].score / theDocs.getMaxScore() * 5);

                    File theFileOnDisk = new File(theFoundFileName);
                    if (theFileOnDisk.exists()) {

                        boolean thePreviewAvailable = previewProcessor.previewAvailableFor(theFileOnDisk);

                        theExistingDocument = new QueryResultDocument(theDocumentID, theFoundFileName,
                                theHighligherResult,
                                Long.parseLong(theDocument.getField(IndexFields.LASTMODIFIED).stringValue()),
                                theNormalizedScore, theUniqueID, thePreviewAvailable);
                        theDocumentsByHash.put(theHash, theExistingDocument);
                        theResultDocuments.add(theExistingDocument);
                    }
                }
            }

            if (aConfiguration.isShowSimilarDocuments()) {

                MoreLikeThis theMoreLikeThis = new MoreLikeThis(theSearcher.getIndexReader());
                theMoreLikeThis.setAnalyzer(analyzer);
                theMoreLikeThis.setMinTermFreq(1);
                theMoreLikeThis.setMinDocFreq(1);
                theMoreLikeThis.setFieldNames(analyzerCache.getAllFieldNames());

                for (QueryResultDocument theDocument : theResultDocuments) {
                    Query theMoreLikeThisQuery = theMoreLikeThis.like(theDocument.getDocumentID());
                    TopDocs theMoreLikeThisTopDocs = theSearcher.search(theMoreLikeThisQuery, 5);
                    for (ScoreDoc theMoreLikeThisScoreDoc : theMoreLikeThisTopDocs.scoreDocs) {
                        int theSimilarDocument = theMoreLikeThisScoreDoc.doc;
                        // add() returns false for documents that were already hit or already
                        // reported as similar, so every file is listed at most once
                        if (theUniqueDocumentsFound.add(theSimilarDocument)) {
                            Document theMoreLikeThisDocument = theSearcher.doc(theSimilarDocument);
                            String theFilename = theMoreLikeThisDocument.getField(IndexFields.FILENAME)
                                    .stringValue();
                            theDocument.addSimilarFile(theFilename);
                        }
                    }
                }
            }

            LOGGER.info("Got Dimensions");
            for (FacetResult theResult : theFacetCounts.getAllDims(20000)) {
                String theDimension = theResult.dim;
                if ("author".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theAuthorFacets.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if ("extension".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theFileTypesFacets.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if ("last-modified-year".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theLastModifiedYearFacet.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if (IndexFields.LANGUAGEFACET.equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            // The facet label is a language code; show its English display name
                            Locale theLocale = new Locale(theLabelAndValue.label);
                            theLanguageFacet.add(new Facet(theLocale.getDisplayLanguage(Locale.ENGLISH),
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }

                LOGGER.info(" " + theDimension);
            }

            if (!theAuthorFacets.isEmpty()) {
                theDimensions.add(new FacetDimension("Author", theAuthorFacets));
            }
            if (!theLastModifiedYearFacet.isEmpty()) {
                theDimensions.add(new FacetDimension("Last modified", theLastModifiedYearFacet));
            }
            if (!theFileTypesFacets.isEmpty()) {
                theDimensions.add(new FacetDimension("File types", theFileTypesFacets));
            }
            if (!theLanguageFacet.isEmpty()) {
                theDimensions.add(new FacetDimension("Language", theLanguageFacet));
            }

            // Wait for all Tasks to complete for the search result highlighter
            ForkJoinTask.helpQuiesce();
        }

        long theDuration = System.currentTimeMillis() - theStartTime;

        LOGGER.info("Total amount of time : " + theDuration + "ms");

        // Report the same duration that was logged
        return new QueryResult(theDuration, theResultDocuments, theDimensions,
                theSearcher.getIndexReader().numDocs(), aBacklink);
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        searcherManager.release(theSearcher);
    }
}

From source file:fr.univ_tours.etu.searcher.LikeThisTest.java

/**
 * Searches the index for documents similar to the given text and prints the title and content
 * of up to ten matches to standard output.
 *
 * <p>The reader opened for the lookup is closed before the method returns, including when the
 * search fails.
 *
 * @param searchForSimilar the text whose similar documents are looked up
 * @throws IOException if the index cannot be opened, searched or read
 */
private void findSilimar(String searchForSimilar) throws IOException {
    try (IndexReader reader = DirectoryReader.open(indexDir)) {
        IndexSearcher indexSearcher = new IndexSearcher(reader);

        MoreLikeThis mlt = new MoreLikeThis(reader);
        // Thresholds of 0 so that no term is discarded for being too rare.
        mlt.setMinTermFreq(0);
        mlt.setMinDocFreq(0);
        mlt.setFieldNames(new String[] { "title", "content" });
        mlt.setAnalyzer(analyzer);

        Reader sReader = new StringReader(searchForSimilar);
        Query query = mlt.like("content", sReader);

        TopDocs topDocs = indexSearcher.search(query, 10);

        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document aSimilar = indexSearcher.doc(scoreDoc.doc);
            String similarTitle = aSimilar.get("title");
            String similarContent = aSimilar.get("content");

            System.out.println("====similar finded====");
            System.out.println("title: " + similarTitle);
            System.out.println("content: " + similarContent);
        }
    }
}

From source file:fr.univ_tours.etu.searcher.Searcher.java

/**
 * Runs a multi-field search built from the request's field-to-query dictionary and, when the
 * request asks for query expansion, appends documents that {@link MoreLikeThis} finds similar
 * to the summaries of the leading results.
 *
 * <p>If the dictionary contains {@code DocFields.CONTENTS}, that text is tokenized; its lemma
 * string replaces the raw text for the contents and synonyms fields, and any recognised named
 * entities are added as an extra clause on {@code DocFields.NAMED_ENTITIES}.
 *
 * @param query the request holding the per-field queries and the query-expansion flag
 * @return the direct hits in rank order, followed by any expansion results
 * @throws IOException    if the index cannot be searched, read or reopened
 * @throws ParseException if the per-field queries cannot be parsed
 */
public List<ResultObject> search(SearchQueriesRequest query) throws IOException, ParseException {

    Map<String, String> queriesDictionary = query.getQueriesDictionary();
    boolean useQueryExpansion = query.isUseQueryExpansion();
    List<Integer> docsToExpand = (useQueryExpansion) ? new ArrayList<>() : null;

    // Parallel lists: fsa holds the field names, qsa the query text for the field at the same index.
    List<String> fsa = new ArrayList<>();
    List<String> qsa = new ArrayList<>();
    String contentLemmas = "";
    if (queriesDictionary.containsKey(DocFields.CONTENTS)) {
        regularTokenizer.tokenize(queriesDictionary.get(DocFields.CONTENTS), true);
        caselessTokenizer.tokenize(queriesDictionary.get(DocFields.CONTENTS), true);
        contentLemmas = caselessTokenizer.getLemmaString();
        System.out.println("Lemmas: " + caselessTokenizer.getLemmaList());
        String neString = "";
        if (caselessTokenizer.getNeList() != null && caselessTokenizer.getNeList().size() != 0) {
            neString = caselessTokenizer.getNeString(";", true);
            System.out.println("NE caseless: " + neString);
        }
        if (regularTokenizer.getNeList() != null && regularTokenizer.getNeList().size() != 0) {
            // Only insert the separator when there is something to separate from, so the
            // combined string never starts with a stray ';'.
            if (!neString.isEmpty()) {
                neString += ";";
            }
            neString += regularTokenizer.getNeString(";", true);
            System.out.println("NE all: " + neString);
        }
        if (!"".equals(neString)) {
            fsa.add(DocFields.NAMED_ENTITIES);
            qsa.add(neString);
        }

    }

    for (Map.Entry<String, String> entry : queriesDictionary.entrySet()) {
        fsa.add(entry.getKey());
        if (entry.getKey().equals(DocFields.CONTENTS) || entry.getKey().equals(DocFields.SYNONYMS)) {
            qsa.add(contentLemmas);
        } else {
            qsa.add(entry.getValue());
        }
    }

    Query q = MultiFieldQueryParser.parse(qsa.toArray(new String[qsa.size()]),
            fsa.toArray(new String[fsa.size()]), analyzer);

    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs docs = searcher.search(q, this.numRetrievedDocs);
    ScoreDoc[] hits = docs.scoreDocs;

    List<ResultObject> resultObjects = new ArrayList<>();

    for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        if (useQueryExpansion) {
            docsToExpand.add(docId);
        }
        Document d = searcher.doc(docId);
        resultObjects.add(new ResultObject(docId, i, d.get(DocFields.TITLE), d.get(DocFields.AUTHOR),
                d.get(DocFields.FILE_PATH), d.get(DocFields.SUMMARY), d.get(DocFields.FILE_NAME)));
    }

    if (useQueryExpansion) {
        // NOTE(review): the reader is closed and reopened here, yet document ids collected from
        // the previous reader are compared with ids from the new one below — confirm the index
        // cannot change in between.
        reader.close();

        this.reader = DirectoryReader.open(FSDirectory.open(new File(this.indexDir).toPath()));
        searcher = new IndexSearcher(reader);
        MoreLikeThis mlt = new MoreLikeThis(reader);
        mlt.setMinTermFreq(0);
        mlt.setMinDocFreq(0);
        mlt.setAnalyzer(analyzer);
        // docsToExpand and resultObjects grow in lockstep inside this loop, so the bound is
        // re-evaluated on every pass and index i is always valid for both lists.
        for (int i = 0; i < Math.min(docsToExpand.size(), 5); i++) {

            String summary = resultObjects.get(i).getSummary();
            if (summary == null) {
                // Nothing to derive a similarity query from; StringReader rejects null.
                continue;
            }
            Reader r = new StringReader(summary);
            Query expandedQuery = mlt.like(DocFields.CONTENTS, r);

            TopDocs topDocs = searcher.search(expandedQuery, 5);

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                if (!docsToExpand.contains(scoreDoc.doc)) {
                    docsToExpand.add(scoreDoc.doc);
                    Document aSimilar = searcher.doc(scoreDoc.doc);

                    // Use the real document id, as for direct hits, instead of a constant.
                    resultObjects.add(new ResultObject(scoreDoc.doc, resultObjects.size(),
                            aSimilar.get(DocFields.TITLE), aSimilar.get(DocFields.AUTHOR),
                            aSimilar.get(DocFields.FILE_PATH), aSimilar.get(DocFields.SUMMARY),
                            aSimilar.get(DocFields.FILE_NAME)));
                }
            }
        }
    }

    return resultObjects;
}

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.util.MoreLikeThisHelper.java

License:Apache License

/**
 * Translates a native MLT query string of the form {@code key=value&key=value...} into a Lucene
 * "more like this" {@link Query}.
 *
 * <p>Recognised keys are {@code stream.body} (the text or path to compare against),
 * {@code mlt.fl} (comma separated field names) and the tuning keys {@code mlt.mindf},
 * {@code mlt.mintf}, {@code mlt.boost}, {@code mlt.qf}, {@code mlt.maxdf}, {@code mlt.maxdfp},
 * {@code mlt.maxntp}, {@code mlt.maxqt}, {@code mlt.maxwl} and {@code mlt.minwl}. Unknown keys
 * are ignored.
 *
 * <p>When the first field is {@code FieldNames.PATH} and a document with that path exists, the
 * query is built from that document using all of its other fields; otherwise it is built from
 * the {@code stream.body} text against the first requested field.
 *
 * @param reader         the index reader to derive term statistics from
 * @param analyzer       the analyzer used to tokenize the supplied text
 * @param mltQueryString the query string to translate
 * @return the generated query, or {@code null} when the string contains no {@code stream.body}
 * @throws RuntimeException if the string cannot be parsed or the query cannot be built; the
 *                          original failure is attached as the cause
 */
public static Query getMoreLikeThis(IndexReader reader, Analyzer analyzer, String mltQueryString) {
    Query moreLikeThisQuery = null;
    MoreLikeThis mlt = new MoreLikeThis(reader);
    mlt.setAnalyzer(analyzer);
    try {
        String text = null;
        String[] fields = {};
        for (String param : mltQueryString.split("&")) {
            String[] keyValuePair = param.split("=");
            // String.split never yields null elements, so the length check is sufficient.
            if (keyValuePair.length != 2) {
                throw new RuntimeException("Unparsable native Lucene MLT query: " + mltQueryString);
            }
            String key = keyValuePair[0];
            String value = keyValuePair[1];
            if ("stream.body".equals(key)) {
                text = value;
            } else if ("mlt.fl".equals(key)) {
                fields = value.split(",");
            } else if ("mlt.mindf".equals(key)) {
                mlt.setMinDocFreq(Integer.parseInt(value));
            } else if ("mlt.mintf".equals(key)) {
                mlt.setMinTermFreq(Integer.parseInt(value));
            } else if ("mlt.boost".equals(key)) {
                mlt.setBoost(Boolean.parseBoolean(value));
            } else if ("mlt.qf".equals(key)) {
                mlt.setBoostFactor(Float.parseFloat(value));
            } else if ("mlt.maxdf".equals(key)) {
                mlt.setMaxDocFreq(Integer.parseInt(value));
            } else if ("mlt.maxdfp".equals(key)) {
                mlt.setMaxDocFreqPct(Integer.parseInt(value));
            } else if ("mlt.maxntp".equals(key)) {
                mlt.setMaxNumTokensParsed(Integer.parseInt(value));
            } else if ("mlt.maxqt".equals(key)) {
                mlt.setMaxQueryTerms(Integer.parseInt(value));
            } else if ("mlt.maxwl".equals(key)) {
                mlt.setMaxWordLen(Integer.parseInt(value));
            } else if ("mlt.minwl".equals(key)) {
                mlt.setMinWordLen(Integer.parseInt(value));
            }
        }
        if (text != null) {
            // Fail with a descriptive cause instead of an ArrayIndexOutOfBoundsException below.
            if (fields.length == 0) {
                throw new IllegalArgumentException(
                        "MLT query specifies stream.body but no mlt.fl fields: " + mltQueryString);
            }
            if (FieldNames.PATH.equals(fields[0])) {
                // The text is a path: try to locate the document it refers to.
                IndexSearcher searcher = new IndexSearcher(reader);
                TermQuery q = new TermQuery(new Term(FieldNames.PATH, text));
                TopDocs top = searcher.search(q, 1);
                if (top.totalHits == 0) {
                    // No such document; fall back to treating the text as plain content.
                    mlt.setFieldNames(fields);
                    moreLikeThisQuery = mlt.like(new StringReader(text), mlt.getFieldNames()[0]);
                } else {
                    ScoreDoc d = top.scoreDocs[0];
                    Document doc = reader.document(d.doc);
                    // Compare on every field of the document except the path itself.
                    List<String> fieldNames = new ArrayList<String>();
                    for (IndexableField f : doc.getFields()) {
                        if (!FieldNames.PATH.equals(f.name())) {
                            fieldNames.add(f.name());
                        }
                    }
                    String[] docFields = fieldNames.toArray(new String[fieldNames.size()]);
                    mlt.setFieldNames(docFields);
                    moreLikeThisQuery = mlt.like(d.doc);
                }
            } else {
                mlt.setFieldNames(fields);
                moreLikeThisQuery = mlt.like(new StringReader(text), mlt.getFieldNames()[0]);
            }
        }
        return moreLikeThisQuery;
    } catch (Exception e) {
        // Keep the original failure as the cause so the root problem stays diagnosable.
        throw new RuntimeException("could not handle MLT query " + mltQueryString, e);
    }
}

From source file:org.apache.solr.handler.RedbubbleMoreLikeThisHandler.java

License:Apache License

/**
 * Handles a "more like this" request: validates the similarity fields and the document id,
 * extracts the interesting terms of the referenced document, runs an edismax query built from
 * those terms and writes the matches to the response under {@code "response"}. The terms are
 * added under {@code "interestingTerms"} when the request asks for them.
 */
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    final SolrParams requestParams = req.getParams();
    final Integer sourceDocId = requestParams.getInt(DOC_ID_PARAM);
    final String[] fieldsForSimilarity = requestParams.getParams(MoreLikeThisParams.SIMILARITY_FIELDS);

    // Validate the request before touching the index.
    final boolean hasSimilarityFields = fieldsForSimilarity != null && fieldsForSimilarity.length >= 1;
    if (!hasSimilarityFields) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                "MoreLikeThis requires at least one similarity field: " + MoreLikeThisParams.SIMILARITY_FIELDS);
    }
    if (sourceDocId == null) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "id parameter missing!");
    }

    final SolrIndexSearcher indexSearcher = req.getSearcher();
    final MoreLikeThis moreLikeThis = new MoreLikeThis(indexSearcher.getIndexReader());
    setMLTparams(requestParams, fieldsForSimilarity, moreLikeThis);

    // Resolve the external document id to the internal Lucene document id.
    final Query sourceDocQuery = createOriginalDocQuery(sourceDocId, req);
    final int sourceLuceneDocId = getOriginalDocLuceneDocId(sourceDocQuery, indexSearcher);

    //TODO: cache interesting terms per doc?
    final String[] terms = moreLikeThis.retrieveInterestingTerms(sourceLuceneDocId);

    // Search for other documents using the extracted terms.
    final Query termsQuery = createEdismaxQuery(terms, requestParams, req);
    final DocList matches = executeEdismaxQuery(termsQuery, requestParams, indexSearcher);

    if (requestParams.getBool(INTERESTING_TERMS_PARAM, false)) {
        rsp.add("interestingTerms", terms);
    }

    rsp.add("response", matches);
}