Example usage for org.apache.lucene.queries.mlt MoreLikeThis setMinTermFreq

List of usage examples for org.apache.lucene.queries.mlt MoreLikeThis setMinTermFreq

Introduction

On this page you can find example usages of the setMinTermFreq method of org.apache.lucene.queries.mlt.MoreLikeThis.

Prototype

public void setMinTermFreq(int minTermFreq) 

Source Link

Document

Sets the frequency below which terms will be ignored in the source doc.

Usage

From source file:aos.lucene.tools.BooksMoreLikeThis.java

License:Apache License

/**
 * For every document slot in the index named by the {@code index.dir} system property,
 * logs the document's title, the MoreLikeThis query generated for it, and the titles of
 * up to ten documents matching that query (excluding the document itself).
 *
 * @param args unused
 * @throws Throwable if the index cannot be opened, read or searched
 */
public static void main(String[] args) throws Throwable {

    String indexDir = System.getProperty("index.dir");
    if (indexDir == null) {
        // Fail with a readable message instead of a NullPointerException from new File(null).
        throw new IllegalStateException("System property 'index.dir' is not set");
    }

    // try-with-resources closes the reader first and the directory second,
    // and does so even when searching fails part-way through.
    try (FSDirectory directory = FSDirectory.open(new File(indexDir));
            IndexReader reader = DirectoryReader.open(directory)) {

        IndexSearcher searcher = new IndexSearcher(reader);

        int numDocs = reader.maxDoc();

        // Build similarity queries from the title and author fields; the thresholds of 1
        // keep terms that occur only once in the source doc or in a single document.
        MoreLikeThis mlt = new MoreLikeThis(reader);
        mlt.setFieldNames(new String[] { "title", "author" });
        mlt.setMinTermFreq(1);
        mlt.setMinDocFreq(1);

        for (int docID = 0; docID < numDocs; docID++) {
            // Empty line as a separator between documents.
            LOGGER.info("");
            Document doc = reader.document(docID);
            LOGGER.info(doc.get("title"));

            Query query = mlt.like(docID);
            LOGGER.info("  query=" + query);

            TopDocs similarDocs = searcher.search(query, 10);
            if (similarDocs.totalHits == 0) {
                LOGGER.info("  None like this");
            }
            for (int i = 0; i < similarDocs.scoreDocs.length; i++) {
                // Skip the source document itself.
                if (similarDocs.scoreDocs[i].doc != docID) {
                    doc = reader.document(similarDocs.scoreDocs[i].doc);
                    LOGGER.info("  -> " + doc.getField("title").stringValue());
                }
            }
        }
    }
}

From source file:com.mathworks.xzheng.tools.BooksMoreLikeThis.java

License:Apache License

/**
 * For every document slot in the index named by the {@code index.dir} system property,
 * prints the document's title, the MoreLikeThis query generated for it, and the titles of
 * up to ten documents matching that query (excluding the document itself).
 *
 * @param args unused
 * @throws Throwable if the index cannot be opened, read or searched
 */
public static void main(String[] args) throws Throwable {

    String indexDir = System.getProperty("index.dir");
    if (indexDir == null) {
        // Fail with a readable message instead of a NullPointerException from new File(null).
        throw new IllegalStateException("System property 'index.dir' is not set");
    }

    // try-with-resources closes the reader first and the directory second,
    // and does so even when searching fails part-way through.
    try (FSDirectory directory = FSDirectory.open(new File(indexDir));
            IndexReader reader = IndexReader.open(directory)) {

        IndexSearcher searcher = new IndexSearcher(reader);

        int numDocs = reader.maxDoc();

        // Instantiate MoreLikeThis on top of the reader.
        MoreLikeThis mlt = new MoreLikeThis(reader);
        mlt.setFieldNames(new String[] { "title", "author" });
        // Lower the default minimums so terms seen only once are still used.
        mlt.setMinTermFreq(1);
        mlt.setMinDocFreq(1);

        // Iterate over every document slot in the index.
        for (int docID = 0; docID < numDocs; docID++) {
            System.out.println();
            Document doc = reader.document(docID);
            System.out.println(doc.get("title"));

            // Build the query that finds documents similar to this one.
            Query query = mlt.like(docID);
            System.out.println("  query=" + query);

            TopDocs similarDocs = searcher.search(query, 10);
            if (similarDocs.totalHits == 0) {
                System.out.println("  None like this");
            }
            for (int i = 0; i < similarDocs.scoreDocs.length; i++) {
                // Do not report the source document as similar to itself.
                if (similarDocs.scoreDocs[i].doc != docID) {
                    doc = reader.document(similarDocs.scoreDocs[i].doc);
                    System.out.println("  -> " + doc.getField("title").stringValue());
                }
            }
        }
    }
}

From source file:com.qwazr.search.query.MoreLikeThisQuery.java

License:Apache License

/**
 * Builds a Lucene query that matches documents similar to the document {@code doc_num}.
 * <p>
 * Each optional setting of this object is applied to the MoreLikeThis instance only when
 * it is non-null; the analyzer of the query context is always applied.
 *
 * @param queryContext supplies the index searcher (for its reader) and the analyzer
 * @return the query produced by {@code MoreLikeThis.like(doc_num)}
 * @throws NullPointerException if {@code doc_num} is null
 */
@Override
final public Query getQuery(QueryContext queryContext) throws IOException, ParseException {
    Objects.requireNonNull(doc_num, "The doc_num field is missing");

    final MoreLikeThis mlt = new MoreLikeThis(queryContext.indexSearcher.getIndexReader());

    // Boosting
    if (is_boost != null) {
        mlt.setBoost(is_boost);
    }
    if (boost_factor != null) {
        mlt.setBoostFactor(boost_factor);
    }

    // Fields to analyse
    if (fieldnames != null) {
        mlt.setFieldNames(fieldnames);
    }

    // Upper limits (the order of the two max-doc-freq setters is kept as in the original)
    if (max_doc_freq != null) {
        mlt.setMaxDocFreq(max_doc_freq);
    }
    if (max_doc_freq_pct != null) {
        mlt.setMaxDocFreqPct(max_doc_freq_pct);
    }
    if (max_num_tokens_parsed != null) {
        mlt.setMaxNumTokensParsed(max_num_tokens_parsed);
    }
    if (max_query_terms != null) {
        mlt.setMaxQueryTerms(max_query_terms);
    }
    if (max_word_len != null) {
        mlt.setMaxWordLen(max_word_len);
    }

    // Lower limits
    if (min_doc_freq != null) {
        mlt.setMinDocFreq(min_doc_freq);
    }
    if (min_term_freq != null) {
        mlt.setMinTermFreq(min_term_freq);
    }
    if (min_word_len != null) {
        mlt.setMinWordLen(min_word_len);
    }

    if (stop_words != null) {
        mlt.setStopWords(stop_words);
    }

    mlt.setAnalyzer(queryContext.analyzer);
    return mlt.like(doc_num);
}

From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java

License:Open Source License

/**
 * Runs a faceted full-text search and assembles the result page.
 * <p>
 * When {@code aQueryString} is non-empty the method
 * <ol>
 * <li>builds and rewrites the query and wraps it in a drill-down query restricted by
 * {@code aDrilldownFields},</li>
 * <li>collects the hits, merging hits that share the same content MD5 into one result
 * document and computing the highlighted snippet of each new result asynchronously
 * on {@code executorPool},</li>
 * <li>optionally attaches similar files found via MoreLikeThis,</li>
 * <li>collects the author, extension, last-modified-year and language facets.</li>
 * </ol>
 * With an empty query string the result contains no documents and no facet dimensions.
 * The acquired searcher is always released in the {@code finally} block.
 *
 * @param aQueryString     the user query; may be empty
 * @param aBacklink        passed through unchanged into the result
 * @param aBasePath        prefix used to build the facet links
 * @param aConfiguration   supplies the maximum number of results and whether similar
 *                         documents are looked up
 * @param aDrilldownFields facet dimension to value pairs restricting the search
 * @return the query result including elapsed time and the number of indexed documents
 * @throws IOException      if refreshing or acquiring the searcher fails
 * @throws RuntimeException wrapping any exception raised while searching
 */
public QueryResult performQuery(String aQueryString, String aBacklink, String aBasePath,
        Configuration aConfiguration, Map<String, String> aDrilldownFields) throws IOException {

    searcherManager.maybeRefreshBlocking();
    IndexSearcher theSearcher = searcherManager.acquire();
    SortedSetDocValuesReaderState theSortedSetState = new DefaultSortedSetDocValuesReaderState(
            theSearcher.getIndexReader());

    List<QueryResultDocument> theResultDocuments = new ArrayList<>();

    long theStartTime = System.currentTimeMillis();

    LOGGER.info("Querying for " + aQueryString);

    // SimpleDateFormat is not thread-safe: it must only be used on this thread,
    // never inside the highlighter tasks submitted to the executor pool below.
    DateFormat theDateFormat = new SimpleDateFormat("dd.MMMM.yyyy", Locale.ENGLISH);

    try {

        List<FacetDimension> theDimensions = new ArrayList<>();

        // Search only if a search query is given
        if (!StringUtils.isEmpty(aQueryString)) {

            Query theQuery = computeBooleanQueryFor(aQueryString);

            LOGGER.info(" query is " + theQuery);

            theQuery = theQuery.rewrite(theSearcher.getIndexReader());

            LOGGER.info(" rewritten query is " + theQuery);

            DrillDownQuery theDrilldownQuery = new DrillDownQuery(facetsConfig, theQuery);
            aDrilldownFields.forEach((aDimension, aValue) -> {
                LOGGER.info(" with Drilldown " + aDimension + " for " + aValue);
                theDrilldownQuery.add(aDimension, aValue);
            });

            FacetsCollector theFacetCollector = new FacetsCollector();

            TopDocs theDocs = FacetsCollector.search(theSearcher, theDrilldownQuery, null,
                    aConfiguration.getNumberOfSearchResults(), theFacetCollector);
            SortedSetDocValuesFacetCounts theFacetCounts = new SortedSetDocValuesFacetCounts(theSortedSetState,
                    theFacetCollector);

            List<Facet> theAuthorFacets = new ArrayList<>();
            List<Facet> theFileTypesFacets = new ArrayList<>();
            List<Facet> theLastModifiedYearFacet = new ArrayList<>();
            List<Facet> theLanguageFacet = new ArrayList<>();

            LOGGER.info("Found " + theDocs.scoreDocs.length + " documents");

            // We need this cache to detect duplicate documents while searching for similarities
            Set<Integer> theUniqueDocumentsFound = new HashSet<>();

            // Result documents keyed by content hash, so identical files are reported once
            Map<String, QueryResultDocument> theDocumentsByHash = new HashMap<>();

            for (int i = 0; i < theDocs.scoreDocs.length; i++) {
                int theDocumentID = theDocs.scoreDocs[i].doc;
                theUniqueDocumentsFound.add(theDocumentID);
                Document theDocument = theSearcher.doc(theDocumentID);

                String theUniqueID = theDocument.getField(IndexFields.UNIQUEID).stringValue();
                String theFoundFileName = theDocument.getField(IndexFields.FILENAME).stringValue();
                String theHash = theDocument.getField(IndexFields.CONTENTMD5).stringValue();
                QueryResultDocument theExistingDocument = theDocumentsByHash.get(theHash);
                if (theExistingDocument != null) {
                    // Same content already reported: just remember the additional file name
                    theExistingDocument.addFileName(theFoundFileName);
                } else {
                    Date theLastModified = new Date(
                            theDocument.getField(IndexFields.LASTMODIFIED).numericValue().longValue());
                    SupportedLanguage theLanguage = SupportedLanguage
                            .valueOf(theDocument.getField(IndexFields.LANGUAGESTORED).stringValue());
                    String theFieldName;
                    if (analyzerCache.supportsLanguage(theLanguage)) {
                        theFieldName = analyzerCache.getFieldNameFor(theLanguage);
                    } else {
                        theFieldName = IndexFields.CONTENT;
                    }

                    String theOriginalContent = theDocument.getField(theFieldName).stringValue();

                    final Query theFinalQuery = theQuery;

                    // Format the date here, on the calling thread, so the shared
                    // SimpleDateFormat is never touched by concurrently running tasks.
                    final String theFormattedDate = theDateFormat.format(theLastModified);

                    ForkJoinTask<String> theHighligherResult = executorPool.submit(() -> {
                        StringBuilder theResult = new StringBuilder(theFormattedDate);
                        theResult.append("&nbsp;-&nbsp;");
                        Highlighter theHighlighter = new Highlighter(new SimpleHTMLFormatter(),
                                new QueryScorer(theFinalQuery));
                        for (String theFragment : theHighlighter.getBestFragments(analyzer, theFieldName,
                                theOriginalContent, NUMBER_OF_FRAGMENTS)) {
                            // The builder already holds the date prefix, so every fragment
                            // (including the first) is preceded by an ellipsis.
                            if (theResult.length() > 0) {
                                theResult.append("...");
                            }
                            theResult.append(theFragment);
                        }
                        return theResult.toString();
                    });

                    // Scale the score relative to the best hit into the range 0..5
                    int theNormalizedScore = (int) (theDocs.scoreDocs[i].score / theDocs.getMaxScore() * 5);

                    // Only report files that still exist on disk
                    File theFileOnDisk = new File(theFoundFileName);
                    if (theFileOnDisk.exists()) {

                        boolean thePreviewAvailable = previewProcessor.previewAvailableFor(theFileOnDisk);

                        theExistingDocument = new QueryResultDocument(theDocumentID, theFoundFileName,
                                theHighligherResult,
                                Long.parseLong(theDocument.getField(IndexFields.LASTMODIFIED).stringValue()),
                                theNormalizedScore, theUniqueID, thePreviewAvailable);
                        theDocumentsByHash.put(theHash, theExistingDocument);
                        theResultDocuments.add(theExistingDocument);
                    }
                }
            }

            if (aConfiguration.isShowSimilarDocuments()) {

                MoreLikeThis theMoreLikeThis = new MoreLikeThis(theSearcher.getIndexReader());
                theMoreLikeThis.setAnalyzer(analyzer);
                theMoreLikeThis.setMinTermFreq(1);
                theMoreLikeThis.setMinDocFreq(1);
                theMoreLikeThis.setFieldNames(analyzerCache.getAllFieldNames());

                for (QueryResultDocument theDocument : theResultDocuments) {
                    Query theMoreLikeThisQuery = theMoreLikeThis.like(theDocument.getDocumentID());
                    TopDocs theMoreLikeThisTopDocs = theSearcher.search(theMoreLikeThisQuery, 5);
                    for (ScoreDoc theMoreLikeThisScoreDoc : theMoreLikeThisTopDocs.scoreDocs) {
                        int theSimilarDocument = theMoreLikeThisScoreDoc.doc;
                        // add() returns false for documents that were already seen,
                        // either as a direct hit or as a similar file of an earlier result
                        if (theUniqueDocumentsFound.add(theSimilarDocument)) {
                            Document theMoreLikeThisDocument = theSearcher.doc(theSimilarDocument);
                            String theFilename = theMoreLikeThisDocument.getField(IndexFields.FILENAME)
                                    .stringValue();
                            theDocument.addSimilarFile(theFilename);
                        }
                    }
                }
            }

            LOGGER.info("Got Dimensions");
            for (FacetResult theResult : theFacetCounts.getAllDims(20000)) {
                String theDimension = theResult.dim;
                if ("author".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theAuthorFacets.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if ("extension".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theFileTypesFacets.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if ("last-modified-year".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theLastModifiedYearFacet.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if (IndexFields.LANGUAGEFACET.equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            // The facet label is used as a language code and shown by its English name
                            Locale theLocale = new Locale(theLabelAndValue.label);
                            theLanguageFacet.add(new Facet(theLocale.getDisplayLanguage(Locale.ENGLISH),
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }

                LOGGER.info(" " + theDimension);
            }

            if (!theAuthorFacets.isEmpty()) {
                theDimensions.add(new FacetDimension("Author", theAuthorFacets));
            }
            if (!theLastModifiedYearFacet.isEmpty()) {
                theDimensions.add(new FacetDimension("Last modified", theLastModifiedYearFacet));
            }
            if (!theFileTypesFacets.isEmpty()) {
                theDimensions.add(new FacetDimension("File types", theFileTypesFacets));
            }
            if (!theLanguageFacet.isEmpty()) {
                theDimensions.add(new FacetDimension("Language", theLanguageFacet));
            }

            // Wait for all Tasks to complete for the search result highlighter
            ForkJoinTask.helpQuiesce();
        }

        long theDuration = System.currentTimeMillis() - theStartTime;

        LOGGER.info("Total amount of time : " + theDuration + "ms");

        // Report the same duration that was logged instead of measuring a second time
        return new QueryResult(theDuration, theResultDocuments, theDimensions,
                theSearcher.getIndexReader().numDocs(), aBacklink);
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        searcherManager.release(theSearcher);
    }
}

From source file:fr.univ_tours.etu.searcher.LikeThisTest.java

/**
 * Prints the title and content of up to ten indexed documents that are similar to the
 * given text, using a MoreLikeThis query built from the text as "content".
 *
 * @param searchForSimilar the text to find similar documents for
 * @throws IOException if the index cannot be opened, read or searched
 */
private void findSilimar(String searchForSimilar) throws IOException {
    // The reader was previously never closed; try-with-resources releases it on every path.
    try (IndexReader reader = DirectoryReader.open(indexDir)) {
        IndexSearcher indexSearcher = new IndexSearcher(reader);

        MoreLikeThis mlt = new MoreLikeThis(reader);
        // Thresholds of 0 disable the minimum term/document frequency filters.
        mlt.setMinTermFreq(0);
        mlt.setMinDocFreq(0);
        mlt.setFieldNames(new String[] { "title", "content" });
        mlt.setAnalyzer(analyzer);

        Reader sReader = new StringReader(searchForSimilar);
        Query query = mlt.like("content", sReader);

        TopDocs topDocs = indexSearcher.search(query, 10);

        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document aSimilar = indexSearcher.doc(scoreDoc.doc);
            String similarTitle = aSimilar.get("title");
            String similarContent = aSimilar.get("content");

            System.out.println("====similar finded====");
            System.out.println("title: " + similarTitle);
            System.out.println("content: " + similarContent);
        }
    }
}

From source file:fr.univ_tours.etu.searcher.Searcher.java

/**
 * Runs a multi-field search and, when requested, expands the result list with documents
 * similar to the top hits.
 * <p>
 * If the request contains a query for {@code DocFields.CONTENTS}, that text is tokenized;
 * its lemma string replaces the raw text for the CONTENTS and SYNONYMS fields, and any
 * named entities found are searched in {@code DocFields.NAMED_ENTITIES}.
 * <p>
 * With query expansion enabled, the index reader is reopened and a MoreLikeThis query is
 * built from the summaries at the first positions (at most five) of the result list;
 * documents not already present are appended to the result.
 *
 * @param query the per-field query strings and the query-expansion flag
 * @return the primary hits in rank order, followed by any expanded documents
 * @throws IOException    if reading the index fails
 * @throws ParseException if the combined query cannot be parsed
 */
public List<ResultObject> search(SearchQueriesRequest query) throws IOException, ParseException {

    Map<String, String> queriesDictionary = query.getQueriesDictionary();
    boolean useQueryExpansion = query.isUseQueryExpansion();
    List<Integer> docsToExpand = (useQueryExpansion) ? new ArrayList<>() : null;

    // Parallel lists: fsa holds the field names, qsa the query string for each field
    List<String> fsa = new ArrayList<>();
    List<String> qsa = new ArrayList<>();
    String contentLemmas = "";
    if (queriesDictionary.containsKey(DocFields.CONTENTS)) {
        regularTokenizer.tokenize(queriesDictionary.get(DocFields.CONTENTS), true);
        caselessTokenizer.tokenize(queriesDictionary.get(DocFields.CONTENTS), true);
        contentLemmas = caselessTokenizer.getLemmaString();
        System.out.println("Lemmas: " + caselessTokenizer.getLemmaList());
        String neString = "";
        if (caselessTokenizer.getNeList() != null && caselessTokenizer.getNeList().size() != 0) {
            neString = caselessTokenizer.getNeString(";", true);
            System.out.println("NE caseless: " + neString);
        }
        if (regularTokenizer.getNeList() != null && regularTokenizer.getNeList().size() != 0) {
            String regularNeString = regularTokenizer.getNeString(";", true);
            // Only insert the separator when there is something to separate from;
            // previously the string started with ";" if the caseless pass found nothing.
            neString = neString.isEmpty() ? regularNeString : neString + ";" + regularNeString;
            System.out.println("NE all: " + neString);
        }
        if (!"".equals(neString)) {
            fsa.add(DocFields.NAMED_ENTITIES);
            qsa.add(neString);
        }

    }

    for (Map.Entry<String, String> entry : queriesDictionary.entrySet()) {
        fsa.add(entry.getKey());
        if (entry.getKey().equals(DocFields.CONTENTS) || entry.getKey().equals(DocFields.SYNONYMS)) {
            qsa.add(contentLemmas);
        } else {
            qsa.add(entry.getValue());
        }
    }

    Query q = MultiFieldQueryParser.parse(qsa.toArray(new String[qsa.size()]),
            fsa.toArray(new String[fsa.size()]), analyzer);

    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs docs = searcher.search(q, this.numRetrievedDocs);
    ScoreDoc[] hits = docs.scoreDocs;

    List<ResultObject> resultObjects = new ArrayList<>();

    for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        if (useQueryExpansion) {
            docsToExpand.add(docId);
        }
        Document d = searcher.doc(docId);
        resultObjects.add(new ResultObject(docId, i, d.get(DocFields.TITLE), d.get(DocFields.AUTHOR),
                d.get(DocFields.FILE_PATH), d.get(DocFields.SUMMARY), d.get(DocFields.FILE_NAME)));
    }

    if (useQueryExpansion) {
        // Reopen the reader before running the expansion queries
        reader.close();

        this.reader = DirectoryReader.open(FSDirectory.open(new File(this.indexDir).toPath()));
        searcher = new IndexSearcher(reader);
        MoreLikeThis mlt = new MoreLikeThis(reader);
        mlt.setMinTermFreq(0);
        mlt.setMinDocFreq(0);
        mlt.setAnalyzer(analyzer);
        // The bound is re-evaluated on purpose: docsToExpand grows while expanding.
        for (int i = 0; i < Math.min(docsToExpand.size(), 5); i++) {

            String summary = resultObjects.get(i).getSummary();
            if (summary == null) {
                // No summary to derive a similarity query from; StringReader rejects null.
                continue;
            }
            Reader r = new StringReader(summary);
            Query expandedQuery = mlt.like(DocFields.CONTENTS, r);

            TopDocs topDocs = searcher.search(expandedQuery, 5);

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                if (!docsToExpand.contains(scoreDoc.doc)) {
                    docsToExpand.add(scoreDoc.doc);
                    Document aSimilar = searcher.doc(scoreDoc.doc);

                    // Use the real document id, as for the primary hits above
                    // (the id was previously hard-coded to 1).
                    resultObjects.add(new ResultObject(scoreDoc.doc, resultObjects.size(),
                            aSimilar.get(DocFields.TITLE), aSimilar.get(DocFields.AUTHOR),
                            aSimilar.get(DocFields.FILE_PATH), aSimilar.get(DocFields.SUMMARY),
                            aSimilar.get(DocFields.FILE_NAME)));
                }
            }
        }
    }

    return resultObjects;
}

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.util.MoreLikeThisHelper.java

License:Apache License

/**
 * Builds a MoreLikeThis query from a native query string of {@code key=value} pairs
 * separated by {@code &}.
 * <p>
 * Recognised keys are {@code stream.body} (the text, or a path when the first field is
 * {@code FieldNames.PATH}), {@code mlt.fl} (comma-separated field names) and the tuning
 * keys {@code mlt.mindf}, {@code mlt.mintf}, {@code mlt.boost}, {@code mlt.qf},
 * {@code mlt.maxdf}, {@code mlt.maxdfp}, {@code mlt.maxntp}, {@code mlt.maxqt},
 * {@code mlt.maxwl} and {@code mlt.minwl}. Unknown keys are ignored.
 * <p>
 * When the first field is the path field and a document with that path exists, the query
 * is built from that document using all of its fields except the path; otherwise it is
 * built from the text itself.
 *
 * @param reader         the index reader to build the query against
 * @param analyzer       the analyzer used to tokenize the text
 * @param mltQueryString the native MLT query string
 * @return the query, or {@code null} if no {@code stream.body} was given
 * @throws RuntimeException if the query string is malformed or the query cannot be built;
 *                          the underlying exception is attached as the cause
 */
public static Query getMoreLikeThis(IndexReader reader, Analyzer analyzer, String mltQueryString) {
    Query moreLikeThisQuery = null;
    MoreLikeThis mlt = new MoreLikeThis(reader);
    mlt.setAnalyzer(analyzer);
    try {
        String text = null;
        String[] fields = {};
        for (String param : mltQueryString.split("&")) {
            String[] keyValuePair = param.split("=");
            if (keyValuePair.length != 2 || keyValuePair[0] == null || keyValuePair[1] == null) {
                throw new RuntimeException("Unparsable native Lucene MLT query: " + mltQueryString);
            } else {
                if ("stream.body".equals(keyValuePair[0])) {
                    text = keyValuePair[1];
                } else if ("mlt.fl".equals(keyValuePair[0])) {
                    fields = keyValuePair[1].split(",");
                } else if ("mlt.mindf".equals(keyValuePair[0])) {
                    mlt.setMinDocFreq(Integer.parseInt(keyValuePair[1]));
                } else if ("mlt.mintf".equals(keyValuePair[0])) {
                    mlt.setMinTermFreq(Integer.parseInt(keyValuePair[1]));
                } else if ("mlt.boost".equals(keyValuePair[0])) {
                    mlt.setBoost(Boolean.parseBoolean(keyValuePair[1]));
                } else if ("mlt.qf".equals(keyValuePair[0])) {
                    mlt.setBoostFactor(Float.parseFloat(keyValuePair[1]));
                } else if ("mlt.maxdf".equals(keyValuePair[0])) {
                    mlt.setMaxDocFreq(Integer.parseInt(keyValuePair[1]));
                } else if ("mlt.maxdfp".equals(keyValuePair[0])) {
                    mlt.setMaxDocFreqPct(Integer.parseInt(keyValuePair[1]));
                } else if ("mlt.maxntp".equals(keyValuePair[0])) {
                    mlt.setMaxNumTokensParsed(Integer.parseInt(keyValuePair[1]));
                } else if ("mlt.maxqt".equals(keyValuePair[0])) {
                    mlt.setMaxQueryTerms(Integer.parseInt(keyValuePair[1]));
                } else if ("mlt.maxwl".equals(keyValuePair[0])) {
                    mlt.setMaxWordLen(Integer.parseInt(keyValuePair[1]));
                } else if ("mlt.minwl".equals(keyValuePair[0])) {
                    mlt.setMinWordLen(Integer.parseInt(keyValuePair[1]));
                }
            }
        }
        if (text != null) {
            if (FieldNames.PATH.equals(fields[0])) {
                // The text is treated as a path: look the document up by its path term
                IndexSearcher searcher = new IndexSearcher(reader);
                TermQuery q = new TermQuery(new Term(FieldNames.PATH, text));
                TopDocs top = searcher.search(q, 1);
                if (top.totalHits == 0) {
                    // No such document: fall back to treating the text as plain content
                    mlt.setFieldNames(fields);
                    moreLikeThisQuery = mlt.like(new StringReader(text), mlt.getFieldNames()[0]);
                } else {
                    ScoreDoc d = top.scoreDocs[0];
                    Document doc = reader.document(d.doc);
                    // Compare on every field of the found document except the path itself
                    List<String> fieldNames = new ArrayList<String>();
                    for (IndexableField f : doc.getFields()) {
                        if (!FieldNames.PATH.equals(f.name())) {
                            fieldNames.add(f.name());
                        }
                    }
                    String[] docFields = fieldNames.toArray(new String[fieldNames.size()]);
                    mlt.setFieldNames(docFields);
                    moreLikeThisQuery = mlt.like(d.doc);
                }
            } else {
                mlt.setFieldNames(fields);
                moreLikeThisQuery = mlt.like(new StringReader(text), mlt.getFieldNames()[0]);
            }
        }
        return moreLikeThisQuery;
    } catch (Exception e) {
        // Keep the original exception as the cause so the real failure stays diagnosable
        throw new RuntimeException("could not handle MLT query " + mltQueryString, e);
    }
}

From source file:org.apache.solr.handler.RedbubbleMoreLikeThisHandler.java

License:Apache License

/**
 * Configures the given MoreLikeThis instance from request parameters.
 * <p>
 * Each numeric setting is read from {@code params} and falls back to the corresponding
 * {@code MoreLikeThis.DEFAULT_*} constant; boosting falls back to {@code false}.
 * Every value is read and applied before the next one is read.
 *
 * @param params           the request parameters to read the settings from
 * @param similarityFields the field names the similarity query is built on
 * @param mlt              the instance to configure
 */
private void setMLTparams(SolrParams params, String[] similarityFields, MoreLikeThis mlt) {
    // Frequency thresholds
    final int minTermFreq = params.getInt(MoreLikeThisParams.MIN_TERM_FREQ,
            MoreLikeThis.DEFAULT_MIN_TERM_FREQ);
    mlt.setMinTermFreq(minTermFreq);

    final int minDocFreq = params.getInt(MoreLikeThisParams.MIN_DOC_FREQ,
            MoreLikeThis.DEFAULT_MIN_DOC_FREQ);
    mlt.setMinDocFreq(minDocFreq);

    final int maxDocFreq = params.getInt(MoreLikeThisParams.MAX_DOC_FREQ,
            MoreLikeThis.DEFAULT_MAX_DOC_FREQ);
    mlt.setMaxDocFreq(maxDocFreq);

    // Word length limits
    final int minWordLen = params.getInt(MoreLikeThisParams.MIN_WORD_LEN,
            MoreLikeThis.DEFAULT_MIN_WORD_LENGTH);
    mlt.setMinWordLen(minWordLen);

    final int maxWordLen = params.getInt(MoreLikeThisParams.MAX_WORD_LEN,
            MoreLikeThis.DEFAULT_MAX_WORD_LENGTH);
    mlt.setMaxWordLen(maxWordLen);

    // Query size limits
    final int maxQueryTerms = params.getInt(MoreLikeThisParams.MAX_QUERY_TERMS,
            MoreLikeThis.DEFAULT_MAX_QUERY_TERMS);
    mlt.setMaxQueryTerms(maxQueryTerms);

    final int maxNumTokensParsed = params.getInt(MoreLikeThisParams.MAX_NUM_TOKENS_PARSED,
            MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED);
    mlt.setMaxNumTokensParsed(maxNumTokensParsed);

    final boolean boost = params.getBool(MoreLikeThisParams.BOOST, false);
    mlt.setBoost(boost);

    mlt.setFieldNames(similarityFields);
}

From source file:org.apache.solr.search.mlt.CloudMLTQParser.java

License:Apache License

/**
 * Builds a MoreLikeThis query for the document whose id is given as the local-params
 * value.
 * <p>
 * The document is fetched via {@code getDocument(id)}. If {@code qf} is given, those
 * fields are used; otherwise every field of the document that the schema lists as stored
 * and having an explicit analyzer is used. Optional local params: {@code mintf}
 * (default 1), {@code mindf} (default 0), {@code minwl}, {@code maxwl}.
 *
 * @return the MoreLikeThis query built from the selected field values of the document
 * @throws SolrException with {@code BAD_REQUEST} if the document is missing or the query
 *                       cannot be built
 */
public Query parse() {
    String id = localParams.get(QueryParsing.V);
    // Do a Real Time Get for the document
    SolrDocument doc = getDocument(id);
    if (doc == null) {
        // Report a missing document explicitly instead of failing with a
        // NullPointerException when its fields are read further down.
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                "Error completing MLT request. Could not fetch document with id [" + id + "]");
    }

    MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
    // TODO: Are the mintf and mindf defaults ok at 1/0 ?

    mlt.setMinTermFreq(localParams.getInt("mintf", 1));
    mlt.setMinDocFreq(localParams.getInt("mindf", 0));
    if (localParams.get("minwl") != null) {
        mlt.setMinWordLen(localParams.getInt("minwl"));
    }

    if (localParams.get("maxwl") != null) {
        mlt.setMaxWordLen(localParams.getInt("maxwl"));
    }

    mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());

    String[] qf = localParams.getParams("qf");
    // Field name -> values of that field in the source document
    Map<String, Collection<Object>> filteredDocument = new HashMap<>();

    if (qf != null) {
        mlt.setFieldNames(qf);
        for (String field : qf) {
            filteredDocument.put(field, doc.getFieldValues(field));
        }
    } else {
        Map<String, SchemaField> fields = req.getSchema().getFields();
        ArrayList<String> fieldNames = new ArrayList<>();
        for (String field : doc.getFieldNames()) {
            // Only use fields that are stored and have an explicit analyzer.
            // This makes sense as the query uses tf/idf/.. for query construction.
            // We might want to relook and change this in the future though.
            SchemaField schemaField = fields.get(field);
            // Fields absent from the schema map are skipped rather than causing an NPE.
            if (schemaField != null && schemaField.stored() && schemaField.getType().isExplicitAnalyzer()) {
                fieldNames.add(field);
                filteredDocument.put(field, doc.getFieldValues(field));
            }
        }
        mlt.setFieldNames(fieldNames.toArray(new String[fieldNames.size()]));
    }

    try {
        return mlt.like(filteredDocument);
    } catch (IOException e) {
        // Attach the cause instead of printing the stack trace and discarding it
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Bad Request", e);
    }

}

From source file:org.apache.solr.search.mlt.SimpleMLTQParser.java

License:Apache License

/**
 * Builds a MoreLikeThis query for the indexed document whose unique key equals the
 * local-params value.
 * <p>
 * The document is located with an id query on the schema's unique key field. If
 * {@code qf} is given, those fields are used; otherwise every schema field that is
 * indexed, stored and not numeric is used. Optional local params: {@code mintf}
 * (default 1), {@code mindf} (default 1), {@code minwl}, {@code maxwl}.
 *
 * @return the MoreLikeThis query for the located document
 * @throws SolrException with {@code BAD_REQUEST} if the id does not match exactly one
 *                       document or the index cannot be read
 */
public Query parse() {

    String defaultField = req.getSchema().getUniqueKeyField().getName();
    String uniqueValue = localParams.get(QueryParsing.V);
    String[] qf = localParams.getParams("qf");

    SolrIndexSearcher searcher = req.getSearcher();
    Query docIdQuery = createIdQuery(defaultField, uniqueValue);

    try {
        TopDocs td = searcher.search(docIdQuery, 1);
        if (td.totalHits != 1) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    "Error completing MLT request. Could not fetch document with id [" + uniqueValue
                            + "]");
        }
        ScoreDoc[] scoreDocs = td.scoreDocs;
        MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
        // TODO: Are the mintf and mindf defaults ok at '1' ?
        mlt.setMinTermFreq(localParams.getInt("mintf", 1));
        mlt.setMinDocFreq(localParams.getInt("mindf", 1));
        if (localParams.get("minwl") != null) {
            mlt.setMinWordLen(localParams.getInt("minwl"));
        }

        if (localParams.get("maxwl") != null) {
            mlt.setMaxWordLen(localParams.getInt("maxwl"));
        }

        if (qf != null) {
            mlt.setFieldNames(qf);
        } else {
            // No explicit fields: fall back to all indexed, stored, non-numeric schema fields
            ArrayList<String> fields = new ArrayList<>();
            Map<String, SchemaField> fieldNames = req.getSearcher().getSchema().getFields();
            for (Map.Entry<String, SchemaField> entry : fieldNames.entrySet()) {
                SchemaField schemaField = entry.getValue();
                if (schemaField.indexed() && schemaField.stored()
                        && schemaField.getType().getNumericType() == null) {
                    fields.add(entry.getKey());
                }
            }
            mlt.setFieldNames(fields.toArray(new String[fields.size()]));
        }

        mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());

        return mlt.like(scoreDocs[0].doc);

    } catch (IOException e) {
        // Separate the detail from the fixed text and keep the exception as the cause
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                "Error completing MLT request: " + e.getMessage(), e);
    }
}