List of usage examples for org.apache.lucene.queries.mlt MoreLikeThis setMinTermFreq
public void setMinTermFreq(int minTermFreq)
From source file:aos.lucene.tools.BooksMoreLikeThis.java
License:Apache License
/**
 * For every document slot in the index named by the {@code index.dir} system
 * property, logs the document's title, the MoreLikeThis query built from its
 * "title" and "author" fields, and the titles of up to ten matching documents.
 *
 * @param args unused
 * @throws IllegalArgumentException if the {@code index.dir} system property is not set
 * @throws Throwable if the index cannot be opened, read or searched
 */
public static void main(String[] args) throws Throwable {
    String indexDir = System.getProperty("index.dir");
    if (indexDir == null) {
        // Fail with a clear message instead of the bare NPE from new File(null).
        throw new IllegalArgumentException("System property index.dir must be set");
    }

    // try-with-resources closes the reader and then the directory even when a
    // search or document load throws part-way through the loop.
    try (FSDirectory directory = FSDirectory.open(new File(indexDir));
            IndexReader reader = DirectoryReader.open(directory)) {

        IndexSearcher searcher = new IndexSearcher(reader);
        int numDocs = reader.maxDoc();

        MoreLikeThis mlt = new MoreLikeThis(reader);
        mlt.setFieldNames(new String[] { "title", "author" });
        // Accept terms that occur only once in the source document / in the index.
        mlt.setMinTermFreq(1);
        mlt.setMinDocFreq(1);

        for (int docID = 0; docID < numDocs; docID++) {
            // Blank line between documents.
            LOGGER.info("");
            Document doc = reader.document(docID);
            LOGGER.info(doc.get("title"));

            Query query = mlt.like(docID);
            LOGGER.info(" query=" + query);

            TopDocs similarDocs = searcher.search(query, 10);
            if (similarDocs.totalHits == 0) {
                LOGGER.info(" None like this");
            }
            for (ScoreDoc hit : similarDocs.scoreDocs) {
                // The source document matches its own query; do not list it.
                if (hit.doc != docID) {
                    // Document.get avoids the NPE that getField(...).stringValue()
                    // raises when a hit carries no "title" field.
                    LOGGER.info(" -> " + reader.document(hit.doc).get("title"));
                }
            }
        }
    }
}
From source file:com.mathworks.xzheng.tools.BooksMoreLikeThis.java
License:Apache License
public static void main(String[] args) throws Throwable { String indexDir = System.getProperty("index.dir"); FSDirectory directory = FSDirectory.open(new File(indexDir)); IndexReader reader = IndexReader.open(directory); IndexSearcher searcher = new IndexSearcher(reader); int numDocs = reader.maxDoc(); MoreLikeThis mlt = new MoreLikeThis(reader); // #A mlt.setFieldNames(new String[] { "title", "author" }); mlt.setMinTermFreq(1); // #B mlt.setMinDocFreq(1);/* www . j av a 2 s . c o m*/ for (int docID = 0; docID < numDocs; docID++) { // #C System.out.println(); Document doc = reader.document(docID); System.out.println(doc.get("title")); Query query = mlt.like(docID); // #D System.out.println(" query=" + query); TopDocs similarDocs = searcher.search(query, 10); if (similarDocs.totalHits == 0) System.out.println(" None like this"); for (int i = 0; i < similarDocs.scoreDocs.length; i++) { if (similarDocs.scoreDocs[i].doc != docID) { // #E doc = reader.document(similarDocs.scoreDocs[i].doc); System.out.println(" -> " + doc.getField("title").stringValue()); } } } reader.close(); directory.close(); }
From source file:com.qwazr.search.query.MoreLikeThisQuery.java
License:Apache License
@Override final public Query getQuery(QueryContext queryContext) throws IOException, ParseException { Objects.requireNonNull(doc_num, "The doc_num field is missing"); final MoreLikeThis mlt = new MoreLikeThis(queryContext.indexSearcher.getIndexReader()); if (is_boost != null) mlt.setBoost(is_boost);//from w ww . ja v a 2 s. c om if (boost_factor != null) mlt.setBoostFactor(boost_factor); if (fieldnames != null) mlt.setFieldNames(fieldnames); if (max_doc_freq != null) mlt.setMaxDocFreq(max_doc_freq); if (max_doc_freq_pct != null) mlt.setMaxDocFreqPct(max_doc_freq_pct); if (max_num_tokens_parsed != null) mlt.setMaxNumTokensParsed(max_num_tokens_parsed); if (max_query_terms != null) mlt.setMaxQueryTerms(max_query_terms); if (max_word_len != null) mlt.setMaxWordLen(max_word_len); if (min_doc_freq != null) mlt.setMinDocFreq(min_doc_freq); if (min_term_freq != null) mlt.setMinTermFreq(min_term_freq); if (min_word_len != null) mlt.setMinWordLen(min_word_len); if (stop_words != null) mlt.setStopWords(stop_words); mlt.setAnalyzer(queryContext.analyzer); return mlt.like(doc_num); }
From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java
License:Open Source License
public QueryResult performQuery(String aQueryString, String aBacklink, String aBasePath, Configuration aConfiguration, Map<String, String> aDrilldownFields) throws IOException { searcherManager.maybeRefreshBlocking(); IndexSearcher theSearcher = searcherManager.acquire(); SortedSetDocValuesReaderState theSortedSetState = new DefaultSortedSetDocValuesReaderState( theSearcher.getIndexReader()); List<QueryResultDocument> theResultDocuments = new ArrayList<>(); long theStartTime = System.currentTimeMillis(); LOGGER.info("Querying for " + aQueryString); DateFormat theDateFormat = new SimpleDateFormat("dd.MMMM.yyyy", Locale.ENGLISH); try {//w w w. j a v a2 s . c om List<FacetDimension> theDimensions = new ArrayList<>(); // Search only if a search query is given if (!StringUtils.isEmpty(aQueryString)) { Query theQuery = computeBooleanQueryFor(aQueryString); LOGGER.info(" query is " + theQuery); theQuery = theQuery.rewrite(theSearcher.getIndexReader()); LOGGER.info(" rewritten query is " + theQuery); DrillDownQuery theDrilldownQuery = new DrillDownQuery(facetsConfig, theQuery); aDrilldownFields.entrySet().stream().forEach(aEntry -> { LOGGER.info(" with Drilldown " + aEntry.getKey() + " for " + aEntry.getValue()); theDrilldownQuery.add(aEntry.getKey(), aEntry.getValue()); }); FacetsCollector theFacetCollector = new FacetsCollector(); TopDocs theDocs = FacetsCollector.search(theSearcher, theDrilldownQuery, null, aConfiguration.getNumberOfSearchResults(), theFacetCollector); SortedSetDocValuesFacetCounts theFacetCounts = new SortedSetDocValuesFacetCounts(theSortedSetState, theFacetCollector); List<Facet> theAuthorFacets = new ArrayList<>(); List<Facet> theFileTypesFacets = new ArrayList<>(); List<Facet> theLastModifiedYearFacet = new ArrayList<>(); List<Facet> theLanguageFacet = new ArrayList<>(); LOGGER.info("Found " + theDocs.scoreDocs.length + " documents"); // We need this cache to detect duplicate documents while searching for similarities Set<Integer> 
theUniqueDocumentsFound = new HashSet<>(); Map<String, QueryResultDocument> theDocumentsByHash = new HashMap<>(); for (int i = 0; i < theDocs.scoreDocs.length; i++) { int theDocumentID = theDocs.scoreDocs[i].doc; theUniqueDocumentsFound.add(theDocumentID); Document theDocument = theSearcher.doc(theDocumentID); String theUniqueID = theDocument.getField(IndexFields.UNIQUEID).stringValue(); String theFoundFileName = theDocument.getField(IndexFields.FILENAME).stringValue(); String theHash = theDocument.getField(IndexFields.CONTENTMD5).stringValue(); QueryResultDocument theExistingDocument = theDocumentsByHash.get(theHash); if (theExistingDocument != null) { theExistingDocument.addFileName(theFoundFileName); } else { Date theLastModified = new Date( theDocument.getField(IndexFields.LASTMODIFIED).numericValue().longValue()); SupportedLanguage theLanguage = SupportedLanguage .valueOf(theDocument.getField(IndexFields.LANGUAGESTORED).stringValue()); String theFieldName; if (analyzerCache.supportsLanguage(theLanguage)) { theFieldName = analyzerCache.getFieldNameFor(theLanguage); } else { theFieldName = IndexFields.CONTENT; } String theOriginalContent = theDocument.getField(theFieldName).stringValue(); final Query theFinalQuery = theQuery; ForkJoinTask<String> theHighligherResult = executorPool.submit(() -> { StringBuilder theResult = new StringBuilder(theDateFormat.format(theLastModified)); theResult.append(" - "); Highlighter theHighlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(theFinalQuery)); for (String theFragment : theHighlighter.getBestFragments(analyzer, theFieldName, theOriginalContent, NUMBER_OF_FRAGMENTS)) { if (theResult.length() > 0) { theResult = theResult.append("..."); } theResult = theResult.append(theFragment); } return theResult.toString(); }); int theNormalizedScore = (int) (theDocs.scoreDocs[i].score / theDocs.getMaxScore() * 5); File theFileOnDisk = new File(theFoundFileName); if (theFileOnDisk.exists()) { boolean 
thePreviewAvailable = previewProcessor.previewAvailableFor(theFileOnDisk); theExistingDocument = new QueryResultDocument(theDocumentID, theFoundFileName, theHighligherResult, Long.parseLong(theDocument.getField(IndexFields.LASTMODIFIED).stringValue()), theNormalizedScore, theUniqueID, thePreviewAvailable); theDocumentsByHash.put(theHash, theExistingDocument); theResultDocuments.add(theExistingDocument); } } } if (aConfiguration.isShowSimilarDocuments()) { MoreLikeThis theMoreLikeThis = new MoreLikeThis(theSearcher.getIndexReader()); theMoreLikeThis.setAnalyzer(analyzer); theMoreLikeThis.setMinTermFreq(1); theMoreLikeThis.setMinDocFreq(1); theMoreLikeThis.setFieldNames(analyzerCache.getAllFieldNames()); for (QueryResultDocument theDocument : theResultDocuments) { Query theMoreLikeThisQuery = theMoreLikeThis.like(theDocument.getDocumentID()); TopDocs theMoreLikeThisTopDocs = theSearcher.search(theMoreLikeThisQuery, 5); for (ScoreDoc theMoreLikeThisScoreDoc : theMoreLikeThisTopDocs.scoreDocs) { int theSimilarDocument = theMoreLikeThisScoreDoc.doc; if (theUniqueDocumentsFound.add(theSimilarDocument)) { Document theMoreLikeThisDocument = theSearcher.doc(theSimilarDocument); String theFilename = theMoreLikeThisDocument.getField(IndexFields.FILENAME) .stringValue(); theDocument.addSimilarFile(theFilename); } } } } LOGGER.info("Got Dimensions"); for (FacetResult theResult : theFacetCounts.getAllDims(20000)) { String theDimension = theResult.dim; if ("author".equals(theDimension)) { for (LabelAndValue theLabelAndValue : theResult.labelValues) { if (!StringUtils.isEmpty(theLabelAndValue.label)) { theAuthorFacets.add(new Facet(theLabelAndValue.label, theLabelAndValue.value.intValue(), aBasePath + "/" + encode( FacetSearchUtils.encode(theDimension, theLabelAndValue.label)))); } } } if ("extension".equals(theDimension)) { for (LabelAndValue theLabelAndValue : theResult.labelValues) { if (!StringUtils.isEmpty(theLabelAndValue.label)) { theFileTypesFacets.add(new 
Facet(theLabelAndValue.label, theLabelAndValue.value.intValue(), aBasePath + "/" + encode( FacetSearchUtils.encode(theDimension, theLabelAndValue.label)))); } } } if ("last-modified-year".equals(theDimension)) { for (LabelAndValue theLabelAndValue : theResult.labelValues) { if (!StringUtils.isEmpty(theLabelAndValue.label)) { theLastModifiedYearFacet.add(new Facet(theLabelAndValue.label, theLabelAndValue.value.intValue(), aBasePath + "/" + encode( FacetSearchUtils.encode(theDimension, theLabelAndValue.label)))); } } } if (IndexFields.LANGUAGEFACET.equals(theDimension)) { for (LabelAndValue theLabelAndValue : theResult.labelValues) { if (!StringUtils.isEmpty(theLabelAndValue.label)) { Locale theLocale = new Locale(theLabelAndValue.label); theLanguageFacet.add(new Facet(theLocale.getDisplayLanguage(Locale.ENGLISH), theLabelAndValue.value.intValue(), aBasePath + "/" + encode( FacetSearchUtils.encode(theDimension, theLabelAndValue.label)))); } } } LOGGER.info(" " + theDimension); } if (!theAuthorFacets.isEmpty()) { theDimensions.add(new FacetDimension("Author", theAuthorFacets)); } if (!theLastModifiedYearFacet.isEmpty()) { theDimensions.add(new FacetDimension("Last modified", theLastModifiedYearFacet)); } if (!theFileTypesFacets.isEmpty()) { theDimensions.add(new FacetDimension("File types", theFileTypesFacets)); } if (!theLanguageFacet.isEmpty()) { theDimensions.add(new FacetDimension("Language", theLanguageFacet)); } // Wait for all Tasks to complete for the search result highlighter ForkJoinTask.helpQuiesce(); } long theDuration = System.currentTimeMillis() - theStartTime; LOGGER.info("Total amount of time : " + theDuration + "ms"); return new QueryResult(System.currentTimeMillis() - theStartTime, theResultDocuments, theDimensions, theSearcher.getIndexReader().numDocs(), aBacklink); } catch (Exception e) { throw new RuntimeException(e); } finally { searcherManager.release(theSearcher); } }
From source file:fr.univ_tours.etu.searcher.LikeThisTest.java
/**
 * Prints the title and content of up to ten indexed documents that
 * MoreLikeThis considers similar to the given text.
 *
 * @param searchForSimilar the text to find similar documents for
 * @throws IOException if the index cannot be opened, searched or read
 */
private void findSilimar(String searchForSimilar) throws IOException {
    // The reader was never closed before; try-with-resources releases it on
    // both normal exit and failure.
    try (IndexReader reader = DirectoryReader.open(indexDir)) {
        IndexSearcher indexSearcher = new IndexSearcher(reader);

        MoreLikeThis mlt = new MoreLikeThis(reader);
        // Thresholds of 0 disable the minimum term/document frequency filters.
        mlt.setMinTermFreq(0);
        mlt.setMinDocFreq(0);
        mlt.setFieldNames(new String[] { "title", "content" });
        mlt.setAnalyzer(analyzer);

        Reader sReader = new StringReader(searchForSimilar);
        Query query = mlt.like("content", sReader);

        TopDocs topDocs = indexSearcher.search(query, 10);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document aSimilar = indexSearcher.doc(scoreDoc.doc);
            String similarTitle = aSimilar.get("title");
            String similarContent = aSimilar.get("content");

            System.out.println("====similar finded====");
            System.out.println("title: " + similarTitle);
            System.out.println("content: " + similarContent);
        }
    }
}
From source file:fr.univ_tours.etu.searcher.Searcher.java
/**
 * Runs a multi-field search for the given request and, when query expansion
 * is enabled, appends documents that MoreLikeThis finds similar to the
 * summaries of the first (at most five) results.
 *
 * <p>If the request has a contents query, it is tokenized; its lemma string
 * replaces the raw text for the contents and synonyms fields, and any named
 * entities found are searched in an additional named-entities field.
 *
 * @param query the per-field query strings and the query-expansion switch
 * @return the direct hits in rank order, followed by any expanded results
 * @throws IOException    if the index cannot be read
 * @throws ParseException if a query string cannot be parsed
 */
public List<ResultObject> search(SearchQueriesRequest query) throws IOException, ParseException {

    Map<String, String> queriesDictionary = query.getQueriesDictionary();
    boolean useQueryExpansion = query.isUseQueryExpansion();
    List<Integer> docsToExpand = (useQueryExpansion) ? new ArrayList<>() : null;

    // Parallel lists: fsa holds field names, qsa the query string for each field.
    List<String> fsa = new ArrayList<>();
    List<String> qsa = new ArrayList<>();

    String contentLemmas = "";
    if (queriesDictionary.containsKey(DocFields.CONTENTS)) {
        regularTokenizer.tokenize(queriesDictionary.get(DocFields.CONTENTS), true);
        caselessTokenizer.tokenize(queriesDictionary.get(DocFields.CONTENTS), true);
        contentLemmas = caselessTokenizer.getLemmaString();
        System.out.println("Lemmas: " + caselessTokenizer.getLemmaList());

        String neString = "";
        if (caselessTokenizer.getNeList() != null && caselessTokenizer.getNeList().size() != 0) {
            neString = caselessTokenizer.getNeString(";", true);
            System.out.println("NE caseless: " + neString);
        }
        if (regularTokenizer.getNeList() != null && regularTokenizer.getNeList().size() != 0) {
            String regularNe = regularTokenizer.getNeString(";", true);
            // Only insert the separator when there is something to separate, so
            // the string never starts with a stray ";".
            neString = neString.isEmpty() ? regularNe : neString + ";" + regularNe;
            System.out.println("NE all: " + neString);
        }
        if (!"".equals(neString)) {
            fsa.add(DocFields.NAMED_ENTITIES);
            qsa.add(neString);
        }
    }

    for (Map.Entry<String, String> entry : queriesDictionary.entrySet()) {
        fsa.add(entry.getKey());
        if (entry.getKey().equals(DocFields.CONTENTS) || entry.getKey().equals(DocFields.SYNONYMS)) {
            qsa.add(contentLemmas);
        } else {
            qsa.add(entry.getValue());
        }
    }

    Query q = MultiFieldQueryParser.parse(qsa.toArray(new String[qsa.size()]),
            fsa.toArray(new String[fsa.size()]), analyzer);

    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs docs = searcher.search(q, this.numRetrievedDocs);
    ScoreDoc[] hits = docs.scoreDocs;

    List<ResultObject> resultObjects = new ArrayList<>();
    for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        if (useQueryExpansion) {
            docsToExpand.add(docId);
        }
        Document d = searcher.doc(docId);
        resultObjects.add(new ResultObject(docId, i, d.get(DocFields.TITLE), d.get(DocFields.AUTHOR),
                d.get(DocFields.FILE_PATH), d.get(DocFields.SUMMARY), d.get(DocFields.FILE_NAME)));
    }

    if (useQueryExpansion) {
        // The reader is closed and reopened before expansion, as in the original.
        reader.close();
        this.reader = DirectoryReader.open(FSDirectory.open(new File(this.indexDir).toPath()));
        searcher = new IndexSearcher(reader);

        MoreLikeThis mlt = new MoreLikeThis(reader);
        mlt.setMinTermFreq(0);
        mlt.setMinDocFreq(0);
        mlt.setAnalyzer(analyzer);

        for (int i = 0; i < Math.min(docsToExpand.size(), 5); i++) {
            String summary = resultObjects.get(i).getSummary();
            if (summary == null) {
                // Nothing to expand from; StringReader would throw on null.
                continue;
            }
            Reader r = new StringReader(summary);
            Query expandedQuery = mlt.like(DocFields.CONTENTS, r);

            TopDocs topDocs = searcher.search(expandedQuery, 5);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                if (!docsToExpand.contains(scoreDoc.doc)) {
                    docsToExpand.add(scoreDoc.doc);
                    Document aSimilar = searcher.doc(scoreDoc.doc);
                    // Record the real document id of the expanded hit (was a hard-coded 1).
                    resultObjects.add(new ResultObject(scoreDoc.doc, resultObjects.size(),
                            aSimilar.get(DocFields.TITLE), aSimilar.get(DocFields.AUTHOR),
                            aSimilar.get(DocFields.FILE_PATH), aSimilar.get(DocFields.SUMMARY),
                            aSimilar.get(DocFields.FILE_NAME)));
                }
            }
        }
    }

    return resultObjects;
}
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.util.MoreLikeThisHelper.java
License:Apache License
/**
 * Builds a MoreLikeThis query from a native query string of
 * {@code key=value} pairs joined by {@code &}.
 *
 * <p>Recognised keys: {@code stream.body} (the text, or a path value),
 * {@code mlt.fl} (comma-separated field names), {@code mlt.mindf},
 * {@code mlt.mintf}, {@code mlt.boost}, {@code mlt.qf}, {@code mlt.maxdf},
 * {@code mlt.maxdfp}, {@code mlt.maxntp}, {@code mlt.maxqt}, {@code mlt.maxwl}
 * and {@code mlt.minwl}. Unknown keys are ignored.
 *
 * <p>When the first field is the path field and a document with that path
 * exists, the query is built from that document over all of its fields except
 * the path field. Otherwise the query is built from the body text.
 *
 * @param reader         the index reader to build the query against
 * @param analyzer       the analyzer handed to MoreLikeThis
 * @param mltQueryString the native query string
 * @return the query, or {@code null} when no {@code stream.body} was given
 * @throws RuntimeException if the query string cannot be parsed or handled;
 *                          the underlying failure is attached as the cause
 */
public static Query getMoreLikeThis(IndexReader reader, Analyzer analyzer, String mltQueryString) {
    Query moreLikeThisQuery = null;
    MoreLikeThis mlt = new MoreLikeThis(reader);
    mlt.setAnalyzer(analyzer);
    try {
        String text = null;
        String[] fields = {};
        for (String param : mltQueryString.split("&")) {
            String[] keyValuePair = param.split("=");
            if (keyValuePair.length != 2 || keyValuePair[0] == null || keyValuePair[1] == null) {
                throw new RuntimeException("Unparsable native Lucene MLT query: " + mltQueryString);
            }
            String key = keyValuePair[0];
            String value = keyValuePair[1];
            if ("stream.body".equals(key)) {
                text = value;
            } else if ("mlt.fl".equals(key)) {
                fields = value.split(",");
            } else if ("mlt.mindf".equals(key)) {
                mlt.setMinDocFreq(Integer.parseInt(value));
            } else if ("mlt.mintf".equals(key)) {
                mlt.setMinTermFreq(Integer.parseInt(value));
            } else if ("mlt.boost".equals(key)) {
                mlt.setBoost(Boolean.parseBoolean(value));
            } else if ("mlt.qf".equals(key)) {
                mlt.setBoostFactor(Float.parseFloat(value));
            } else if ("mlt.maxdf".equals(key)) {
                mlt.setMaxDocFreq(Integer.parseInt(value));
            } else if ("mlt.maxdfp".equals(key)) {
                mlt.setMaxDocFreqPct(Integer.parseInt(value));
            } else if ("mlt.maxntp".equals(key)) {
                mlt.setMaxNumTokensParsed(Integer.parseInt(value));
            } else if ("mlt.maxqt".equals(key)) {
                mlt.setMaxQueryTerms(Integer.parseInt(value));
            } else if ("mlt.maxwl".equals(key)) {
                mlt.setMaxWordLen(Integer.parseInt(value));
            } else if ("mlt.minwl".equals(key)) {
                mlt.setMinWordLen(Integer.parseInt(value));
            }
        }

        if (text != null) {
            if (fields.length == 0) {
                // Name the real problem instead of failing on fields[0] below.
                throw new IllegalArgumentException("mlt.fl must name at least one field");
            }

            // When the first field is the path field, try to resolve the body as
            // the path of an existing document.
            ScoreDoc pathHit = null;
            if (FieldNames.PATH.equals(fields[0])) {
                IndexSearcher searcher = new IndexSearcher(reader);
                TermQuery q = new TermQuery(new Term(FieldNames.PATH, text));
                TopDocs top = searcher.search(q, 1);
                if (top.totalHits != 0) {
                    pathHit = top.scoreDocs[0];
                }
            }

            if (pathHit != null) {
                // Compare on every field of the found document except its path.
                Document doc = reader.document(pathHit.doc);
                List<String> fieldNames = new ArrayList<String>();
                for (IndexableField f : doc.getFields()) {
                    if (!FieldNames.PATH.equals(f.name())) {
                        fieldNames.add(f.name());
                    }
                }
                mlt.setFieldNames(fieldNames.toArray(new String[fieldNames.size()]));
                moreLikeThisQuery = mlt.like(pathHit.doc);
            } else {
                // No such document (or not a path lookup): treat the body as free text.
                mlt.setFieldNames(fields);
                moreLikeThisQuery = mlt.like(new StringReader(text), mlt.getFieldNames()[0]);
            }
        }
        return moreLikeThisQuery;
    } catch (Exception e) {
        // Keep the cause so the actual failure is not lost.
        throw new RuntimeException("could not handle MLT query " + mltQueryString, e);
    }
}
From source file:org.apache.solr.handler.RedbubbleMoreLikeThisHandler.java
License:Apache License
/**
 * Copies the MoreLikeThis tuning options from the request parameters onto
 * {@code mlt}. Options missing from the request fall back to the matching
 * {@code MoreLikeThis.DEFAULT_*} constant; boosting falls back to {@code false}.
 *
 * @param params           request parameters to read the options from
 * @param similarityFields field names to set on {@code mlt}
 * @param mlt              the instance to configure
 */
private void setMLTparams(SolrParams params, String[] similarityFields, MoreLikeThis mlt) {
    // Frequency thresholds.
    final int minTermFreq = params.getInt(MoreLikeThisParams.MIN_TERM_FREQ, MoreLikeThis.DEFAULT_MIN_TERM_FREQ);
    mlt.setMinTermFreq(minTermFreq);
    final int minDocFreq = params.getInt(MoreLikeThisParams.MIN_DOC_FREQ, MoreLikeThis.DEFAULT_MIN_DOC_FREQ);
    mlt.setMinDocFreq(minDocFreq);
    final int maxDocFreq = params.getInt(MoreLikeThisParams.MAX_DOC_FREQ, MoreLikeThis.DEFAULT_MAX_DOC_FREQ);
    mlt.setMaxDocFreq(maxDocFreq);

    // Word length bounds.
    final int minWordLen = params.getInt(MoreLikeThisParams.MIN_WORD_LEN, MoreLikeThis.DEFAULT_MIN_WORD_LENGTH);
    mlt.setMinWordLen(minWordLen);
    final int maxWordLen = params.getInt(MoreLikeThisParams.MAX_WORD_LEN, MoreLikeThis.DEFAULT_MAX_WORD_LENGTH);
    mlt.setMaxWordLen(maxWordLen);

    // Query size limits.
    final int maxQueryTerms = params.getInt(MoreLikeThisParams.MAX_QUERY_TERMS,
            MoreLikeThis.DEFAULT_MAX_QUERY_TERMS);
    mlt.setMaxQueryTerms(maxQueryTerms);
    final int maxNumTokensParsed = params.getInt(MoreLikeThisParams.MAX_NUM_TOKENS_PARSED,
            MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED);
    mlt.setMaxNumTokensParsed(maxNumTokensParsed);

    final boolean boost = params.getBool(MoreLikeThisParams.BOOST, false);
    mlt.setBoost(boost);

    mlt.setFieldNames(similarityFields);
}
From source file:org.apache.solr.search.mlt.CloudMLTQParser.java
License:Apache License
public Query parse() { String id = localParams.get(QueryParsing.V); // Do a Real Time Get for the document SolrDocument doc = getDocument(id);//from w w w . jav a 2 s . c o m MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader()); // TODO: Are the mintf and mindf defaults ok at 1/0 ? mlt.setMinTermFreq(localParams.getInt("mintf", 1)); mlt.setMinDocFreq(localParams.getInt("mindf", 0)); if (localParams.get("minwl") != null) mlt.setMinWordLen(localParams.getInt("minwl")); if (localParams.get("maxwl") != null) mlt.setMaxWordLen(localParams.getInt("maxwl")); mlt.setAnalyzer(req.getSchema().getIndexAnalyzer()); String[] qf = localParams.getParams("qf"); Map<String, Collection<Object>> filteredDocument = new HashMap(); if (qf != null) { mlt.setFieldNames(qf); for (String field : qf) { filteredDocument.put(field, doc.getFieldValues(field)); } } else { Map<String, SchemaField> fields = req.getSchema().getFields(); ArrayList<String> fieldNames = new ArrayList(); for (String field : doc.getFieldNames()) { // Only use fields that are stored and have an explicit analyzer. // This makes sense as the query uses tf/idf/.. for query construction. // We might want to relook and change this in the future though. if (fields.get(field).stored() && fields.get(field).getType().isExplicitAnalyzer()) { fieldNames.add(field); filteredDocument.put(field, doc.getFieldValues(field)); } } mlt.setFieldNames(fieldNames.toArray(new String[fieldNames.size()])); } try { return mlt.like(filteredDocument); } catch (IOException e) { e.printStackTrace(); throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Bad Request"); } }
From source file:org.apache.solr.search.mlt.SimpleMLTQParser.java
License:Apache License
public Query parse() { String defaultField = req.getSchema().getUniqueKeyField().getName(); String uniqueValue = localParams.get(QueryParsing.V); String[] qf = localParams.getParams("qf"); SolrIndexSearcher searcher = req.getSearcher(); Query docIdQuery = createIdQuery(defaultField, uniqueValue); try {/*w ww . java2 s . co m*/ TopDocs td = searcher.search(docIdQuery, 1); if (td.totalHits != 1) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch " + "document with id [" + uniqueValue + "]"); ScoreDoc[] scoreDocs = td.scoreDocs; MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader()); // TODO: Are the mintf and mindf defaults ok at '1' ? mlt.setMinTermFreq(localParams.getInt("mintf", 1)); mlt.setMinDocFreq(localParams.getInt("mindf", 1)); if (localParams.get("minwl") != null) mlt.setMinWordLen(localParams.getInt("minwl")); if (localParams.get("maxwl") != null) mlt.setMaxWordLen(localParams.getInt("maxwl")); ArrayList<String> fields = new ArrayList(); if (qf != null) { mlt.setFieldNames(qf); } else { Map<String, SchemaField> fieldNames = req.getSearcher().getSchema().getFields(); for (String fieldName : fieldNames.keySet()) { if (fieldNames.get(fieldName).indexed() && fieldNames.get(fieldName).stored()) if (fieldNames.get(fieldName).getType().getNumericType() == null) fields.add(fieldName); } mlt.setFieldNames(fields.toArray(new String[fields.size()])); } mlt.setAnalyzer(req.getSchema().getIndexAnalyzer()); return mlt.like(scoreDocs[0].doc); } catch (IOException e) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request" + e.getMessage()); } }