List of usage examples for `org.apache.lucene.queries.mlt.MoreLikeThis#setAnalyzer(Analyzer)`
public void setAnalyzer(Analyzer analyzer)
From source file:com.qwazr.search.query.MoreLikeThisQuery.java
License:Apache License
@Override final public Query getQuery(QueryContext queryContext) throws IOException, ParseException { Objects.requireNonNull(doc_num, "The doc_num field is missing"); final MoreLikeThis mlt = new MoreLikeThis(queryContext.indexSearcher.getIndexReader()); if (is_boost != null) mlt.setBoost(is_boost);//from ww w . j a va 2 s . com if (boost_factor != null) mlt.setBoostFactor(boost_factor); if (fieldnames != null) mlt.setFieldNames(fieldnames); if (max_doc_freq != null) mlt.setMaxDocFreq(max_doc_freq); if (max_doc_freq_pct != null) mlt.setMaxDocFreqPct(max_doc_freq_pct); if (max_num_tokens_parsed != null) mlt.setMaxNumTokensParsed(max_num_tokens_parsed); if (max_query_terms != null) mlt.setMaxQueryTerms(max_query_terms); if (max_word_len != null) mlt.setMaxWordLen(max_word_len); if (min_doc_freq != null) mlt.setMinDocFreq(min_doc_freq); if (min_term_freq != null) mlt.setMinTermFreq(min_term_freq); if (min_word_len != null) mlt.setMinWordLen(min_word_len); if (stop_words != null) mlt.setStopWords(stop_words); mlt.setAnalyzer(queryContext.analyzer); return mlt.like(doc_num); }
From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java
License:Open Source License
/**
 * Executes a faceted full-text search and assembles the complete result page.
 * <p>
 * Steps visible in this method: refresh/acquire an {@code IndexSearcher},
 * run the (drilldown-decorated) query with facet collection, deduplicate hits
 * by content hash, kick off asynchronous highlighting per hit, optionally add
 * "more like this" similar files, and finally translate facet counts into
 * UI dimensions. The searcher is always released in the {@code finally} block.
 *
 * @param aQueryString     the raw user query; an empty query yields an empty result page
 * @param aBacklink        back-navigation link passed through to the result
 * @param aBasePath        base URL prefix used when building facet drilldown links
 * @param aConfiguration   supplies result-count limit and the similar-documents switch
 * @param aDrilldownFields facet dimension -> value pairs to drill into
 * @throws IOException declared, but any failure is rethrown as RuntimeException (see catch)
 */
public QueryResult performQuery(String aQueryString, String aBacklink, String aBasePath,
        Configuration aConfiguration, Map<String, String> aDrilldownFields) throws IOException {
    // Make sure the searcher sees the latest commits before acquiring it.
    searcherManager.maybeRefreshBlocking();
    IndexSearcher theSearcher = searcherManager.acquire();
    SortedSetDocValuesReaderState theSortedSetState = new DefaultSortedSetDocValuesReaderState(
            theSearcher.getIndexReader());
    List<QueryResultDocument> theResultDocuments = new ArrayList<>();
    long theStartTime = System.currentTimeMillis();
    LOGGER.info("Querying for " + aQueryString);
    // SimpleDateFormat is not thread-safe; this instance is captured by the
    // highlighter lambdas below — NOTE(review): assumes the executor tasks do
    // not format concurrently in a way that corrupts state; verify.
    DateFormat theDateFormat = new SimpleDateFormat("dd.MMMM.yyyy", Locale.ENGLISH);
    try {
        List<FacetDimension> theDimensions = new ArrayList<>();
        // Search only if a search query is given
        if (!StringUtils.isEmpty(aQueryString)) {
            Query theQuery = computeBooleanQueryFor(aQueryString);
            LOGGER.info(" query is " + theQuery);
            theQuery = theQuery.rewrite(theSearcher.getIndexReader());
            LOGGER.info(" rewritten query is " + theQuery);
            // Wrap the text query so the requested facet drilldowns constrain it.
            DrillDownQuery theDrilldownQuery = new DrillDownQuery(facetsConfig, theQuery);
            aDrilldownFields.entrySet().stream().forEach(aEntry -> {
                LOGGER.info(" with Drilldown " + aEntry.getKey() + " for " + aEntry.getValue());
                theDrilldownQuery.add(aEntry.getKey(), aEntry.getValue());
            });
            // Collect facet counts in the same pass as the top documents.
            FacetsCollector theFacetCollector = new FacetsCollector();
            TopDocs theDocs = FacetsCollector.search(theSearcher, theDrilldownQuery, null,
                    aConfiguration.getNumberOfSearchResults(), theFacetCollector);
            SortedSetDocValuesFacetCounts theFacetCounts = new SortedSetDocValuesFacetCounts(theSortedSetState,
                    theFacetCollector);
            List<Facet> theAuthorFacets = new ArrayList<>();
            List<Facet> theFileTypesFacets = new ArrayList<>();
            List<Facet> theLastModifiedYearFacet = new ArrayList<>();
            List<Facet> theLanguageFacet = new ArrayList<>();
            LOGGER.info("Found " + theDocs.scoreDocs.length + " documents");
            // We need this cache to detect duplicate documents while searching for similarities
            Set<Integer> theUniqueDocumentsFound = new HashSet<>();
            // Documents with the same content hash are merged into one result entry.
            Map<String, QueryResultDocument> theDocumentsByHash = new HashMap<>();
            for (int i = 0; i < theDocs.scoreDocs.length; i++) {
                int theDocumentID = theDocs.scoreDocs[i].doc;
                theUniqueDocumentsFound.add(theDocumentID);
                Document theDocument = theSearcher.doc(theDocumentID);
                String theUniqueID = theDocument.getField(IndexFields.UNIQUEID).stringValue();
                String theFoundFileName = theDocument.getField(IndexFields.FILENAME).stringValue();
                String theHash = theDocument.getField(IndexFields.CONTENTMD5).stringValue();
                QueryResultDocument theExistingDocument = theDocumentsByHash.get(theHash);
                if (theExistingDocument != null) {
                    // Duplicate content: just record the additional file name.
                    theExistingDocument.addFileName(theFoundFileName);
                } else {
                    Date theLastModified = new Date(
                            theDocument.getField(IndexFields.LASTMODIFIED).numericValue().longValue());
                    SupportedLanguage theLanguage = SupportedLanguage
                            .valueOf(theDocument.getField(IndexFields.LANGUAGESTORED).stringValue());
                    // Highlight against the language-specific field when an
                    // analyzer for the document language is available.
                    String theFieldName;
                    if (analyzerCache.supportsLanguage(theLanguage)) {
                        theFieldName = analyzerCache.getFieldNameFor(theLanguage);
                    } else {
                        theFieldName = IndexFields.CONTENT;
                    }
                    String theOriginalContent = theDocument.getField(theFieldName).stringValue();
                    final Query theFinalQuery = theQuery;
                    // Highlighting is expensive, so it runs asynchronously; the
                    // result task is handed to the QueryResultDocument and all
                    // tasks are awaited via helpQuiesce() further below.
                    ForkJoinTask<String> theHighligherResult = executorPool.submit(() -> {
                        StringBuilder theResult = new StringBuilder(theDateFormat.format(theLastModified));
                        theResult.append(" - ");
                        Highlighter theHighlighter = new Highlighter(new SimpleHTMLFormatter(),
                                new QueryScorer(theFinalQuery));
                        for (String theFragment : theHighlighter.getBestFragments(analyzer, theFieldName,
                                theOriginalContent, NUMBER_OF_FRAGMENTS)) {
                            // NOTE(review): theResult already contains the date
                            // prefix, so this length check is always true and
                            // "..." precedes every fragment — confirm intended.
                            if (theResult.length() > 0) {
                                theResult = theResult.append("...");
                            }
                            theResult = theResult.append(theFragment);
                        }
                        return theResult.toString();
                    });
                    // Scale the score to a 0..5 star-style rating relative to the best hit.
                    int theNormalizedScore = (int) (theDocs.scoreDocs[i].score / theDocs.getMaxScore() * 5);
                    File theFileOnDisk = new File(theFoundFileName);
                    // Files deleted since indexing are silently skipped.
                    if (theFileOnDisk.exists()) {
                        boolean thePreviewAvailable = previewProcessor.previewAvailableFor(theFileOnDisk);
                        theExistingDocument = new QueryResultDocument(theDocumentID, theFoundFileName,
                                theHighligherResult,
                                Long.parseLong(theDocument.getField(IndexFields.LASTMODIFIED).stringValue()),
                                theNormalizedScore, theUniqueID, thePreviewAvailable);
                        theDocumentsByHash.put(theHash, theExistingDocument);
                        theResultDocuments.add(theExistingDocument);
                    }
                }
            }
            if (aConfiguration.isShowSimilarDocuments()) {
                // For every hit, attach up to 5 similar files not already in the result set.
                MoreLikeThis theMoreLikeThis = new MoreLikeThis(theSearcher.getIndexReader());
                theMoreLikeThis.setAnalyzer(analyzer);
                theMoreLikeThis.setMinTermFreq(1);
                theMoreLikeThis.setMinDocFreq(1);
                theMoreLikeThis.setFieldNames(analyzerCache.getAllFieldNames());
                for (QueryResultDocument theDocument : theResultDocuments) {
                    Query theMoreLikeThisQuery = theMoreLikeThis.like(theDocument.getDocumentID());
                    TopDocs theMoreLikeThisTopDocs = theSearcher.search(theMoreLikeThisQuery, 5);
                    for (ScoreDoc theMoreLikeThisScoreDoc : theMoreLikeThisTopDocs.scoreDocs) {
                        int theSimilarDocument = theMoreLikeThisScoreDoc.doc;
                        // add() returns false for docs already shown, avoiding duplicates.
                        if (theUniqueDocumentsFound.add(theSimilarDocument)) {
                            Document theMoreLikeThisDocument = theSearcher.doc(theSimilarDocument);
                            String theFilename = theMoreLikeThisDocument.getField(IndexFields.FILENAME)
                                    .stringValue();
                            theDocument.addSimilarFile(theFilename);
                        }
                    }
                }
            }
            LOGGER.info("Got Dimensions");
            // Translate raw facet counts into UI facets with drilldown links.
            for (FacetResult theResult : theFacetCounts.getAllDims(20000)) {
                String theDimension = theResult.dim;
                if ("author".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theAuthorFacets.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if ("extension".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theFileTypesFacets.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if ("last-modified-year".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theLastModifiedYearFacet.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if (IndexFields.LANGUAGEFACET.equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            // Show the human-readable language name, but keep the
                            // raw label in the drilldown link.
                            Locale theLocale = new Locale(theLabelAndValue.label);
                            theLanguageFacet.add(new Facet(theLocale.getDisplayLanguage(Locale.ENGLISH),
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                LOGGER.info(" " + theDimension);
            }
            if (!theAuthorFacets.isEmpty()) {
                theDimensions.add(new FacetDimension("Author", theAuthorFacets));
            }
            if (!theLastModifiedYearFacet.isEmpty()) {
                theDimensions.add(new FacetDimension("Last modified", theLastModifiedYearFacet));
            }
            if (!theFileTypesFacets.isEmpty()) {
                theDimensions.add(new FacetDimension("File types", theFileTypesFacets));
            }
            if (!theLanguageFacet.isEmpty()) {
                theDimensions.add(new FacetDimension("Language", theLanguageFacet));
            }
            // Wait for all Tasks to complete for the search result highlighter
            ForkJoinTask.helpQuiesce();
        }
        long theDuration = System.currentTimeMillis() - theStartTime;
        LOGGER.info("Total amount of time : " + theDuration + "ms");
        return new QueryResult(System.currentTimeMillis() - theStartTime, theResultDocuments, theDimensions,
                theSearcher.getIndexReader().numDocs(), aBacklink);
    } catch (Exception e) {
        // Any failure (parse, I/O, highlighting) surfaces as an unchecked exception.
        throw new RuntimeException(e);
    } finally {
        // Always hand the searcher back to the manager, even on failure.
        searcherManager.release(theSearcher);
    }
}
From source file:fr.univ_tours.etu.searcher.LikeThisTest.java
/**
 * Finds and prints documents similar to the given text using a
 * "more like this" query over the "title" and "content" fields.
 * <p>
 * Fix: the original opened an {@link IndexReader} and never closed it,
 * leaking the reader on every call; try-with-resources now guarantees
 * the reader is closed even when the search throws.
 *
 * @param searchForSimilar free text to find similar documents for
 * @throws IOException if the index cannot be opened or searched
 */
private void findSilimar(String searchForSimilar) throws IOException {
    try (IndexReader reader = DirectoryReader.open(indexDir)) {
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        MoreLikeThis mlt = new MoreLikeThis(reader);
        // Frequency thresholds of 0 keep every term of the input text eligible.
        mlt.setMinTermFreq(0);
        mlt.setMinDocFreq(0);
        mlt.setFieldNames(new String[] { "title", "content" });
        mlt.setAnalyzer(analyzer);
        Reader sReader = new StringReader(searchForSimilar);
        Query query = mlt.like("content", sReader);
        TopDocs topDocs = indexSearcher.search(query, 10);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document aSimilar = indexSearcher.doc(scoreDoc.doc);
            String similarTitle = aSimilar.get("title");
            String similarContent = aSimilar.get("content");
            System.out.println("====similar found====");
            System.out.println("title: " + similarTitle);
            System.out.println("content: " + similarContent);
        }
    }
}
From source file:fr.univ_tours.etu.searcher.Searcher.java
/**
 * Runs a multi-field search built from the request's per-field query strings,
 * optionally expanding the result set with "more like this" queries.
 * <p>
 * The CONTENTS query text is lemmatized (case-sensitively and caselessly) and
 * any detected named entities are added as an extra NAMED_ENTITIES clause.
 * When query expansion is enabled, the top (up to 5) hits' summaries seed
 * MoreLikeThis queries whose new hits are appended to the result list.
 * <p>
 * Fixes: removed the local {@code result} that was assigned on every
 * iteration but never read, and the empty {@code else} branch in the
 * expansion loop.
 *
 * @param query carries the per-field query dictionary and the expansion flag
 * @return the ranked result objects, expansion hits appended at the end
 * @throws IOException    if the index cannot be read or reopened
 * @throws ParseException if the multi-field query cannot be parsed
 */
public List<ResultObject> search(SearchQueriesRequest query) throws IOException, ParseException {
    Map<String, String> queriesDictionary = query.getQueriesDictionary();
    boolean useQueryExpansion = query.isUseQueryExpansion();
    List<Integer> docsToExpand = (useQueryExpansion) ? new ArrayList<>() : null;
    List<String> fsa = new ArrayList<>(); // field names, parallel to qsa
    List<String> qsa = new ArrayList<>(); // query strings, parallel to fsa
    String contentLemmas = "";
    if (queriesDictionary.containsKey(DocFields.CONTENTS)) {
        // Tokenize the content query both case-sensitively and caselessly;
        // the caseless lemmas become the actual CONTENTS/SYNONYMS query text.
        regularTokenizer.tokenize(queriesDictionary.get(DocFields.CONTENTS), true);
        caselessTokenizer.tokenize(queriesDictionary.get(DocFields.CONTENTS), true);
        contentLemmas = caselessTokenizer.getLemmaString();
        System.out.println("Lemmas: " + caselessTokenizer.getLemmaList());
        // Named entities found by either tokenizer are queried on their own field.
        String neString = "";
        if (caselessTokenizer.getNeList() != null && caselessTokenizer.getNeList().size() != 0) {
            neString = caselessTokenizer.getNeString(";", true);
            System.out.println("NE caseless: " + neString);
        }
        if (regularTokenizer.getNeList() != null && regularTokenizer.getNeList().size() != 0) {
            neString += ";" + regularTokenizer.getNeString(";", true);
            System.out.println("NE all: " + neString);
        }
        if (!"".equals(neString)) {
            fsa.add(DocFields.NAMED_ENTITIES);
            qsa.add(neString);
        }
    }
    for (Map.Entry<String, String> entry : queriesDictionary.entrySet()) {
        fsa.add(entry.getKey());
        if (entry.getKey().equals(DocFields.CONTENTS) || entry.getKey().equals(DocFields.SYNONYMS)) {
            qsa.add(contentLemmas);
        } else {
            qsa.add(entry.getValue());
        }
    }
    Query q = MultiFieldQueryParser.parse(qsa.toArray(new String[qsa.size()]),
            fsa.toArray(new String[fsa.size()]), analyzer);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs docs = searcher.search(q, this.numRetrievedDocs);
    ScoreDoc[] hits = docs.scoreDocs;
    List<ResultObject> resultObjects = new ArrayList<>();
    for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        if (useQueryExpansion) {
            docsToExpand.add(docId);
        }
        Document d = searcher.doc(docId);
        resultObjects.add(new ResultObject(docId, i, d.get(DocFields.TITLE), d.get(DocFields.AUTHOR),
                d.get(DocFields.FILE_PATH), d.get(DocFields.SUMMARY), d.get(DocFields.FILE_NAME)));
    }
    if (useQueryExpansion) {
        // Reopen the reader before running the expansion queries
        // (mirrors the original behavior — the old reader is closed first).
        reader.close();
        this.reader = DirectoryReader.open(FSDirectory.open(new File(this.indexDir).toPath()));
        searcher = new IndexSearcher(reader);
        MoreLikeThis mlt = new MoreLikeThis(reader);
        mlt.setMinTermFreq(0);
        mlt.setMinDocFreq(0);
        mlt.setAnalyzer(analyzer);
        // Seed expansion from at most the first 5 primary hits' summaries.
        for (int i = 0; i < Math.min(docsToExpand.size(), 5); i++) {
            Reader r = new StringReader(resultObjects.get(i).getSummary());
            Query expandedQuery = mlt.like(DocFields.CONTENTS, r);
            TopDocs topDocs = searcher.search(expandedQuery, 5);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Only append documents not already part of the result set.
                if (!docsToExpand.contains(scoreDoc.doc)) {
                    docsToExpand.add(scoreDoc.doc);
                    Document aSimilar = searcher.doc(scoreDoc.doc);
                    resultObjects.add(new ResultObject(1, resultObjects.size(), aSimilar.get(DocFields.TITLE),
                            aSimilar.get(DocFields.AUTHOR), aSimilar.get(DocFields.FILE_PATH),
                            aSimilar.get(DocFields.SUMMARY), aSimilar.get(DocFields.FILE_NAME)));
                }
            }
        }
    }
    return resultObjects;
}
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.util.MoreLikeThisHelper.java
License:Apache License
/**
 * Builds a Lucene "more like this" query from a Solr-style MLT parameter
 * string of the form {@code key=value&key=value...}.
 * <p>
 * Supported keys: {@code stream.body} (the seed text), {@code mlt.fl}
 * (comma-separated field list) and the numeric/boolean MLT tuning knobs
 * ({@code mlt.mindf}, {@code mlt.mintf}, {@code mlt.boost}, {@code mlt.qf},
 * {@code mlt.maxdf}, {@code mlt.maxdfp}, {@code mlt.maxntp}, {@code mlt.maxqt},
 * {@code mlt.maxwl}, {@code mlt.minwl}).
 * <p>
 * When the first field is {@link FieldNames#PATH} and a document with that
 * path exists, the query is built from that document's other stored fields;
 * otherwise it is built directly from the seed text.
 * <p>
 * Fix: the wrapping RuntimeException now preserves the original exception
 * as its cause instead of discarding the stack trace.
 *
 * @param reader         the index reader to build term statistics from
 * @param analyzer       analyzer applied to the seed text
 * @param mltQueryString the raw {@code key=value&...} parameter string
 * @return the MLT query, or {@code null} when no {@code stream.body} was given
 * @throws RuntimeException if the parameter string is malformed or the query
 *                          cannot be constructed
 */
public static Query getMoreLikeThis(IndexReader reader, Analyzer analyzer, String mltQueryString) {
    Query moreLikeThisQuery = null;
    MoreLikeThis mlt = new MoreLikeThis(reader);
    mlt.setAnalyzer(analyzer);
    try {
        String text = null;
        String[] fields = {};
        for (String param : mltQueryString.split("&")) {
            String[] keyValuePair = param.split("=");
            if (keyValuePair.length != 2 || keyValuePair[0] == null || keyValuePair[1] == null) {
                throw new RuntimeException("Unparsable native Lucene MLT query: " + mltQueryString);
            } else {
                if ("stream.body".equals(keyValuePair[0])) {
                    text = keyValuePair[1];
                } else if ("mlt.fl".equals(keyValuePair[0])) {
                    fields = keyValuePair[1].split(",");
                } else if ("mlt.mindf".equals(keyValuePair[0])) {
                    mlt.setMinDocFreq(Integer.parseInt(keyValuePair[1]));
                } else if ("mlt.mintf".equals(keyValuePair[0])) {
                    mlt.setMinTermFreq(Integer.parseInt(keyValuePair[1]));
                } else if ("mlt.boost".equals(keyValuePair[0])) {
                    mlt.setBoost(Boolean.parseBoolean(keyValuePair[1]));
                } else if ("mlt.qf".equals(keyValuePair[0])) {
                    mlt.setBoostFactor(Float.parseFloat(keyValuePair[1]));
                } else if ("mlt.maxdf".equals(keyValuePair[0])) {
                    mlt.setMaxDocFreq(Integer.parseInt(keyValuePair[1]));
                } else if ("mlt.maxdfp".equals(keyValuePair[0])) {
                    mlt.setMaxDocFreqPct(Integer.parseInt(keyValuePair[1]));
                } else if ("mlt.maxntp".equals(keyValuePair[0])) {
                    mlt.setMaxNumTokensParsed(Integer.parseInt(keyValuePair[1]));
                } else if ("mlt.maxqt".equals(keyValuePair[0])) {
                    mlt.setMaxQueryTerms(Integer.parseInt(keyValuePair[1]));
                } else if ("mlt.maxwl".equals(keyValuePair[0])) {
                    mlt.setMaxWordLen(Integer.parseInt(keyValuePair[1]));
                } else if ("mlt.minwl".equals(keyValuePair[0])) {
                    mlt.setMinWordLen(Integer.parseInt(keyValuePair[1]));
                }
            }
        }
        if (text != null) {
            // NOTE(review): fields[0] throws if "mlt.fl" was absent/empty; the
            // outer catch converts that into the wrapping RuntimeException.
            if (FieldNames.PATH.equals(fields[0])) {
                // Try to resolve the text as a document path first.
                IndexSearcher searcher = new IndexSearcher(reader);
                TermQuery q = new TermQuery(new Term(FieldNames.PATH, text));
                TopDocs top = searcher.search(q, 1);
                if (top.totalHits == 0) {
                    // No such document: fall back to treating the path as raw text.
                    mlt.setFieldNames(fields);
                    moreLikeThisQuery = mlt.like(new StringReader(text), mlt.getFieldNames()[0]);
                } else {
                    // Seed the MLT query from all non-path fields of the found document.
                    ScoreDoc d = top.scoreDocs[0];
                    Document doc = reader.document(d.doc);
                    List<String> fieldNames = new ArrayList<String>();
                    for (IndexableField f : doc.getFields()) {
                        if (!FieldNames.PATH.equals(f.name())) {
                            fieldNames.add(f.name());
                        }
                    }
                    String[] docFields = fieldNames.toArray(new String[fieldNames.size()]);
                    mlt.setFieldNames(docFields);
                    moreLikeThisQuery = mlt.like(d.doc);
                }
            } else {
                mlt.setFieldNames(fields);
                moreLikeThisQuery = mlt.like(new StringReader(text), mlt.getFieldNames()[0]);
            }
        }
        return moreLikeThisQuery;
    } catch (Exception e) {
        // Preserve the cause so the real failure remains diagnosable.
        throw new RuntimeException("could not handle MLT query " + mltQueryString, e);
    }
}
From source file:org.apache.solr.search.mlt.CloudMLTQParser.java
License:Apache License
public Query parse() { String id = localParams.get(QueryParsing.V); // Do a Real Time Get for the document SolrDocument doc = getDocument(id);/*from w w w. j a v a 2 s. c om*/ MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader()); // TODO: Are the mintf and mindf defaults ok at 1/0 ? mlt.setMinTermFreq(localParams.getInt("mintf", 1)); mlt.setMinDocFreq(localParams.getInt("mindf", 0)); if (localParams.get("minwl") != null) mlt.setMinWordLen(localParams.getInt("minwl")); if (localParams.get("maxwl") != null) mlt.setMaxWordLen(localParams.getInt("maxwl")); mlt.setAnalyzer(req.getSchema().getIndexAnalyzer()); String[] qf = localParams.getParams("qf"); Map<String, Collection<Object>> filteredDocument = new HashMap(); if (qf != null) { mlt.setFieldNames(qf); for (String field : qf) { filteredDocument.put(field, doc.getFieldValues(field)); } } else { Map<String, SchemaField> fields = req.getSchema().getFields(); ArrayList<String> fieldNames = new ArrayList(); for (String field : doc.getFieldNames()) { // Only use fields that are stored and have an explicit analyzer. // This makes sense as the query uses tf/idf/.. for query construction. // We might want to relook and change this in the future though. if (fields.get(field).stored() && fields.get(field).getType().isExplicitAnalyzer()) { fieldNames.add(field); filteredDocument.put(field, doc.getFieldValues(field)); } } mlt.setFieldNames(fieldNames.toArray(new String[fieldNames.size()])); } try { return mlt.like(filteredDocument); } catch (IOException e) { e.printStackTrace(); throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Bad Request"); } }
From source file:org.apache.solr.search.mlt.SimpleMLTQParser.java
License:Apache License
public Query parse() { String defaultField = req.getSchema().getUniqueKeyField().getName(); String uniqueValue = localParams.get(QueryParsing.V); String[] qf = localParams.getParams("qf"); SolrIndexSearcher searcher = req.getSearcher(); Query docIdQuery = createIdQuery(defaultField, uniqueValue); try {// w w w .j a v a 2 s . co m TopDocs td = searcher.search(docIdQuery, 1); if (td.totalHits != 1) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch " + "document with id [" + uniqueValue + "]"); ScoreDoc[] scoreDocs = td.scoreDocs; MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader()); // TODO: Are the mintf and mindf defaults ok at '1' ? mlt.setMinTermFreq(localParams.getInt("mintf", 1)); mlt.setMinDocFreq(localParams.getInt("mindf", 1)); if (localParams.get("minwl") != null) mlt.setMinWordLen(localParams.getInt("minwl")); if (localParams.get("maxwl") != null) mlt.setMaxWordLen(localParams.getInt("maxwl")); ArrayList<String> fields = new ArrayList(); if (qf != null) { mlt.setFieldNames(qf); } else { Map<String, SchemaField> fieldNames = req.getSearcher().getSchema().getFields(); for (String fieldName : fieldNames.keySet()) { if (fieldNames.get(fieldName).indexed() && fieldNames.get(fieldName).stored()) if (fieldNames.get(fieldName).getType().getNumericType() == null) fields.add(fieldName); } mlt.setFieldNames(fields.toArray(new String[fields.size()])); } mlt.setAnalyzer(req.getSchema().getIndexAnalyzer()); return mlt.like(scoreDocs[0].doc); } catch (IOException e) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request" + e.getMessage()); } }
From source file:org.elasticsearch.common.lucene.search.morelikethis.XMoreLikeThisTests.java
License:Apache License
/**
 * Verifies that MoreLikeThis with {@code maxQueryTerms = topN} keeps exactly
 * the {@code topN} terms with the highest idf (i.e. the lowest document
 * frequency) when building the query.
 */
@Test
public void testTopN() throws Exception {
    int numDocs = 100;
    int topN = 25;
    // add series of docs with terms of decreasing df
    // (term i appears in numDocs - i documents, so later terms are rarer)
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    for (int i = 0; i < numDocs; i++) {
        addDoc(writer, generateStrSeq(0, i + 1));
    }
    IndexReader reader = writer.getReader();
    writer.close();
    // setup MLT query: thresholds of 1 keep every term eligible, so only
    // maxQueryTerms limits the selection.
    MoreLikeThis mlt = new MoreLikeThis(reader);
    mlt.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
    mlt.setMaxQueryTerms(topN);
    mlt.setMinDocFreq(1);
    mlt.setMinTermFreq(1);
    mlt.setMinWordLen(1);
    mlt.setFieldNames(new String[] { "text" });
    // perform MLT query using a like-text that contains every generated term
    String likeText = "";
    for (String text : generateStrSeq(0, numDocs)) {
        likeText += text + " ";
    }
    BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader(likeText));
    // check best terms are topN of highest idf
    List<BooleanClause> clauses = query.clauses();
    assertEquals("Expected" + topN + "clauses only!", topN, clauses.size());
    // The rarest terms are the last topN of the sequence.
    Term[] expectedTerms = new Term[topN];
    int idx = 0;
    for (String text : generateStrSeq(numDocs - topN, topN)) {
        expectedTerms[idx++] = new Term("text", text);
    }
    for (BooleanClause clause : clauses) {
        Term term = ((TermQuery) clause.getQuery()).getTerm();
        assertTrue(Arrays.asList(expectedTerms).contains(term));
    }
    // clean up
    reader.close();
    dir.close();
}
From source file:org.elasticsearch.common.lucene.search.MoreLikeThisQuery.java
License:Apache License
@Override public Query rewrite(IndexReader reader) throws IOException { MoreLikeThis mlt = new MoreLikeThis(reader, similarity == null ? new DefaultSimilarity() : similarity); mlt.setFieldNames(moreLikeFields);//from ww w . j a va2 s . com mlt.setAnalyzer(analyzer); mlt.setMinTermFreq(minTermFrequency); mlt.setMinDocFreq(minDocFreq); mlt.setMaxDocFreq(maxDocFreq); mlt.setMaxQueryTerms(maxQueryTerms); mlt.setMinWordLen(minWordLen); mlt.setMaxWordLen(maxWordLen); mlt.setStopWords(stopWords); mlt.setBoost(boostTerms); mlt.setBoostFactor(boostTermsFactor); //LUCENE 4 UPGRADE this mapps the 3.6 behavior (only use the first field) BooleanQuery bq = (BooleanQuery) mlt.like(new FastStringReader(likeText), moreLikeFields[0]); BooleanClause[] clauses = bq.getClauses(); bq.setMinimumNumberShouldMatch((int) (clauses.length * percentTermsToMatch)); bq.setBoost(getBoost()); return bq; }
From source file:org.eu.bitzone.Leia.java
License:Apache License
/**
 * More Like this query from the current doc (or selected fields).
 * <p>
 * Builds a MoreLikeThis query either from the text of the table rows the
 * user selected (concatenated field values) or, when nothing is selected,
 * from the whole document identified by {@code docNum}. On success the
 * generated query string is placed into the search tab's query field and
 * that tab is activated; all failures are reported via {@code errorMsg}.
 *
 * @param docNum   UI widget holding the document number as text
 * @param docTable UI table whose selected rows supply the seed fields
 */
public void actionMLT(final Object docNum, final Object docTable) {
    // No index open — nothing to query against.
    if (ir == null) {
        errorMsg(MSG_NOINDEX);
        return;
    }
    int id = 0;
    try {
        id = Integer.parseInt(getString(docNum, "text"));
    } catch (final NumberFormatException nfe) {
        errorMsg("Invalid document number");
        return;
    }
    final MoreLikeThis mlt = new MoreLikeThis(ir);
    try {
        // Seed from all indexed field names of the current index.
        mlt.setFieldNames(Util.fieldNames(ir, true).toArray(new String[0]));
    } catch (final Exception e) {
        errorMsg("Exception collecting field names: " + e.toString());
        return;
    }
    mlt.setMinTermFreq(1);
    mlt.setMaxQueryTerms(50);
    // Use the analyzer currently configured in the search-options tab.
    final Analyzer a = createAnalyzer(find("srchOptTabs"));
    if (a == null) {
        return;
    }
    mlt.setAnalyzer(a);
    final Object[] rows = getSelectedItems(docTable);
    BooleanQuery similar = null;
    if (rows != null && rows.length > 0) {
        // collect text from fields
        final StringBuilder sb = new StringBuilder();
        for (int i = 0; i < rows.length; i++) {
            final Field f = (Field) getProperty(rows[i], "field");
            if (f == null) {
                continue;
            }
            final String s = f.stringValue();
            // Skip unset or whitespace-only field values.
            if (s == null || s.trim().length() == 0) {
                continue;
            }
            if (sb.length() > 0) {
                sb.append(" ");
            }
            sb.append(s);
        }
        try {
            similar = (BooleanQuery) mlt.like(new StringReader(sb.toString()), "field");
        } catch (final Exception e) {
            e.printStackTrace();
            errorMsg("FAILED: " + e.getMessage());
            return;
        }
    } else {
        // No selection: build the query from the whole document by id.
        try {
            similar = (BooleanQuery) mlt.like(id);
        } catch (final Exception e) {
            e.printStackTrace();
            errorMsg("FAILED: " + e.getMessage());
            return;
        }
    }
    if (similar.clauses() != null && similar.clauses().size() > 0) {
        // System.err.println("SIMILAR: " + similar);
        // Switch to the search tab and show the generated query text.
        final Object tabpane = find("maintpane");
        setInteger(tabpane, "selected", 2);
        final Object qField = find("qField");
        setString(qField, "text", similar.toString());
    } else {
        showStatus("WARN: empty query - check Analyzer settings");
    }
}