// Java tutorial
// ============================================================================ // // Copyright (C) 2006-2015 Talend Inc. - www.talend.com // // This source code is available under agreement available at // %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt // // You should have received a copy of the agreement // along with this program; if not, write to Talend SA // 9 rue Pages 92150 Suresnes, France // // ============================================================================ package org.talend.dataquality.standardization.index; import java.io.File; import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; /** * The similarity scoring formula of Lucene: * * (t in q) (tf(t in d) * idf(t) * boost(t.field in d) * lengthNorm(t.field in d )) * coord(q,d) * queryNorm(q) * * in which we have the following factors: * * -tf: factor of term frequency in document. -idf: factor of documents with term in index boost: field-level boost * coord: factor-based # of query terms in document queryNorm: normalization for query weights * * The purpose of this formula is to measure the similarity between a query and each document that matches the query. * The score is computed for each document (d) matching each term (t) in a query (q). * * The scoring formula seems dauntingand it is. Were talking about factors that rank one document higher than another * based on a query; that in and of itself deserves the sophistication going on. 
If you want to see how all these * factors play out, Lucene provides a helpful feature called Explanation. IndexSearcher has an explain method, which * requires a Query and a document ID and returns an Explanation object. */ public class Explainer { public static void main(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: Explainer <index dir> <query>"); System.exit(1); } String indexDir = args[0]; String queryExpression = args[1]; Directory directory = FSDirectory.open(new File(indexDir)); QueryParser parser = new QueryParser(Version.LUCENE_30, "contents", new SimpleAnalyzer()); Query query = parser.parse(queryExpression); System.out.println("Query: " + queryExpression); IndexSearcher searcher = new IndexSearcher(directory); TopDocs topDocs = searcher.search(query, 10); for (ScoreDoc match : topDocs.scoreDocs) { Explanation explanation = searcher.explain(query, match.doc); System.out.println("----------"); Document doc = searcher.doc(match.doc); System.out.println(doc.get("title")); System.out.println(explanation.toString()); } searcher.close(); directory.close(); } }