List of usage examples for org.apache.lucene.search IndexSearcher setSimilarity
public void setSimilarity(Similarity similarity)
From source file:IrqaQuery.java
License:Apache License
public static List<Document> query(String index, String stoppath, String question, int numResult, String sim) throws Exception { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new EnglishAnalyzer(StopFilter.makeStopSet(mygetStopwords(stoppath))); if (sim.equals("TFIDF")) searcher.setSimilarity(new ClassicSimilarity()); else if (sim.equals("BM25")) searcher.setSimilarity(new BM25Similarity()); else//from w ww .ja va 2s . c o m searcher.setSimilarity(new BM25Similarity()); String field = "contents"; QueryParser parser = new QueryParser(field, analyzer); Query query = parser.parse(parser.escape(question)); TopDocs results = searcher.search(query, numResult); ScoreDoc[] hits = results.scoreDocs; List<Document> docs = new ArrayList<Document>(); int numTotalHits = results.totalHits; // System.out.println(numTotalHits + " total matching documents"); int end = Math.min(numTotalHits, numResult); String searchResult = ""; // System.out.println("Only results 1 - " + hits.length); for (int i = 0; i < end; i++) { Document doc = searcher.doc(hits[i].doc); docs.add(doc); } return docs; }
From source file:luceneInterface.java
License:Apache License
public static List<Document> query(String index, String stoppath, String question, int numResult, String sim) throws Exception { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new EnglishAnalyzer(StopFilter.makeStopSet(mygetStopwords(stoppath))); if (sim.equals("TFIDF")) searcher.setSimilarity(new ClassicSimilarity()); else if (sim.equals("BM25")) searcher.setSimilarity(new BM25Similarity()); else/*from w ww. j av a 2 s. com*/ searcher.setSimilarity(new BM25Similarity()); String field = "contents"; QueryParser parser = new QueryParser(field, analyzer); Query query = parser.parse(parser.escape(question)); BooleanQuery.Builder bqb = new BooleanQuery.Builder(); bqb.add(new TermQuery(new Term("contents", parser.escape(question))), BooleanClause.Occur.SHOULD); bqb.add(new TermQuery(new Term("sec", parser.escape(question))), BooleanClause.Occur.SHOULD); // Term term = new Term(field, question); // Query query = new TermQuery(term); // TopDocs results = searcher.search(query, numResult); TopDocs results = searcher.search(parser.parse(bqb.build().toString()), numResult); ScoreDoc[] hits = results.scoreDocs; List<Document> docs = new ArrayList<Document>(); int numTotalHits = results.totalHits; // System.out.println(numTotalHits + " total matching documents"); int end = Math.min(numTotalHits, numResult); String searchResult = ""; // System.out.println("Only results 1 - " + hits.length); for (int i = 0; i < end; i++) { Document doc = searcher.doc(hits[i].doc); docs.add(doc); } return docs; }
From source file:aos.lucene.search.ext.payloads.PayloadsTest.java
License:Apache License
public void testPayloadTermQuery() throws Throwable { addDoc("Hurricane warning", "Bulletin: A hurricane warning was issued at " + "6 AM for the outer great banks"); addDoc("Warning label maker", "The warning label maker is a delightful toy for " + "your precocious seven year old's warning needs"); addDoc("Tornado warning", "Bulletin: There is a tornado warning for " + "Worcester county until 6 PM today"); IndexReader r = writer.getReader();// ww w .ja v a 2s. c om writer.close(); IndexSearcher searcher = new IndexSearcher(r); searcher.setSimilarity(new BoostingSimilarity()); Term warning = new Term("contents", "warning"); Query query1 = new TermQuery(warning); LOGGER.info("\nTermQuery results:"); TopDocs hits = searcher.search(query1, 10); TestUtil.dumpHits(searcher, hits); assertEquals("Warning label maker", // #B searcher.doc(hits.scoreDocs[0].doc).get("title")); // #B Query query2 = new PayloadTermQuery(warning, new AveragePayloadFunction()); LOGGER.info("\nPayloadTermQuery results:"); hits = searcher.search(query2, 10); TestUtil.dumpHits(searcher, hits); assertEquals("Warning label maker", // #C searcher.doc(hits.scoreDocs[2].doc).get("title")); // #C r.close(); searcher.close(); }
From source file:aos.lucene.search.msc.ScoreTest.java
License:Apache License
public void testSimple() throws Exception { indexSingleFieldDocs(new Field[] { new Field("contents", "x", Field.Store.YES, Field.Index.ANALYZED) }); IndexSearcher searcher = new IndexSearcher(directory); searcher.setSimilarity(new SimpleSimilarity()); Query query = new TermQuery(new Term("contents", "x")); Explanation explanation = searcher.explain(query, 0); LOGGER.info(explanation);// w ww . jav a 2s . co m TopDocs matches = searcher.search(query, 10); assertEquals(1, matches.totalHits); assertEquals(1F, matches.scoreDocs[0].score, 0.0); searcher.close(); }
From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java
License:Open Source License
private void testQuery(IndexReader ir, Graph query, IndexSearcher is) throws IOException, ParseException { // create results from mcs: LinkedList<ResultHolder> resultsMcs = new LinkedList<ResultHolder>(); for (int j = 0; j < ir.numDocs(); j++) { Graph model = new Graph(ir.document(j).getValues("graph")[0]); float mcsSimilarity = query.getMcsSimilarity(model); resultsMcs.add(new ResultHolder(j, model.toString(), mcsSimilarity)); }/* w w w . j a va2 s . co m*/ Collections.sort(resultsMcs); // for (Iterator<ResultHolder> iterator = resultsMcs.iterator(); iterator.hasNext();) { // ResultHolder r = iterator.next(); // System.out.println(r.getDocumentNumber() + ": " + r.getSimilarity()); // } // create results from search: // set to another similarity if necessary: is.setSimilarity(new TermFrequencySimilarity()); // is.setSimilarity(new SimpleTfIdfSimilarity()); LucenePathIndexRetrievalEngine engine = new LucenePathIndexRetrievalEngine(50); String gQuery = LucenePathIndexRetrievalEngine.createLucenePathQuery(query); // System.out.println(query); QueryParser qParse = new QueryParser("paths", new WhitespaceAnalyzer()); Query q = qParse.parse(gQuery); Hits hits = is.search(q); LinkedList<ResultHolder> resultsSearch = new LinkedList<ResultHolder>(); for (int i = 0; i < hits.length(); i++) { String graph = hits.doc(i).getValues("graph")[0]; int docID = -1; for (int j = 0; j < ir.numDocs(); j++) { Graph model = new Graph(ir.document(j).getValues("graph")[0]); if (model.toString().equals(graph)) docID = j; } resultsSearch.add(new ResultHolder(docID, graph, hits.score(i))); } Collections.sort(resultsSearch); printPrecisionRecallPlot(resultsMcs, resultsSearch); }
From source file:br.usp.icmc.gazetteer.SemanticSearchTest.LuceneSearcher.java
License:Open Source License
public void search1(String q) throws IOException, ParseException { IndexReader ireader = IndexReader.open(dir); // read-only=true IndexSearcher reader = new IndexSearcher(ireader); reader.setSimilarity(similarityltc()); //ddd.qqq busca(reader, q);/* w w w .j a v a 2 s.c o m*/ numQ++; }
From source file:cc.twittertools.search.api.TrecSearchThriftServer.java
License:Apache License
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(HELP_OPTION, "show help")); options.addOption(OptionBuilder.withArgName("port").hasArg().withDescription("port").create(PORT_OPTION)); options.addOption(/* ww w. j ava 2s .c o m*/ OptionBuilder.withArgName("index").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg() .withDescription("max number of threads in thread pool").create(MAX_THREADS_OPTION)); options.addOption(OptionBuilder.withArgName("file").hasArg() .withDescription("file containing access tokens").create(CREDENTIALS_OPTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(TrecSearchThriftServer.class.getName(), options); System.exit(-1); } int port = cmdline.hasOption(PORT_OPTION) ? Integer.parseInt(cmdline.getOptionValue(PORT_OPTION)) : DEFAULT_PORT; int maxThreads = cmdline.hasOption(MAX_THREADS_OPTION) ? Integer.parseInt(cmdline.getOptionValue(MAX_THREADS_OPTION)) : DEFAULT_MAX_THREADS; File index = new File(cmdline.getOptionValue(INDEX_OPTION)); Map<String, String> credentials = null; if (cmdline.hasOption(CREDENTIALS_OPTION)) { credentials = Maps.newHashMap(); File cfile = new File(cmdline.getOptionValue(CREDENTIALS_OPTION)); if (!cfile.exists()) { System.err.println("Error: " + cfile + " does not exist!"); System.exit(-1); } for (String s : Files.readLines(cfile, Charsets.UTF_8)) { try { String[] arr = s.split(":"); credentials.put(arr[0], arr[1]); } catch (Exception e) { // Catch any exceptions from parsing file contain access tokens System.err.println("Error reading access tokens from " + cfile + "!"); System.exit(-1); } } } if (!index.exists()) { System.err.println("Error: " + index + " does not exist!"); System.exit(-1); } IndexReader reader = DirectoryReader.open(MMapDirectory.open(index)); IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity(new LMDirichletSimilarity(DEFAULT_MU)); QueryLikelihoodModel qlModel = new QueryLikelihoodModel(DEFAULT_MU); TServerSocket serverSocket = new TServerSocket(port); TrecSearch.Processor<TrecSearch.Iface> searchProcessor = new TrecSearch.Processor<TrecSearch.Iface>( new TrecSearchHandler(searcher, qlModel, credentials)); TThreadPoolServer.Args serverArgs = new TThreadPoolServer.Args(serverSocket); serverArgs.maxWorkerThreads(maxThreads); TServer thriftServer = new TThreadPoolServer( serverArgs.processor(searchProcessor).protocolFactory(new TBinaryProtocol.Factory())); thriftServer.serve(); }
From source file:cc.twittertools.search.local.RunQueries.java
License:Apache License
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(/*from www.j a v a 2s . c om*/ OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return") .create(NUM_RESULTS_OPTION)); options.addOption(OptionBuilder.withArgName("file").hasArg() .withDescription("file containing topics in TREC format").create(QUERIES_OPTION)); options.addOption(OptionBuilder.withArgName("similarity").hasArg() .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION)); options.addOption( OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION)); options.addOption(new Option(VERBOSE_OPTION, "print out complete document")); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(QUERIES_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(RunQueries.class.getName(), options); System.exit(-1); } File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION)); if (!indexLocation.exists()) { System.err.println("Error: " + indexLocation + " does not exist!"); System.exit(-1); } String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG; String topicsFile = cmdline.getOptionValue(QUERIES_OPTION); int numResults = 1000; try { if (cmdline.hasOption(NUM_RESULTS_OPTION)) { numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION)); } } catch (NumberFormatException e) { System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION)); System.exit(-1); } String similarity = "LM"; if (cmdline.hasOption(SIMILARITY_OPTION)) { similarity = cmdline.getOptionValue(SIMILARITY_OPTION); } boolean verbose = cmdline.hasOption(VERBOSE_OPTION); PrintStream out = new PrintStream(System.out, true, "UTF-8"); IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation)); IndexSearcher searcher = new IndexSearcher(reader); if (similarity.equalsIgnoreCase("BM25")) { searcher.setSimilarity(new BM25Similarity()); } else if (similarity.equalsIgnoreCase("LM")) { searcher.setSimilarity(new LMDirichletSimilarity(2500.0f)); } QueryParser p = new QueryParser(Version.LUCENE_43, StatusField.TEXT.name, IndexStatuses.ANALYZER); TrecTopicSet topics = TrecTopicSet.fromFile(new File(topicsFile)); for (TrecTopic topic : topics) { Query query = p.parse(topic.getQuery()); Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, topic.getQueryTweetTime(), true, true); TopDocs rs = searcher.search(query, filter, numResults); int i = 1; for (ScoreDoc scoreDoc : rs.scoreDocs) { Document hit = searcher.doc(scoreDoc.doc); out.println(String.format("%s Q0 %s %d %f %s", topic.getId(), hit.getField(StatusField.ID.name).numericValue(), i, scoreDoc.score, runtag)); if (verbose) { out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " ")); } i++; } } reader.close(); out.close(); }
From source file:cc.twittertools.search.local.SearchStatuses.java
License:Apache License
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(//from w w w.j a v a 2s. c o m OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption( OptionBuilder.withArgName("string").hasArg().withDescription("query id").create(QID_OPTION)); options.addOption( OptionBuilder.withArgName("string").hasArg().withDescription("query text").create(QUERY_OPTION)); options.addOption( OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("maxid").create(MAX_ID_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return") .create(NUM_RESULTS_OPTION)); options.addOption(OptionBuilder.withArgName("similarity").hasArg() .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION)); options.addOption(new Option(VERBOSE_OPTION, "print out complete document")); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(QUERY_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(SearchStatuses.class.getName(), options); System.exit(-1); } File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION)); if (!indexLocation.exists()) { System.err.println("Error: " + indexLocation + " does not exist!"); System.exit(-1); } String qid = cmdline.hasOption(QID_OPTION) ? cmdline.getOptionValue(QID_OPTION) : DEFAULT_QID; String queryText = cmdline.hasOption(QUERY_OPTION) ? cmdline.getOptionValue(QUERY_OPTION) : DEFAULT_Q; String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG; long maxId = cmdline.hasOption(MAX_ID_OPTION) ? Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION)) : DEFAULT_MAX_ID; int numResults = cmdline.hasOption(NUM_RESULTS_OPTION) ? Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION)) : DEFAULT_NUM_RESULTS; boolean verbose = cmdline.hasOption(VERBOSE_OPTION); String similarity = "LM"; if (cmdline.hasOption(SIMILARITY_OPTION)) { similarity = cmdline.getOptionValue(SIMILARITY_OPTION); } PrintStream out = new PrintStream(System.out, true, "UTF-8"); IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation)); IndexSearcher searcher = new IndexSearcher(reader); if (similarity.equalsIgnoreCase("BM25")) { searcher.setSimilarity(new BM25Similarity()); } else if (similarity.equalsIgnoreCase("LM")) { searcher.setSimilarity(new LMDirichletSimilarity(2500.0f)); } QueryParser p = new QueryParser(Version.LUCENE_43, IndexStatuses.StatusField.TEXT.name, IndexStatuses.ANALYZER); Query query = p.parse(queryText); Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, maxId, true, true); TopDocs rs = searcher.search(query, filter, numResults); int i = 1; for (ScoreDoc scoreDoc : rs.scoreDocs) { Document hit = searcher.doc(scoreDoc.doc); out.println(String.format("%s Q0 %s %d %f %s", qid, hit.getField(StatusField.ID.name).numericValue(), i, scoreDoc.score, runtag)); if (verbose) { out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " ")); } i++; } reader.close(); out.close(); }
From source file:ci6226.eval_index_reader.java
public eval_index_reader(Analyzer _analyzer, String _dir, String[] _searchList, int _topn) throws IOException, org.apache.lucene.queryparser.classic.ParseException, InvalidTokenOffsetsException { String indexdir = "./" + _dir; String field = "text"; IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexdir))); IndexSearcher searcher = new IndexSearcher(reader); PrintWriter writer = new PrintWriter(_dir + ".csv", "UTF-8"); Searchit(reader, searcher, _analyzer, field, _searchList, _topn, writer); searcher.setSimilarity(new similarity_tf_rm()); Searchit(reader, searcher, _analyzer, field, _searchList, _topn, writer); searcher.setSimilarity(new similiarty_queryNorm()); Searchit(reader, searcher, _analyzer, field, _searchList, _topn, writer); writer.close();//from w w w. ja va2 s. c o m reader.close(); /// searcher.setSimilarity(null); }