List of usage examples for org.apache.lucene.search IndexSearcher IndexSearcher
public IndexSearcher(IndexReaderContext context)
From source file:SimpleNaiveBayesClassifier.java
License:Apache License
/**
 * Creates a new NaiveBayes classifier.
 *
 * @param leafReader     the reader on the index to be used for classification
 * @param analyzer       an {@link Analyzer} used to analyze unseen text
 * @param query          a {@link Query} to eventually filter the docs used for training the classifier, or
 *                       {@code null} if all the indexed docs should be used
 * @param classFieldName the name of the field used as the output for the classifier NOTE: must not be heavily
 *                       analyzed as the returned class will be a token indexed for this field
 * @param textFieldNames the name of the fields used as the inputs for the classifier, NO boosting supported per
 *                       field
 */
public SimpleNaiveBayesClassifier(IndexReader leafReader, Analyzer analyzer, Query query, String classFieldName,
        String... textFieldNames) {
    this.leafReader = leafReader;
    // The searcher is built directly on top of the supplied reader.
    this.indexSearcher = new IndexSearcher(this.leafReader);
    this.textFieldNames = textFieldNames;
    this.classFieldName = classFieldName;
    this.analyzer = analyzer;
    this.query = query;
}
From source file:GUIFrame.java
public int SearchResults(PorterStemAnalyzer Analyzer, Directory Index, String userInput, DefaultListModel DocList) throws ParseException, IOException { // The query// www.j a v a 2s . co m userInput = userInput.replace("\"", ""); Query q = new QueryParser(Version.LATEST, "summary", Analyzer).parse(userInput); // The search int hitsPerPage = 20; // return 20 top documents IndexReader indoReader = DirectoryReader.open(Index); IndexSearcher indoSearcher = new IndexSearcher(indoReader); TopScoreDocCollector docCollector = TopScoreDocCollector.create(hitsPerPage, true); indoSearcher.search(q, docCollector); ScoreDoc[] hits = docCollector.topDocs().scoreDocs; // Copy results to list models for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = indoSearcher.doc(docId); DocList.addElement(d.get("docID")); SumListModel.addElement(d.get("summary")); } GetTerms(Index, Analyzer, "summary", userInput); return hits.length; }
From source file:SearchFiles.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);// www .j ava2s .co m } String index = "index"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); BufferedReader in = null; if (queries != null) { in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8); } else { in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); } QueryParser parser = new QueryParser(field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? 
queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } reader.close(); }
From source file:MyServlet.java
private void gotoSearch(PrintWriter out, HttpServletRequest request, HttpServletResponse response) { try {/*from w w w . j a va 2s.com*/ // Text to search String querystr = request.getParameter("keyword"); log.addHistory(querystr); // The \"title\" arg specifies the default field to use when no field is explicitly specified in the query Query q = new QueryParser("Classes", analyzer).parse(querystr); // Searching code int hitsPerPage = 10; IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage); searcher.search(q, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; // Code to display the results of search //out.println("Found " + hits.length + " Classes Matching your Requirement"); courseList = new ArrayList(); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); Course course = new Course(d.get("Number"), d.get("Classes"), d.get("Time"), d.get("Department")); //out.println((i + 1) + ". " + d.get("Number")+ d.get("Classes") ); courseList.add(course); } request.setAttribute("course", courseList); RequestDispatcher de = request.getRequestDispatcher("/table.jsp"); de.forward(request, response); // reader can only be closed when there is no need to access the documents any more reader.close(); } catch (Exception e) { System.out.println(e.getMessage()); } }
From source file:ContentBasedAnalysis.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava QueryConvert [-index dir]"; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);/* w w w. j a v a 2 s . c om*/ } String index = "index"; String field = "contents"; String queries = null; String queryString = null; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } } // Creation of reader and a searcher for the index IndexReader reader = IndexReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); // Reader to read File Names BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); } else { in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } while (true) { System.out.println("Enter filename 1 (or hit <RETURN>): "); String f1 = in.readLine(); if (f1 == null || f1.length() == -1) break; f1 = f1.trim(); if (f1.length() == 0) break; System.out.println("Enter filename 2: "); String f2 = in.readLine(); int id1 = findDocId(searcher, f1); if (id1 < 0) { System.out.println("No file " + f1 + " found in index!"); break; } int id2 = findDocId(searcher, f2); if (id1 < 0) { System.out.println("No file " + f1 + " found in index!"); break; } // Conversion to TF-IDF format TermWeight[] v1 = toTfIdf(reader, id1); TermWeight[] v2 = toTfIdf(reader, id2); System.out.println("The cosine similarity of the two files is: " + cosineSimilarity(v1, v2)); } searcher.close(); reader.close(); }
From source file:SearcherTest.java
/**
 * Test fixture: opens the index directory, a reader on it and a searcher on that reader
 * before each test.
 *
 * @throws Exception if the directory or the index cannot be opened
 */
@Before
public void setUp() throws Exception {
    this.dir = FSDirectory.open(Paths.get("E:\\lucene\\idx"));
    this.reader = DirectoryReader.open(this.dir);
    this.is = new IndexSearcher(this.reader);
}
From source file:SearchHelpDocs.java
License:Open Source License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage: java SearchFiles index-dir"; if (args.length != 1) { System.out.println(usage); System.exit(0);/*from w ww.j av a 2s . c o m*/ } String index = args[0]; String field = LUC_KEY_CONTENT; String queries = null; int repeat = 0; boolean raw = false; String normsField = null; System.out.println("INFO: index-directory=" + index); IndexReader reader = IndexReader.open(index); if (normsField != null) reader = new OneNormsReader(reader, normsField); Searcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); BufferedReader in = null; in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); String[] fields = { LUC_KEY_CONTENT, LUC_KEY_FULL_PATH, LUC_KEY_FILE_NAME }; MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer); printHelpInformation(); while (true) { // prompt the user System.out.print(CMDLINE_PREFIX); System.out.flush(); String line = in.readLine(); if (line == null || line.length() < 0) break; if (line.trim().length() == 0) { continue; } // Exit gracefully. if (line.trim().equalsIgnoreCase(":quit")) { System.out.println("INFO: quit successful"); break; } // Modify for fuzzy query (E.g. 
~0.58), also use wildcard postfix (*) line = line + "~"; Object obj = parser.parse(line); Query query = parser.parse(line); System.out.println(CMDLINE_PREFIX + "Searching for: [" + line + "] query=" + query.toString(field)); System.out.flush(); // Search and also add the sort element Hits hits = searcher.search(query, createSort()); if (repeat > 0) { Date start = new Date(); for (int i = 0; i < repeat; i++) { hits = searcher.search(query); } Date end = new Date(); System.out.println(CMDLINE_PREFIX + "Time: " + (end.getTime() - start.getTime()) + "ms"); } System.out.println(hits.length() + " total matching documents"); for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) { int end = Math.min(hits.length(), start + HITS_PER_PAGE); for (int i = start; i < end; i++) { System.out.println(CMDLINE_PREFIX + "doc=" + hits.id(i) + " score=" + hits.score(i)); // Ignore scores based on a certain threshold if (hits.score(i) < 0.09) continue; Document doc = hits.doc(i); String path = doc.get(LUC_KEY_CONTENT); if (path != null) { // Attempt to pretty print help document information System.out.println("\n == Help Document Found; docid=" + hits.id(i)); System.out.println("*************************"); String fullpath = doc.get(LUC_KEY_FULL_PATH); String filename = doc.get(LUC_KEY_FILE_NAME); String content = doc.get(LUC_KEY_CONTENT); String id = doc.get(LUC_KEY_IDENTITY); if (filename != null) { System.out.println(" +Filename: " + doc.get(filename)); } if (fullpath != null) { System.out.println(" +Path: " + doc.get(fullpath)); } System.out.println(" id: " + id); System.out.println(" == Content:"); System.out.println(prettyPrintContent(content)); System.out.println("-------------------------"); System.out.println(); } else { System.out.println((i + 1) + ". " + "No content for this document"); } } if (queries != null) // non-interactive break; if (hits.length() > end) { System.out.print("more (y/n) ? 
"); line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') break; } } } reader.close(); }
From source file:IrqaQuery.java
License:Apache License
public static List<Document> query(String index, String stoppath, String question, int numResult, String sim) throws Exception { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new EnglishAnalyzer(StopFilter.makeStopSet(mygetStopwords(stoppath))); if (sim.equals("TFIDF")) searcher.setSimilarity(new ClassicSimilarity()); else if (sim.equals("BM25")) searcher.setSimilarity(new BM25Similarity()); else//ww w. j a v a2 s. c o m searcher.setSimilarity(new BM25Similarity()); String field = "contents"; QueryParser parser = new QueryParser(field, analyzer); Query query = parser.parse(parser.escape(question)); TopDocs results = searcher.search(query, numResult); ScoreDoc[] hits = results.scoreDocs; List<Document> docs = new ArrayList<Document>(); int numTotalHits = results.totalHits; // System.out.println(numTotalHits + " total matching documents"); int end = Math.min(numTotalHits, numResult); String searchResult = ""; // System.out.println("Only results 1 - " + hits.length); for (int i = 0; i < end; i++) { Document doc = searcher.doc(hits[i].doc); docs.add(doc); } return docs; }
From source file:Get_Top_Documents_Based_on_Lucene.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);//from w w w . jav a 2s . co m } //String index = "index"; //String index = "index_wiki_2"; String index = "index_external_links_v1/"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 20; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); BufferedReader in = null; if (queries != null) { in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8); } else { in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); } QueryParser parser = new QueryParser(field, analyzer); // Read Question in Training Data line by line //String path_train = "data/training_set.tsv"; //String path_output = "data/lucene_search_result_train.txt"; //String path_train = 
"data/validation_set.tsv"; //String path_output = "data/lucene_search_result_validation_index_wiki_2.txt"; String path_train = "data/training_set.tsv"; String path_output = "data/lucene_search_result_train_index_wiki_external_links_v1.txt"; Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(path_output), "UTF-8")); try (BufferedReader br = new BufferedReader(new FileReader(path_train))) { String line; while ((line = br.readLine()) != null) { line = line.trim(); String[] lst = line.split("\t"); String query_s = lst[1]; if (query_s == "question") { continue; } System.out.println("query_s: " + query_s); writer.write(query_s + "\t"); try { Query query = parser.parse(query_s); System.out.println("Searching for: " + query.toString(field)); doPagingSearch(in, writer, searcher, query, hitsPerPage, raw, queries == null && queryString == null); } catch (org.apache.lucene.queryparser.classic.ParseException e) { continue; } } // while } writer.close(); /* while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, 100); } Date end = new Date(); System.out.println("Time: "+(end.getTime()-start.getTime())+"ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } // while (True) */ reader.close(); }
From source file:QueryLuceneIndex.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\t[-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);/*from www . java 2 s . c o m*/ } String index = "/home/chrisschaefer/enwiki-20130604-lucene_bkp"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); } else { in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } QueryParser parser = new QueryParser(Version.LUCENE_43, field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? 
queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } //Query query = new TermQuery(new Term("title", line)); Query query = parser.parse(line); //System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, null, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } reader.close(); }