List of usage examples for org.apache.lucene.index IndexReader close
@Override public final synchronized void close() throws IOException
From source file:SearchFiles.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);/* w w w.j av a 2 s . com*/ } String index = "index"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); BufferedReader in = null; if (queries != null) { in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8); } else { in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); } QueryParser parser = new QueryParser(field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? 
queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } reader.close(); }
From source file:MyServlet.java
private void gotoSearch(PrintWriter out, HttpServletRequest request, HttpServletResponse response) { try {/* w w w .ja v a 2 s .c o m*/ // Text to search String querystr = request.getParameter("keyword"); log.addHistory(querystr); // The \"title\" arg specifies the default field to use when no field is explicitly specified in the query Query q = new QueryParser("Classes", analyzer).parse(querystr); // Searching code int hitsPerPage = 10; IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage); searcher.search(q, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; // Code to display the results of search //out.println("Found " + hits.length + " Classes Matching your Requirement"); courseList = new ArrayList(); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); Course course = new Course(d.get("Number"), d.get("Classes"), d.get("Time"), d.get("Department")); //out.println((i + 1) + ". " + d.get("Number")+ d.get("Classes") ); courseList.add(course); } request.setAttribute("course", courseList); RequestDispatcher de = request.getRequestDispatcher("/table.jsp"); de.forward(request, response); // reader can only be closed when there is no need to access the documents any more reader.close(); } catch (Exception e) { System.out.println(e.getMessage()); } }
From source file:ContentBasedAnalysis.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava QueryConvert [-index dir]"; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);/*from w w w . j a v a 2 s.co m*/ } String index = "index"; String field = "contents"; String queries = null; String queryString = null; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } } // Creation of reader and a searcher for the index IndexReader reader = IndexReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); // Reader to read File Names BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); } else { in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } while (true) { System.out.println("Enter filename 1 (or hit <RETURN>): "); String f1 = in.readLine(); if (f1 == null || f1.length() == -1) break; f1 = f1.trim(); if (f1.length() == 0) break; System.out.println("Enter filename 2: "); String f2 = in.readLine(); int id1 = findDocId(searcher, f1); if (id1 < 0) { System.out.println("No file " + f1 + " found in index!"); break; } int id2 = findDocId(searcher, f2); if (id1 < 0) { System.out.println("No file " + f1 + " found in index!"); break; } // Conversion to TF-IDF format TermWeight[] v1 = toTfIdf(reader, id1); TermWeight[] v2 = toTfIdf(reader, id2); System.out.println("The cosine similarity of the two files is: " + cosineSimilarity(v1, v2)); } searcher.close(); reader.close(); }
From source file:SearchHelpDocs.java
License:Open Source License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage: java SearchFiles index-dir"; if (args.length != 1) { System.out.println(usage); System.exit(0);/*from w w w.ja v a2s.c om*/ } String index = args[0]; String field = LUC_KEY_CONTENT; String queries = null; int repeat = 0; boolean raw = false; String normsField = null; System.out.println("INFO: index-directory=" + index); IndexReader reader = IndexReader.open(index); if (normsField != null) reader = new OneNormsReader(reader, normsField); Searcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); BufferedReader in = null; in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); String[] fields = { LUC_KEY_CONTENT, LUC_KEY_FULL_PATH, LUC_KEY_FILE_NAME }; MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer); printHelpInformation(); while (true) { // prompt the user System.out.print(CMDLINE_PREFIX); System.out.flush(); String line = in.readLine(); if (line == null || line.length() < 0) break; if (line.trim().length() == 0) { continue; } // Exit gracefully. if (line.trim().equalsIgnoreCase(":quit")) { System.out.println("INFO: quit successful"); break; } // Modify for fuzzy query (E.g. 
~0.58), also use wildcard postfix (*) line = line + "~"; Object obj = parser.parse(line); Query query = parser.parse(line); System.out.println(CMDLINE_PREFIX + "Searching for: [" + line + "] query=" + query.toString(field)); System.out.flush(); // Search and also add the sort element Hits hits = searcher.search(query, createSort()); if (repeat > 0) { Date start = new Date(); for (int i = 0; i < repeat; i++) { hits = searcher.search(query); } Date end = new Date(); System.out.println(CMDLINE_PREFIX + "Time: " + (end.getTime() - start.getTime()) + "ms"); } System.out.println(hits.length() + " total matching documents"); for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) { int end = Math.min(hits.length(), start + HITS_PER_PAGE); for (int i = start; i < end; i++) { System.out.println(CMDLINE_PREFIX + "doc=" + hits.id(i) + " score=" + hits.score(i)); // Ignore scores based on a certain threshold if (hits.score(i) < 0.09) continue; Document doc = hits.doc(i); String path = doc.get(LUC_KEY_CONTENT); if (path != null) { // Attempt to pretty print help document information System.out.println("\n == Help Document Found; docid=" + hits.id(i)); System.out.println("*************************"); String fullpath = doc.get(LUC_KEY_FULL_PATH); String filename = doc.get(LUC_KEY_FILE_NAME); String content = doc.get(LUC_KEY_CONTENT); String id = doc.get(LUC_KEY_IDENTITY); if (filename != null) { System.out.println(" +Filename: " + doc.get(filename)); } if (fullpath != null) { System.out.println(" +Path: " + doc.get(fullpath)); } System.out.println(" id: " + id); System.out.println(" == Content:"); System.out.println(prettyPrintContent(content)); System.out.println("-------------------------"); System.out.println(); } else { System.out.println((i + 1) + ". " + "No content for this document"); } } if (queries != null) // non-interactive break; if (hits.length() > end) { System.out.print("more (y/n) ? 
"); line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') break; } } } reader.close(); }
From source file:Get_Top_Documents_Based_on_Lucene.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);/*w w w. j av a 2s .co m*/ } //String index = "index"; //String index = "index_wiki_2"; String index = "index_external_links_v1/"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 20; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); BufferedReader in = null; if (queries != null) { in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8); } else { in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); } QueryParser parser = new QueryParser(field, analyzer); // Read Question in Training Data line by line //String path_train = "data/training_set.tsv"; //String path_output = "data/lucene_search_result_train.txt"; //String path_train = 
"data/validation_set.tsv"; //String path_output = "data/lucene_search_result_validation_index_wiki_2.txt"; String path_train = "data/training_set.tsv"; String path_output = "data/lucene_search_result_train_index_wiki_external_links_v1.txt"; Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(path_output), "UTF-8")); try (BufferedReader br = new BufferedReader(new FileReader(path_train))) { String line; while ((line = br.readLine()) != null) { line = line.trim(); String[] lst = line.split("\t"); String query_s = lst[1]; if (query_s == "question") { continue; } System.out.println("query_s: " + query_s); writer.write(query_s + "\t"); try { Query query = parser.parse(query_s); System.out.println("Searching for: " + query.toString(field)); doPagingSearch(in, writer, searcher, query, hitsPerPage, raw, queries == null && queryString == null); } catch (org.apache.lucene.queryparser.classic.ParseException e) { continue; } } // while } writer.close(); /* while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, 100); } Date end = new Date(); System.out.println("Time: "+(end.getTime()-start.getTime())+"ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } // while (True) */ reader.close(); }
From source file:QueryLuceneIndex.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\t[-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);//w w w . j av a 2 s . c o m } String index = "/home/chrisschaefer/enwiki-20130604-lucene_bkp"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); } else { in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } QueryParser parser = new QueryParser(Version.LUCENE_43, field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? 
queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } //Query query = new TermQuery(new Term("title", line)); Query query = parser.parse(line); //System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, null, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } reader.close(); }
From source file:SearchFiles11.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);//from w w w .ja v a 2s . com } String index = "index"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); StandardQueryParser queryParserHelper = new StandardQueryParser(); Query query = queryParserHelper.parse( "Physical OR tests OR for OR shoulder OR impingements OR and OR local OR lesions OR of OR bursa, OR tendon OR labrum OR that OR may OR accompany OR impingement", field); TopDocs results = searcher.search(query, 100); Date end = new Date(); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; String FILENAME = "/home/devil/research/CLEF/ehealth/task2/dataset/pubmed11.res"; int i = 1; try (BufferedWriter bw = new 
BufferedWriter(new FileWriter(FILENAME))) { String content = ""; for (ScoreDoc h : hits) { Document doc = searcher.doc(h.doc); String path = doc.get("path"); String[] path_words = path.split("/"); System.out.println(path_words[path_words.length - 1] + " score=" + h.score); content = "CD007427 " + "NF " + path_words[path_words.length - 1] + " " + i++ + " " + h.score + " pubmed\n"; bw.write(content); } } catch (IOException e) { e.printStackTrace(); } //doPagingSearch(in, searcher, bQuery.build(), hitsPerPage, raw, queries == null && queryString == null); reader.close(); }
From source file:syslogSearch.java
License:Open Source License
/**
 * Serves one search request on {@code searchSocket}.
 *
 * <p>Reads a single line from the socket as the query text, parses it against the "data" field,
 * searches for at most 1000 hits sorted by the "date" field, and replies with a summary line
 * followed by the host, date and data of every returned hit. The reply writer, searcher, reader
 * and socket are always closed, in that order. Any exception is caught and printed to standard
 * output.
 */
public void run() {
  try {
    try {
      String requestLine = (new BufferedReader(new InputStreamReader(searchSocket.getInputStream())))
          .readLine();
      if (requestLine == null) {
        return; // peer closed the connection without sending a query
      }
      String searchQuery = requestLine.trim();

      IndexReader reader = writer.getReader();
      try {
        Searcher searcher = new IndexSearcher(reader);
        try {
          QueryParser indexParser = new QueryParser(Version.LUCENE_30, "data", analyzer);
          SortField hitSortField = new SortField("date", SortField.LONG);
          Sort hitSort = new Sort(hitSortField);
          TopFieldDocs hits = searcher.search(indexParser.parse(searchQuery), null, 1000, hitSort);

          PrintWriter searchReply = new PrintWriter(searchSocket.getOutputStream(), true);
          try {
            searchReply.println(hits.totalHits + " Hits for " + searchQuery);
            // Iterate over the hits actually returned. totalHits counts ALL matches and can
            // exceed the 1000 entries in scoreDocs, which made the old loop run off the end
            // of the array.
            for (int i = 0; i < hits.scoreDocs.length; i++) {
              Document document = searcher.doc(hits.scoreDocs[i].doc);
              String host = document.get("hostname");
              String date = document.get("date");
              String data = document.get("data");
              searchReply.print("host: " + host + ", date: " + date + ", data: " + data + "\n\n");
            }
          } finally {
            searchReply.close();
          }
        } finally {
          searcher.close();
        }
      } finally {
        reader.close();
      }
    } finally {
      // Always release the connection, including when parsing or searching fails.
      searchSocket.close();
    }
  } catch (Exception ex) {
    System.out.print("Exception: " + ex + "\n");
  }
}
From source file:ReadFiles.java
License:Apache License
public static Result doScan(String path, DIRTYPE type, IndexReader ir) throws IOException { IndexReader reader; Result r = new Result(); long beginTs, endTs; if (ir != null) reader = ir;/*from ww w. j a v a2 s . co m*/ else { beginTs = System.currentTimeMillis(); switch (type) { default: case MMAP: reader = DirectoryReader.open(MMapDirectory.open(new File(path))); break; case NIO: reader = DirectoryReader.open(NIOFSDirectory.open(new File(path))); break; case SIMPLE: reader = DirectoryReader.open(SimpleFSDirectory.open(new File(path))); break; } endTs = System.currentTimeMillis(); r.initTs += endTs - beginTs; r.initTsNr += 1; } System.out.println("-----Scan it------" + reader.maxDoc()); beginTs = System.currentTimeMillis(); for (int i = 0; i < reader.maxDoc(); i++) { Document doc = reader.document(i); doc.get("foo"); doc.get("bar"); //System.out.println("Key: " + doc.get("foo") + ", Value: " + doc.get("bar") + ", Content: " + doc.get("content")); } endTs = System.currentTimeMillis(); r.fetchTs += endTs - beginTs; r.fetchTsNr += reader.maxDoc(); if (ir == null) { beginTs = System.currentTimeMillis(); reader.close(); endTs = System.currentTimeMillis(); r.closeTs += endTs - beginTs; r.closeTsNr += 1; } return r; }
From source file:ReadFiles.java
License:Apache License
public static Result doSearch(String path, DIRTYPE type, IndexReader ir) throws IOException { IndexReader reader; Result r = new Result(); long beginTs, endTs; if (ir != null) reader = ir;// ww w. j av a 2 s . co m else { beginTs = System.currentTimeMillis(); switch (type) { default: case MMAP: reader = DirectoryReader.open(MMapDirectory.open(new File(path))); break; case NIO: reader = DirectoryReader.open(NIOFSDirectory.open(new File(path))); break; case SIMPLE: reader = DirectoryReader.open(SimpleFSDirectory.open(new File(path))); break; } endTs = System.currentTimeMillis(); r.initTs += endTs - beginTs; r.initTsNr += 1; } System.out.println("-----Search it------"); IndexSearcher searcher = new IndexSearcher(reader); Query q = NumericRangeQuery.newIntRange("foo", new Integer("100000"), null, false, false); beginTs = System.currentTimeMillis(); ScoreDoc[] hits = searcher.search(q, searcher.getIndexReader().maxDoc()).scoreDocs; endTs = System.currentTimeMillis(); r.searchTs += endTs - beginTs; r.searchTsNr += hits.length; System.out.println("Hits -> " + hits.length); boolean isSeq = true; int lastid = 0; beginTs = System.currentTimeMillis(); for (int i = 0; i < hits.length; i++) { if (hits[i].doc < lastid) isSeq = false; Document doc = searcher.doc(hits[i].doc); doc.get("foo"); doc.get("bar"); //System.out.println("Key: " + doc.get("foo") + ", Value: " + doc.get("bar")); } System.out.println("Search DocID is SEQ? " + isSeq); endTs = System.currentTimeMillis(); r.fetchTs += endTs - beginTs; r.fetchTsNr += hits.length; if (ir == null) { beginTs = System.currentTimeMillis(); reader.close(); endTs = System.currentTimeMillis(); r.closeTs += endTs - beginTs; r.closeTsNr += 1; } return r; }