List of usage examples for org.apache.lucene.index.DirectoryReader.open
public static DirectoryReader open(final IndexCommit commit) throws IOException
From source file:GUIFrame.java
public int SearchResults(PorterStemAnalyzer Analyzer, Directory Index, String userInput, DefaultListModel DocList) throws ParseException, IOException { // The query/*from w w w. j a v a 2 s.co m*/ userInput = userInput.replace("\"", ""); Query q = new QueryParser(Version.LATEST, "summary", Analyzer).parse(userInput); // The search int hitsPerPage = 20; // return 20 top documents IndexReader indoReader = DirectoryReader.open(Index); IndexSearcher indoSearcher = new IndexSearcher(indoReader); TopScoreDocCollector docCollector = TopScoreDocCollector.create(hitsPerPage, true); indoSearcher.search(q, docCollector); ScoreDoc[] hits = docCollector.topDocs().scoreDocs; // Copy results to list models for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = indoSearcher.doc(docId); DocList.addElement(d.get("docID")); SumListModel.addElement(d.get("summary")); } GetTerms(Index, Analyzer, "summary", userInput); return hits.length; }
From source file:GUIFrame.java
protected void GetTerms(Directory Index, PorterStemAnalyzer Analyzer, String field, String queryString) throws IOException, ParseException { Set<Term> results;//from w ww. j av a 2 s. com IndexReader indoReader = DirectoryReader.open(Index); QueryParser qp = new QueryParser(Version.LUCENE_30, field, Analyzer); Query query = qp.parse(queryString); query.rewrite(indoReader); results = new HashSet<>(); query.extractTerms(results); Iterator<Term> iterator = results.iterator(); while (iterator.hasNext()) { String term = iterator.next().toString(); TermListModel.addElement(term.substring(8)); } }
From source file:SearchFiles.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);//w ww. j av a2 s . c om } String index = "index"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); BufferedReader in = null; if (queries != null) { in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8); } else { in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); } QueryParser parser = new QueryParser(field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? 
queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } reader.close(); }
From source file:VocabDumper.java
void printWords() throws IOException { IndexReader indexReader = DirectoryReader.open(FSDirectory.open(indexDir)); Fields fields = MultiFields.getFields(indexReader); String[] fieldNames = { MSIRDoc.FIELD_TITLE_EN, MSIRDoc.FIELD_EN, MSIRDoc.FIELD_TITLE_HN, MSIRDoc.FIELD_HN };//from ww w.ja v a 2s .c o m for (String fieldName : fieldNames) { Terms terms = fields.terms(fieldName); TermsEnum iterator = terms.iterator(null); BytesRef byteRef = null; while ((byteRef = iterator.next()) != null) { String term = new String(byteRef.bytes, byteRef.offset, byteRef.length); if (term.indexOf('#') == -1) System.out.println(term); } } }
From source file:MyServlet.java
private void gotoSearch(PrintWriter out, HttpServletRequest request, HttpServletResponse response) { try {/*from ww w .j a v a 2 s .com*/ // Text to search String querystr = request.getParameter("keyword"); log.addHistory(querystr); // The \"title\" arg specifies the default field to use when no field is explicitly specified in the query Query q = new QueryParser("Classes", analyzer).parse(querystr); // Searching code int hitsPerPage = 10; IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage); searcher.search(q, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; // Code to display the results of search //out.println("Found " + hits.length + " Classes Matching your Requirement"); courseList = new ArrayList(); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); Course course = new Course(d.get("Number"), d.get("Classes"), d.get("Time"), d.get("Department")); //out.println((i + 1) + ". " + d.get("Number")+ d.get("Classes") ); courseList.add(course); } request.setAttribute("course", courseList); RequestDispatcher de = request.getRequestDispatcher("/table.jsp"); de.forward(request, response); // reader can only be closed when there is no need to access the documents any more reader.close(); } catch (Exception e) { System.out.println(e.getMessage()); } }
From source file:SearcherTest.java
/**
 * Test fixture: opens the index directory at {@code E:\lucene\idx}, opens a reader on it and
 * wraps that reader in an {@code IndexSearcher}.
 *
 * @throws Exception if the directory or the index cannot be opened
 */
@Before
public void setUp() throws Exception {
    // Index location is fixed for this test.
    this.dir = FSDirectory.open(Paths.get("E:\\lucene\\idx"));

    // Reader and searcher are kept in fields for use outside this method.
    this.reader = DirectoryReader.open(this.dir);
    this.is = new IndexSearcher(this.reader);
}
From source file:IrqaQuery.java
License:Apache License
public static List<Document> query(String index, String stoppath, String question, int numResult, String sim) throws Exception { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new EnglishAnalyzer(StopFilter.makeStopSet(mygetStopwords(stoppath))); if (sim.equals("TFIDF")) searcher.setSimilarity(new ClassicSimilarity()); else if (sim.equals("BM25")) searcher.setSimilarity(new BM25Similarity()); else/*from w ww . j a v a 2s. c o m*/ searcher.setSimilarity(new BM25Similarity()); String field = "contents"; QueryParser parser = new QueryParser(field, analyzer); Query query = parser.parse(parser.escape(question)); TopDocs results = searcher.search(query, numResult); ScoreDoc[] hits = results.scoreDocs; List<Document> docs = new ArrayList<Document>(); int numTotalHits = results.totalHits; // System.out.println(numTotalHits + " total matching documents"); int end = Math.min(numTotalHits, numResult); String searchResult = ""; // System.out.println("Only results 1 - " + hits.length); for (int i = 0; i < end; i++) { Document doc = searcher.doc(hits[i].doc); docs.add(doc); } return docs; }
From source file:Get_Top_Documents_Based_on_Lucene.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);/*from w w w .j a v a2 s. c o m*/ } //String index = "index"; //String index = "index_wiki_2"; String index = "index_external_links_v1/"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 20; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); BufferedReader in = null; if (queries != null) { in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8); } else { in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); } QueryParser parser = new QueryParser(field, analyzer); // Read Question in Training Data line by line //String path_train = "data/training_set.tsv"; //String path_output = "data/lucene_search_result_train.txt"; //String path_train = 
"data/validation_set.tsv"; //String path_output = "data/lucene_search_result_validation_index_wiki_2.txt"; String path_train = "data/training_set.tsv"; String path_output = "data/lucene_search_result_train_index_wiki_external_links_v1.txt"; Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(path_output), "UTF-8")); try (BufferedReader br = new BufferedReader(new FileReader(path_train))) { String line; while ((line = br.readLine()) != null) { line = line.trim(); String[] lst = line.split("\t"); String query_s = lst[1]; if (query_s == "question") { continue; } System.out.println("query_s: " + query_s); writer.write(query_s + "\t"); try { Query query = parser.parse(query_s); System.out.println("Searching for: " + query.toString(field)); doPagingSearch(in, writer, searcher, query, hitsPerPage, raw, queries == null && queryString == null); } catch (org.apache.lucene.queryparser.classic.ParseException e) { continue; } } // while } writer.close(); /* while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, 100); } Date end = new Date(); System.out.println("Time: "+(end.getTime()-start.getTime())+"ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } // while (True) */ reader.close(); }
From source file:QueryLuceneIndex.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\t[-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);//from w w w .ja v a 2 s.com } String index = "/home/chrisschaefer/enwiki-20130604-lucene_bkp"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); } else { in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } QueryParser parser = new QueryParser(Version.LUCENE_43, field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? 
queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } //Query query = new TermQuery(new Term("title", line)); Query query = parser.parse(line); //System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, null, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } reader.close(); }
From source file:SearchFiles11.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);/*from w w w . j a va 2s.co m*/ } String index = "index"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); StandardQueryParser queryParserHelper = new StandardQueryParser(); Query query = queryParserHelper.parse( "Physical OR tests OR for OR shoulder OR impingements OR and OR local OR lesions OR of OR bursa, OR tendon OR labrum OR that OR may OR accompany OR impingement", field); TopDocs results = searcher.search(query, 100); Date end = new Date(); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; String FILENAME = "/home/devil/research/CLEF/ehealth/task2/dataset/pubmed11.res"; int i = 1; try (BufferedWriter bw = new 
BufferedWriter(new FileWriter(FILENAME))) { String content = ""; for (ScoreDoc h : hits) { Document doc = searcher.doc(h.doc); String path = doc.get("path"); String[] path_words = path.split("/"); System.out.println(path_words[path_words.length - 1] + " score=" + h.score); content = "CD007427 " + "NF " + path_words[path_words.length - 1] + " " + i++ + " " + h.score + " pubmed\n"; bw.write(content); } } catch (IOException e) { e.printStackTrace(); } //doPagingSearch(in, searcher, bQuery.build(), hitsPerPage, raw, queries == null && queryString == null); reader.close(); }