List of usage examples for org.apache.lucene.search Query toString
public abstract String toString(String field);
Prints a query to a string, with field assumed to be the default field and omitted. From source file:SearchFiles.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);/*w ww .j a v a 2 s . c o m*/ } String index = "index"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); BufferedReader in = null; if (queries != null) { in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8); } else { in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); } QueryParser parser = new QueryParser(field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? 
queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } reader.close(); }
From source file:SearchHelpDocs.java
License:Open Source License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage: java SearchFiles index-dir"; if (args.length != 1) { System.out.println(usage); System.exit(0);//from ww w.j a va 2s . com } String index = args[0]; String field = LUC_KEY_CONTENT; String queries = null; int repeat = 0; boolean raw = false; String normsField = null; System.out.println("INFO: index-directory=" + index); IndexReader reader = IndexReader.open(index); if (normsField != null) reader = new OneNormsReader(reader, normsField); Searcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); BufferedReader in = null; in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); String[] fields = { LUC_KEY_CONTENT, LUC_KEY_FULL_PATH, LUC_KEY_FILE_NAME }; MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer); printHelpInformation(); while (true) { // prompt the user System.out.print(CMDLINE_PREFIX); System.out.flush(); String line = in.readLine(); if (line == null || line.length() < 0) break; if (line.trim().length() == 0) { continue; } // Exit gracefully. if (line.trim().equalsIgnoreCase(":quit")) { System.out.println("INFO: quit successful"); break; } // Modify for fuzzy query (E.g. 
~0.58), also use wildcard postfix (*) line = line + "~"; Object obj = parser.parse(line); Query query = parser.parse(line); System.out.println(CMDLINE_PREFIX + "Searching for: [" + line + "] query=" + query.toString(field)); System.out.flush(); // Search and also add the sort element Hits hits = searcher.search(query, createSort()); if (repeat > 0) { Date start = new Date(); for (int i = 0; i < repeat; i++) { hits = searcher.search(query); } Date end = new Date(); System.out.println(CMDLINE_PREFIX + "Time: " + (end.getTime() - start.getTime()) + "ms"); } System.out.println(hits.length() + " total matching documents"); for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) { int end = Math.min(hits.length(), start + HITS_PER_PAGE); for (int i = start; i < end; i++) { System.out.println(CMDLINE_PREFIX + "doc=" + hits.id(i) + " score=" + hits.score(i)); // Ignore scores based on a certain threshold if (hits.score(i) < 0.09) continue; Document doc = hits.doc(i); String path = doc.get(LUC_KEY_CONTENT); if (path != null) { // Attempt to pretty print help document information System.out.println("\n == Help Document Found; docid=" + hits.id(i)); System.out.println("*************************"); String fullpath = doc.get(LUC_KEY_FULL_PATH); String filename = doc.get(LUC_KEY_FILE_NAME); String content = doc.get(LUC_KEY_CONTENT); String id = doc.get(LUC_KEY_IDENTITY); if (filename != null) { System.out.println(" +Filename: " + doc.get(filename)); } if (fullpath != null) { System.out.println(" +Path: " + doc.get(fullpath)); } System.out.println(" id: " + id); System.out.println(" == Content:"); System.out.println(prettyPrintContent(content)); System.out.println("-------------------------"); System.out.println(); } else { System.out.println((i + 1) + ". " + "No content for this document"); } } if (queries != null) // non-interactive break; if (hits.length() > end) { System.out.print("more (y/n) ? 
"); line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') break; } } } reader.close(); }
From source file:Get_Top_Documents_Based_on_Lucene.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);/*from w w w . j a va 2 s . c o m*/ } //String index = "index"; //String index = "index_wiki_2"; String index = "index_external_links_v1/"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 20; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); BufferedReader in = null; if (queries != null) { in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8); } else { in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); } QueryParser parser = new QueryParser(field, analyzer); // Read Question in Training Data line by line //String path_train = "data/training_set.tsv"; //String path_output = "data/lucene_search_result_train.txt"; //String path_train = 
"data/validation_set.tsv"; //String path_output = "data/lucene_search_result_validation_index_wiki_2.txt"; String path_train = "data/training_set.tsv"; String path_output = "data/lucene_search_result_train_index_wiki_external_links_v1.txt"; Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(path_output), "UTF-8")); try (BufferedReader br = new BufferedReader(new FileReader(path_train))) { String line; while ((line = br.readLine()) != null) { line = line.trim(); String[] lst = line.split("\t"); String query_s = lst[1]; if (query_s == "question") { continue; } System.out.println("query_s: " + query_s); writer.write(query_s + "\t"); try { Query query = parser.parse(query_s); System.out.println("Searching for: " + query.toString(field)); doPagingSearch(in, writer, searcher, query, hitsPerPage, raw, queries == null && queryString == null); } catch (org.apache.lucene.queryparser.classic.ParseException e) { continue; } } // while } writer.close(); /* while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, 100); } Date end = new Date(); System.out.println("Time: "+(end.getTime()-start.getTime())+"ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } // while (True) */ reader.close(); }
From source file:SearchFilesTest.java
License:Apache License
/**
 * Simple command-line based search demo.
 *
 * <p>Opens the index directory given as the first argument, builds an analyzer from the stop
 * words in {@code resources/stop.txt}, and runs every line of {@code resources/query.txt}
 * against the {@code contents} field. Reading stops at end of file or at the first blank line.
 *
 * @param args the first element is the index directory
 * @throws Exception if the index or a resource file cannot be opened, a query cannot be
 *     parsed, or I/O fails
 */
public static void main(String[] args) throws Exception {
  if (args.length < 1) {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    System.err.println("Usage: java SearchFilesTest index-dir");
    System.exit(1);
  }

  String index = args[0];
  String field = "contents";
  String queries = "resources/query.txt";

  IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
  try {
    IndexSearcher searcher = new IndexSearcher(reader);

    File stopWordsFile = new File("resources/stop.txt");
    CharArraySet stopWordsCharArraySet;
    // NOTE(review): FileReader uses the platform charset while the query file below uses
    // `codification`; confirm which encoding the stop-word file is stored in.
    FileReader stopWordsReader = new FileReader(stopWordsFile);
    try {
      stopWordsCharArraySet = WordlistLoader.getWordSet(stopWordsReader, Version.LUCENE_47);
    } finally {
      stopWordsReader.close();
    }

    Analyzer analyzer = new RomanianAnalyzerUsingAnotherConstructorForStopwordAnalyzer(Version.LUCENE_47,
        stopWordsCharArraySet);
    QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer);

    BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), codification));
    try {
      String line;
      while ((line = in.readLine()) != null) {
        line = line.trim();
        if (line.length() == 0) {
          break; // a blank line ends the run
        }

        Query query = parser.parse(line);
        System.out.println("Looking for: " + query.toString(field));
        doPagingSearch(in, searcher, query);
      }
    } finally {
      in.close();
    }
  } finally {
    reader.close();
  }
}
From source file:SimpleLuceneSearch.java
License:Apache License
public String getCuidFromMimId(String mimId) throws IOException, ParseException { //term: ?? String field = "mimId"; String userQuery = mimId;/*from w w w.ja v a2 s . co m*/ // only searching, so read-only=true Query query = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery); System.out.println("Searching for: " + query.toString(field) + " in OMIM MRCONSO"); int hitsPerPage = 10;// result is ordered with lucene scored then true TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); SEARCHER.search(query, collector); int numTotalHits = collector.getTotalHits(); //display results //System.out.println("Found " + numTotalHits + " hits."); if (numTotalHits == 1) { ScoreDoc[] results = collector.topDocs().scoreDocs; int docId = results[0].doc; Document d = SEARCHER.doc(docId); System.out.println(" =>" + d.get("cuId")); return d.get("cuId"); } else if (numTotalHits > 1) { ScoreDoc[] results = collector.topDocs().scoreDocs; int docId = results[0].doc; // WE TAKE THE FIRST ONE Document d = SEARCHER.doc(docId); return d.get("cuId"); } else if (numTotalHits == 0) { System.out.println("WARNING: there is no corresping CUI"); } return ""; }
From source file:SimpleLuceneSearch.java
License:Apache License
/** * //from w ww . j a va 2 s . co m * @param symbol * @return * @throws IOException * @throws ParseException */ public String getPharmgkbIdForDrug(String drugName) throws IOException, ParseException { //symbol:warfarin String field = "name"; String userQuery = drugName; // only searching, so read-only=true Query query = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery); System.out.println("Searching for: " + query.toString(field) + "in PharmGKB drugs "); int hitsPerPage = 10;// result is ordered with lucene scored then true TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); SEARCHER.search(query, collector); int numTotalHits = collector.getTotalHits(); //display results //System.out.println("Found " + numTotalHits + " hits."); if (numTotalHits == 1) { ScoreDoc[] results = collector.topDocs().scoreDocs; int docId = results[0].doc; Document d = SEARCHER.doc(docId); return d.get("paId"); } else if (numTotalHits > 1) { System.out.println("WARNING: there is several corresping paId"); } else if (numTotalHits == 0) { System.out.println("WARNING: there is no corresping paId"); // let's try alternate gene symbols String userQuery2 = drugName; Query query2 = new QueryParser(Version.LUCENE_35, "genericNames", ANALYSER).parse(userQuery2); TopScoreDocCollector collector2 = TopScoreDocCollector.create(hitsPerPage, true); SEARCHER.search(query2, collector2); int numTotalHits2 = collector2.getTotalHits(); if (numTotalHits2 > 1) { ScoreDoc[] results2 = collector2.topDocs().scoreDocs; int docId = results2[0].doc; // WE TAKE THE FIRST ONE Document d = SEARCHER.doc(docId); return d.get("paId"); } } //System.out.println((i + 1) + ". " + d.get("cuId")+ ", meshId= " + d.get("meshId")+ ", term= " + d.get("term")+ ", preferred= " + d.get("preferred")); return ""; }
From source file:SimpleLuceneSearch.java
License:Apache License
/** * get the PA ID from the gene symbol by querying a lucen index make form the genes.tsv file * @param symbol/*from w w w. j a v a 2 s . c o m*/ * @return * @throws IOException * @throws ParseException */ public String getPharmgkbIdForGene(String symbol) throws IOException, ParseException { //symbol:CYP2C9 String field = "symbol"; String userQuery = symbol; // only searching, so read-only=true Query query = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery); System.out.println("Searching for: " + query.toString(field) + "in PharmGKB genes "); int hitsPerPage = 10;// result is ordered with lucene scored then true TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); SEARCHER.search(query, collector); int numTotalHits = collector.getTotalHits(); //display results //System.out.println("Found " + numTotalHits + " hits."); if (numTotalHits == 1) { ScoreDoc[] results = collector.topDocs().scoreDocs; int docId = results[0].doc; Document d = SEARCHER.doc(docId); return d.get("paId"); } else if (numTotalHits > 1) { System.out.println("WARNING: there is several corresping paId"); } else if (numTotalHits == 0) { System.out.println("WARNING: there is no corresping paId"); // let's try alternate gene symbols String userQuery2 = symbol; Query query2 = new QueryParser(Version.LUCENE_35, "alternateSymbols", ANALYSER).parse(userQuery2); TopScoreDocCollector collector2 = TopScoreDocCollector.create(hitsPerPage, true); SEARCHER.search(query2, collector2); int numTotalHits2 = collector2.getTotalHits(); if (numTotalHits2 > 1) { ScoreDoc[] results2 = collector2.topDocs().scoreDocs; int docId = results2[0].doc; // WE TAKE THE FIRST ONE Document d = SEARCHER.doc(docId); return d.get("paId"); } } //System.out.println((i + 1) + ". " + d.get("cuId")+ ", meshId= " + d.get("meshId")+ ", term= " + d.get("term")+ ", preferred= " + d.get("preferred")); return ""; }
From source file:SimpleLuceneSearch.java
License:Apache License
/** * // ww w. j ava2 s.co m * @param diseaseLabel * @return * @throws IOException * @throws ParseException */ public String getPharmgkbIdForDisease(String diseaseLabel) throws IOException, ParseException { //symbol:warfarin String field = "diseaseLabel"; String userQuery = diseaseLabel; // only searching, so read-only=true Query query = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery); System.out.println("Searching for: " + query.toString(field) + "in PharmGKB diseases "); int hitsPerPage = 10;// result is ordered with lucene scored then true TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); SEARCHER.search(query, collector); int numTotalHits = collector.getTotalHits(); //display results //System.out.println("Found " + numTotalHits + " hits."); if (numTotalHits == 1) { ScoreDoc[] results = collector.topDocs().scoreDocs; int docId = results[0].doc; Document d = SEARCHER.doc(docId); return d.get("paId"); } else if (numTotalHits > 1) { System.out.println("WARNING: there is several corresping paId"); // let's see if one of the results has a label exactly similar ScoreDoc[] results = collector.topDocs().scoreDocs; for (int i = 0; i < results.length; ++i) { int docId = results[i].doc; Document d = SEARCHER.doc(docId); if (d.get("diseaseLabel").toLowerCase().equals(diseaseLabel.toLowerCase())) { return d.get("paId"); } } } else if (numTotalHits == 0) { System.out.println("WARNING: there is no corresping paId"); } //System.out.println((i + 1) + ". " + d.get("cuId")+ ", meshId= " + d.get("meshId")+ ", term= " + d.get("term")+ ", preferred= " + d.get("preferred")); return ""; }
From source file:LuceneSearchFiles.java
License:Apache License
/** Simple command-line based search demo. */ public static void search(String phrase, String field, int hitsPerPage) { try {/*from w ww . j a v a2s . co m*/ IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(m_index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer); Query query = parser.parse(phrase); System.out.println("Searching for: " + query.toString(field)); searcher.search(query, null, hitsPerPage); TopDocs results = searcher.search(query, hitsPerPage); ScoreDoc[] hits = results.scoreDocs; for (ScoreDoc hit : hits) { Document doc = searcher.doc(hit.doc); //String path = doc.get("path"); String title = doc.get("title"); System.out.println(hit.score + " -" + title); } reader.close(); } catch (IOException e) { } catch (ParseException e) { } }
From source file:al.franzis.lucene.header.search.SearchFiles.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/java/4_0/demo.html for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);/*from w w w .java 2 s . com*/ } String index = "index"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexSearcher searcher = new IndexSearcher(FSDirectory.open(new File(index))); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31); BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); } else { in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } QueryParser parser = new QueryParser(Version.LUCENE_31, field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? 
queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, null, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } searcher.close(); }