List of usage examples for org.apache.lucene.search IndexSearcher doc
public Document doc(int docID) throws IOException
.getIndexReader().document(docID)
From source file:ikanalyzer.LuceneIndexAndSearchDemo.java
License:Apache License
/** * ???//from w ww. ja v a 2 s . com * * @param args */ public static void main(String[] args) { // Lucene Document?? String fieldName = "text"; // String text = "IK Analyzer???????"; // IKAnalyzer? Analyzer analyzer = new IKAnalyzer(true); Directory directory = null; IndexWriter iwriter = null; IndexReader ireader = null; IndexSearcher isearcher = null; try { // directory = new RAMDirectory(); // ?IndexWriterConfig IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer); iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); iwriter = new IndexWriter(directory, iwConfig); // Document doc = new Document(); doc.add(new StringField("ID", "10000", Field.Store.YES)); doc.add(new TextField(fieldName, text, Field.Store.YES)); iwriter.addDocument(doc); iwriter.close(); // ?********************************** // ? ireader = DirectoryReader.open(directory); isearcher = new IndexSearcher(ireader); String keyword = "?"; // QueryParser?Query QueryParser qp = new QueryParser(fieldName, analyzer); qp.setDefaultOperator(QueryParser.AND_OPERATOR); Query query = qp.parse(keyword); System.out.println("Query = " + query); // ?5? TopDocs topDocs = isearcher.search(query, 5); System.out.println("" + topDocs.totalHits); // ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (int i = 0; i < topDocs.totalHits; i++) { Document targetDoc = isearcher.doc(scoreDocs[i].doc); System.out.println("" + targetDoc.toString()); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { e.printStackTrace(); } } if (directory != null) { try { directory.close(); } catch (IOException e) { e.printStackTrace(); } } } }
From source file:in.student.project.queryexpansion.SearchFilesLDA.java
License:Apache License
/** * Generates necessary output - in this case this output is used as input to matlab * @param hits/*w w w. j a v a 2s.c o m*/ * @param terms * @param query_num - tag of the query * @param writer * @param termCount * @param outCount * @param idxReader * @param similarity * @param searcher * @throws IOException */ private static void generateOutput(TopDocs hits, Vector<TermQuery> terms, String query_num, BufferedWriter writer, int termCount, int outCount, IndexSearcher searcher, TFIDFSimilarity similarity, IndexReader idxReader) throws IOException { logger.finer("terms.size(): " + terms.size()); // Generate Output // For each doc for (int i = 0; ((i < hits.scoreDocs.length) && (i < outCount)); i++) { Document doc = searcher.doc(hits.scoreDocs[i].doc); String docno = ((Field) doc.getField("DOCNO")).stringValue(); int docId = hits.scoreDocs[i].doc; float coord = Utils.coord(terms, doc, docId, similarity, idxReader); writer.write(query_num + " " + "Q0" + " " + docno + " " + (i + 1) + " " + hits.scoreDocs[i].score + " " + coord); // For each term output normalized: tf, idf, boostFactor for (int j = 0; j < termCount; j++) { if (j < terms.size()) { TermQuery termQuery = terms.elementAt(j); Term term = termQuery.getTerm(); String termStr = term.text(); float tf = Utils.getTFNorm(termStr, doc, docId, similarity, idxReader, true); float idf = Utils.getIDFNorm(termStr, terms, idxReader, similarity, true); float boost = Utils.getBoostNorm(termQuery, terms); writer.write(" " + tf + " " + idf + " " + boost + " "); } // If not enough terms pad with 0's else { writer.write(" 0 0 0 "); } } writer.write("\n"); } }
From source file:in.student.project.search.SearchFiles.java
License:Apache License
/** * This demonstrates a typical paging search scenario, where the search engine presents * pages of size n to the user. The user can then go to the next page if interested in * the next hits.//from w w w . ja v a 2 s.co m * * When the query is executed for the first time, then only enough results are collected * to fill 5 result pages. If the user wants to page beyond this limit, then the query * is executed another time and all hits are collected. * */ public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw, boolean interactive) throws IOException { // Collect enough docs to show 5 pages TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; System.out.println(numTotalHits + " total matching documents"); int start = 0; int end = Math.min(numTotalHits, hitsPerPage); while (true) { if (end > hits.length) { System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected."); System.out.println("Collect more (y/n) ?"); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') { break; } hits = searcher.search(query, numTotalHits).scoreDocs; } end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { if (raw) { // output raw format System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); continue; } Document doc = searcher.doc(hits[i].doc); String path = doc.get("path"); if (path != null) { System.out.println((i + 1) + ". " + path); String title = doc.get("title"); System.out.println(" Title: " + doc.get("title")); System.out.println(" " + doc.get("summary")); } else { System.out.println((i + 1) + ". 
" + "No path for this document"); } } if (!interactive || end == 0) { break; } if (numTotalHits >= end) { boolean quit = false; while (true) { System.out.print("Press "); if (start - hitsPerPage >= 0) { System.out.print("(p)revious page, "); } if (start + hitsPerPage < numTotalHits) { System.out.print("(n)ext page, "); } System.out.println("(q)uit or enter number to jump to a page."); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'q') { quit = true; break; } if (line.charAt(0) == 'p') { start = Math.max(0, start - hitsPerPage); break; } else if (line.charAt(0) == 'n') { if (start + hitsPerPage < numTotalHits) { start += hitsPerPage; } break; } else { int page = Integer.parseInt(line); if ((page - 1) * hitsPerPage < numTotalHits) { start = (page - 1) * hitsPerPage; break; } else { System.out.println("No such page"); } } } if (quit) break; end = Math.min(numTotalHits, start + hitsPerPage); } } }
From source file:index.IndexUtils.java
/**
 * Parses {@code key} against the "name" field, runs the query (top 800 hits)
 * and sets up an HTML highlighter wrapping matches in red font tags.
 *
 * NOTE(review): the code that actually builds highlighted results is commented
 * out inside the loop, so this method always returns an EMPTY list on success
 * and null when a ParseException/IOException is caught. The per-hit
 * TokenStream is created but never consumed. Presumably the IndexResult lines
 * were disabled deliberately -- confirm before re-enabling.
 *
 * @param indexSearcher searcher to run the query against
 * @param key           raw query string for the "name" field
 * @return an empty list on success; null if parsing or I/O fails
 * @throws ClassNotFoundException declared for the (commented-out) reflective
 *                                getIndexResult call; never thrown as written
 */
public static List highlight(IndexSearcher indexSearcher, String key) throws ClassNotFoundException {
    try {
        QueryParser queryParser = new QueryParser("name", new StandardAnalyzer());
        Query query = queryParser.parse(key);
        // Collect up to 800 hits.
        TopDocCollector collector = new TopDocCollector(800);
        indexSearcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        Highlighter highlighter = null;
        // Wrap matched fragments in <font color='red'>...</font>.
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(200));
        List list = new ArrayList();
        Document doc;
        for (int i = 0; i < hits.length; i++) {
            //System.out.println(hits[i].score);
            doc = indexSearcher.doc(hits[i].doc);
            // Token stream over the stored "name" value; currently unused.
            TokenStream tokenStream = new StandardAnalyzer().tokenStream("name",
                    new StringReader(doc.get("name")));
            //                IndexResult ir = getIndexResult(doc,"index.IndexResult");
            //                ir.setName(highlighter.getBestFragment(tokenStream, doc.get("name")));
            //                list.add(ir);
        }
        return list;
    } catch (ParseException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    // Reached only after a caught exception.
    return null;
}
From source file:index.reader.LuceneQuery.java
License:Apache License
/** * This demonstrates a typical paging search scenario, where the search engine presents * pages of size n to the user. The user can then go to the next page if interested in * the next hits.//from w ww. j av a 2s .c o m * <p/> * When the query is executed for the first time, then only enough results are collected * to fill 5 result pages. If the user wants to page beyond this limit, then the query * is executed another time and all hits are collected. */ public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw, boolean interactive) throws IOException { // Collect enough docs to show 5 pages TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; System.out.println(numTotalHits + " total matching documents"); int start = 0; int end = Math.min(numTotalHits, hitsPerPage); while (true) { if (end > hits.length) { System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected."); System.out.println("Collect more (y/n) ?"); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') { break; } hits = searcher.search(query, numTotalHits).scoreDocs; } end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { if (raw) { // output raw format System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); continue; } Document doc = searcher.doc(hits[i].doc); String path = doc.get("resource_uri"); if (path != null) { System.out.println((i + 1) + ". " + path); String title = doc.get("title"); if (title != null) { System.out.println(" Title: " + doc.get("title")); } } else { System.out.println((i + 1) + ". 
" + "No path for this document"); } } if (!interactive || end == 0) { break; } if (numTotalHits >= end) { boolean quit = false; while (true) { System.out.print("Press "); if (start - hitsPerPage >= 0) { System.out.print("(p)revious page, "); } if (start + hitsPerPage < numTotalHits) { System.out.print("(n)ext page, "); } System.out.println("(q)uit or enter number to jump to a page."); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'q') { quit = true; break; } if (line.charAt(0) == 'p') { start = Math.max(0, start - hitsPerPage); break; } else if (line.charAt(0) == 'n') { if (start + hitsPerPage < numTotalHits) { start += hitsPerPage; } break; } else { int page = Integer.parseInt(line); if ((page - 1) * hitsPerPage < numTotalHits) { start = (page - 1) * hitsPerPage; break; } else { System.out.println("No such page"); } } } if (quit) break; end = Math.min(numTotalHits, start + hitsPerPage); } } }
From source file:indexer.Retriever.java
private void calculateIFIDF(IndexReader reader, IndexSearcher searcher, String words, TopDocs results) throws IOException { ScoreDoc[] hits = results.scoreDocs; System.out.println("\nIDF"); for (String word : words.split(" ")) { System.out.println(getIDF(reader, word)); }//w ww. ja v a 2 s. c om System.out.println(); int counter = 1; for (ScoreDoc hit : hits) { Document doc = searcher.doc(hit.doc); System.out.println(counter + ". " + doc.get(Constants.titleField) + " (score: " + hit.score + ")"); for (String word : words.split(" ")) { System.out.println(getTF(reader, hit.doc, word)); } counter++; } }
From source file:indexer.SearchFiles.java
License:Apache License
/** * This demonstrates a typical paging search scenario, where the search engine presents * pages of size n to the user. The user can then go to the next page if interested in * the next hits.//from www . j av a2 s .c o m * * When the query is executed for the first time, then only enough results are collected * to fill 5 result pages. If the user wants to page beyond this limit, then the query * is executed another time and all hits are collected. * @return * */ public static String[] doPagingSearch(IndexSearcher searcher, Query query, int hitsPerPage, boolean raw, boolean interactive) throws IOException { ArrayList<String> docPaths = new ArrayList<String>(); // Collect enough docs to show 5 pages TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; System.out.println(numTotalHits + " total matching documents"); int start = 0; int end = numTotalHits; // int end = Math.min(numTotalHits, hitsPerPage); // while (true) { if (end > hits.length) { // System.out.println("Only results 1 - " + hits.length +" of " + numTotalHits + " total matching documents collected."); // System.out.println("Collect more (y/n) ?"); /* String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') { break; } */ hits = searcher.search(query, numTotalHits).scoreDocs; } // end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { if (raw) { // output raw format System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); continue; } Document doc = searcher.doc(hits[i].doc); String path = doc.get("path"); if (path != null) { docPaths.add(path); //Add to arraylist of candidate document paths // System.out.println((i+1) + ". " + path); String title = doc.get("title"); if (title != null) { System.out.println(" Title: " + doc.get("title")); } } else { System.out.println((i + 1) + ". 
" + "No path for this document"); } } /* if (!interactive || end == 0) { break; } if (numTotalHits >= end) { boolean quit = false; while (true) { System.out.print("Press "); if (start - hitsPerPage >= 0) { System.out.print("(p)revious page, "); } if (start + hitsPerPage < numTotalHits) { System.out.print("(n)ext page, "); } System.out.println("(q)uit or enter number to jump to a page."); String line = in.readLine(); if (line.length() == 0 || line.charAt(0)=='q') { quit = true; break; } if (line.charAt(0) == 'p') { start = Math.max(0, start - hitsPerPage); break; } else if (line.charAt(0) == 'n') { if (start + hitsPerPage < numTotalHits) { start+=hitsPerPage; } break; } else { int page = Integer.parseInt(line); if ((page - 1) * hitsPerPage < numTotalHits) { start = (page - 1) * hitsPerPage; break; } else { System.out.println("No such page"); } } } if (quit) break; end = Math.min(numTotalHits, start + hitsPerPage); } }*/ String[] paths = new String[docPaths.size()]; docPaths.toArray(paths); return paths; }
From source file:info.extensiblecatalog.OAIToolkit.db.LuceneSearcher.java
License:Open Source License
public String getLatestDatestamp() { String latest = null;//from ww w. j av a 2 s.com try { IndexSearcher searcher = getSearcher(); Document doc = searcher.doc(searcher.maxDoc() - 1); Field[] flds = doc.getFields("modification_date"); // this field is stored in order (if it weren't we'd have to sort them first) // most recent is at the top of the list latest = flds[0].stringValue(); prglog.info("getLatestDatestamp:" + latest); // It's extremely possible that the max doc id is NOT the most recent record // we just need to narrow our range search to something reasonable Sort sort = new Sort(new SortField("modification_date", SortField.STRING, true)); String queryString = "+modification_date:[\"" + latest + "\" TO \"" + TextUtil.utcToMysqlTimestamp(TextUtil.nowInUTC()) + "\"]"; prglog.info("queryString for latest datestamp:" + queryString); TopDocs hits = search(queryString, sort, 1); if (hits.scoreDocs.length > 0) { int id = hits.scoreDocs[0].doc; doc = searcher.doc(id); flds = doc.getFields("modification_date"); latest = flds[0].stringValue(); } prglog.info("getLatestDatestamp pass two:" + latest); } catch (Exception e) { prglog.error("[PRG] " + e); } return latest; }
From source file:info.johtani.jjug.lucene.sample.SearcherSample.java
License:Apache License
public static void main(String[] args) { String indexDirectory = "./indexdir"; //String keyword = ""; String keyword = "johtani"; IndexReader reader = null;//from w w w.j a v a 2s. com try { //?? Directory dir = FSDirectory.open(new File(indexDirectory)); //IndexReader?? reader = DirectoryReader.open(dir); //IndexSearcher?? IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer; //Standard analyzer = new StandardAnalyzer(); // //analyzer = new JapaneseAnalyzer(); //?? QueryParser parser = new QueryParser("content", analyzer); //???? Query query = parser.parse(keyword); //?????1????? TopDocs hits = searcher.search(query, 10); //???? System.out.println("Found " + hits.totalHits + " document(s)"); //???? for (ScoreDoc scoreDoc : hits.scoreDocs) { System.out.println("--- " + scoreDoc.doc + " ---"); //ID?? Document doc = searcher.doc(scoreDoc.doc); //??? System.out.println(doc.get("content")); System.out.println("---------"); } } catch (IOException | ParseException e) { e.printStackTrace(); } finally { try { if (reader != null) { reader.close(); } } catch (IOException e) { // ignore } } }
From source file:InformationRetrieval.Search.java
License:Apache License
/** * This demonstrates a typical paging search scenario, where the search engine presents * pages of size n to the user. The user can then go to the next page if interested in * the next hits.//from w w w .j av a 2 s. c o m * * When the query is executed for the first time, then only enough results are collected * to fill 5 result pages. If the user wants to page beyond this limit, then the query * is executed another time and all hits are collected. * @return * */ public static ArrayList<Store> doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw, boolean interactive) throws IOException { ArrayList<Store> list1 = new ArrayList<Store>(); // Collect enough docs to show 5 pages TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; // list1.add(numTotalHits +" total matching documents"); // System.out.println(numTotalHits + " total matching documents"); int start = 0; int end = Math.min(numTotalHits, hitsPerPage); while (true) { if (end > hits.length) { System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected."); System.out.println("Collect more (y/n) ?"); String line = "n"; if (line.length() == 0 || line.charAt(0) == 'n') { break; } hits = searcher.search(query, numTotalHits).scoreDocs; } end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { if (raw) { // output raw format //list1.add("doc="+hits[i].doc+" score="+hits[i].score); // System.out.println("doc="+hits[i].doc+" score="+hits[i].score); continue; } Document doc = searcher.doc(hits[i].doc); String path = doc.get("path"); if (path != null) { // System.out.println((i+1) + ". " + path); //list1.add((i+1) + ". 
" + path); BufferedReader OutWriter = new BufferedReader(new FileReader(path)); String line = OutWriter.readLine(); String title1 = OutWriter.readLine(); String s1 = "NA", s2 = "NA", s3 = "NA"; while (OutWriter.readLine() != null) { s1 = OutWriter.readLine().trim(); break; } while (OutWriter.readLine() != null) { s2 = OutWriter.readLine().trim(); break; } while (OutWriter.readLine() != null) { // s3 = OutWriter.readLine().trim(); break; } String small = s1 + " " + s2 + " " + s3; Store a = new Store(title1, line, small); list1.add(a); //System.out.println(a.getsnippet() + "--------------------"); //System.out.println(a.gettitle()); //System.out.println(a.geturl()); //System.out.println(line); //fileWriter.write(line); //fileWriter.newLine(); String title = doc.get("title"); if (title != null) { // System.out.println(" Title: " + doc.get("title")); // list1.add(" Title: " + doc.get("title")); } } else { // System.out.println((i+1) + ". " + "No path for this document"); // list1.add((i+1) + ". " + "No path for this document"); } } if (!interactive || end == 0) { break; } if (numTotalHits >= end) { boolean quit = false; while (true) { System.out.print("Press "); if (start - hitsPerPage >= 0) { System.out.print("(p)revious page, "); } if (start + hitsPerPage < numTotalHits) { System.out.print("(n)ext page, "); } System.out.println("(q)uit or enter number to jump to a page."); String line = "q"; if (line.length() == 0 || line.charAt(0) == 'q') { quit = true; break; } if (line.charAt(0) == 'p') { start = Math.max(0, start - hitsPerPage); break; } else if (line.charAt(0) == 'n') { if (start + hitsPerPage < numTotalHits) { start += hitsPerPage; } break; } else { int page = Integer.parseInt(line); if ((page - 1) * hitsPerPage < numTotalHits) { start = (page - 1) * hitsPerPage; break; } else { System.out.println("No such page"); } } } if (quit) break; end = Math.min(numTotalHits, start + hitsPerPage); } } return list1; }