List of usage examples for org.apache.lucene.search IndexSearcher doc
public Document doc(int docID) throws IOException
.getIndexReader().document(docID)
From source file:ch.algotrader.rest.index.SecurityIndexer.java
License:Open Source License
/**
 * Fetches the stored Lucene {@link Document} referenced by a single search hit.
 *
 * @param searcher the searcher that produced the hit
 * @param scoreDoc the hit whose docID identifies the stored document
 * @return the stored document for {@code scoreDoc.doc}
 * @throws UnrecoverableCoreException if the index cannot be read
 */
private Document searchDocument(IndexSearcher searcher, ScoreDoc scoreDoc) {
    final int docId = scoreDoc.doc;
    try {
        return searcher.doc(docId);
    } catch (IOException e) {
        // Index corruption / disk failure is not recoverable at this layer.
        throw new UnrecoverableCoreException("Unexpected I/O error accessing security index", e);
    }
}
From source file:choco.lucene.IKAnalyzerDemo.java
License:Apache License
public static void main(String[] args) { //Lucene Document?? String fieldName = "text"; // //w ww.j a v a 2s. c o m String text = "IK Analyzer???????"; //IKAnalyzer? Analyzer analyzer = new IKAnalyzer(); Directory directory = null; IndexWriter iwriter = null; IndexReader ireader = null; IndexSearcher isearcher = null; try { // directory = new RAMDirectory(); //?IndexWriterConfig IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_34, analyzer); iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); iwriter = new IndexWriter(directory, iwConfig); // Document doc = new Document(); doc.add(new Field("ID", "10000", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.ANALYZED)); iwriter.addDocument(doc); iwriter.close(); //?********************************** //? ireader = IndexReader.open(directory); isearcher = new IndexSearcher(ireader); String keyword = "?"; //QueryParser?Query QueryParser qp = new QueryParser(Version.LUCENE_34, fieldName, analyzer); qp.setDefaultOperator(QueryParser.AND_OPERATOR); Query query = qp.parse(keyword); //?5? TopDocs topDocs = isearcher.search(query, 5); System.out.println("" + topDocs.totalHits); // ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (int i = 0; i < topDocs.totalHits; i++) { Document targetDoc = isearcher.doc(scoreDocs[i].doc); System.out.println("" + targetDoc.toString()); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { e.printStackTrace(); } } if (directory != null) { try { directory.close(); } catch (IOException e) { e.printStackTrace(); } } } }
From source file:ci6226.eval_index_reader.java
public static void Searchit(IndexReader reader, IndexSearcher searcher, Analyzer _analyzer, String field, String[] _searchList, int _topn, PrintWriter writer) throws org.apache.lucene.queryparser.classic.ParseException, IOException, InvalidTokenOffsetsException { Analyzer analyzer = _analyzer;//from w ww . j a v a 2s . co m QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer); String[] testString = _searchList;//{"to","123","impressed","Geezer","geezer","semi-busy","\"eggs vegetable\"","gs veget","\"gs veget\""};//,"good","I","but","coffee"}; for (int j = 0; j < testString.length; j++) { String lstr = String.valueOf(j) + "," + testString[j]; Query query = parser.parse(testString[j]); System.out.println("Searching for: " + query.toString(field)); TopDocs topdocs = searcher.search(query, _topn); lstr += "," + topdocs.totalHits; ScoreDoc[] scoreDocs = topdocs.scoreDocs; SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query.rewrite(reader))); for (int i = 0; i < scoreDocs.length; i++) { int doc = scoreDocs[i].doc; Document document = searcher.doc(doc); // System.out.println("Snippet=" + document.get(field)); System.out.println(i); String text = document.get(field); TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), doc, field, analyzer); TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "..."); String line = ""; for (int m = 0; m < frag.length; m++) { if ((frag[m] != null) && (frag[m].getScore() > 0)) { System.out.println((frag[m].toString())); line = frag[m].toString(); line = line.replaceAll("\n", ""); line = line.replaceAll("\r", ""); line = line.replaceAll("\"", ""); line = line.replaceAll(",", " "); } } lstr += "," + line; lstr += "," + String.valueOf(scoreDocs[i].score); } writer.write(lstr + "\n"); System.out.println("Search for:" + 
testString[j] + " Total hits=" + scoreDocs.length); System.out.println("////////////////////////////////////////////////////"); } }
From source file:ci6226.facetsearch.java
public static void main(String[] args) throws Exception { String index = "./myindex"; String field = "text"; String queries = null;/* w w w .j a va2s .c om*/ int hitsPerPage = 10; boolean raw = false; //http://lucene.apache.org/core/4_0_0/facet/org/apache/lucene/facet/doc-files/userguide.html#facet_accumulation IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); // :Post-Release-Update-Version.LUCENE_XY: //TODO: SAME AS HOW U BUILD INDEX Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47); BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); } else { in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } // :Post-Release-Update-Version.LUCENE_XY: QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer); while (true) { System.out.println("Enter query: "); String line = in.readLine(); line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); Date start = new Date(); searcher.search(query, null, 100); Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; //N= max docs //df = totoal matched doc //idf=log(N/df) for (int i = 0; i < hits.length; i++) { Document doc = searcher.doc(hits[i].doc); System.out.println(ANSI_BLUE + (i + 1) + ANSI_RESET + "\nScore=\t" + hits[i].score); String rtext = doc.get(field); System.out.println("Text=\t" + rtext); Terms vector = reader.getTermVector(i, "text"); if (vector == null) continue; // System.out.println(vector.getSumDocFreq()); // Terms vector = reader.getTermVector(hits[i].doc, field); //hits[i].doc=docID TermsEnum termsEnum = vector.iterator(null); termsEnum = 
vector.iterator(termsEnum); Map<String, Integer> frequencies = new HashMap<>(); BytesRef text = null; while ((text = termsEnum.next()) != null) { String term = text.utf8ToString(); int freq = (int) termsEnum.totalTermFreq(); frequencies.put(term, freq); // System.out.println("Time: "+term + " idef "+freq); } } // String[] facetCatlog={""}; System.out.println(numTotalHits + " total matching documents"); } reader.close(); }
From source file:ci6226.loadIndex.java
/**
 * This demonstrates a typical paging search scenario, where the search engine
 * presents pages of size n to the user. The user can then go to the next page
 * if interested in the next hits.
 *
 * When the query is executed for the first time, only enough results are
 * collected to fill 5 result pages. If the user wants to page beyond this
 * limit, the query is executed another time and all hits are collected.
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive, Analyzer analyzer) throws IOException, InvalidTokenOffsetsException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    // start/end delimit the half-open window [start, end) of hits shown per page.
    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        // The user paged past what was collected: offer to re-run the query
        // collecting every hit this time.
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }
            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format: docID and score only
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("review_id");
            if (path != null) {
                System.out.println(ANSI_BLUE + (i + 1) + ANSI_RESET + "\nScore=\t" + hits[i].score);
                String title = doc.get("business_id");
                if (title != null) {
                    // Re-tokenize the stored text and highlight the best
                    // fragments matching the query (snippet display).
                    String text = doc.get("text");
                    TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(),
                            hits[i].doc, "text", doc, analyzer);
                    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(ANSI_RED, ANSI_RESET);
                    Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
                    TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 4);
                    System.out.print("Snippet=\t");
                    for (int j = 0; j < frag.length; j++) {
                        // Only print fragments that actually matched the query.
                        if ((frag[j] != null) && (frag[j].getScore() > 0)) {
                            System.out.println((frag[j].toString()));
                        }
                    }
                    System.out.println("Full Review=\t" + doc.get("text") + "\nBusinessID=\t" + title);
                }
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }
        }

        if (!interactive || end == 0) {
            break;
        }

        // Interactive navigation: previous page / next page / quit / jump.
        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");
                int cpage = start / hitsPerPage;
                System.out.println(
                        String.format("Current page=%d,max page=%d", cpage + 1, 1 + numTotalHits / hitsPerPage));
                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    // Anything else is interpreted as a 1-based page number.
                    // NOTE(review): a non-numeric entry throws NumberFormatException here.
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit) {
                break;
            }
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}
From source file:cn.fql.blogspider.SearchMain.java
License:Open Source License
/**
 * Pages through the hits for {@code query}, printing "doc=… score=… path" lines
 * and letting the user navigate pages interactively.
 *
 * First collects only enough hits for 5 pages; if the user pages past what was
 * collected, offers to re-run the query gathering every hit.
 *
 * NOTE(review): the do/while shape below looks decompiler-generated; the code
 * is kept byte-identical and only commented.
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean interactive) throws IOException {
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;
    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");
    // start/end delimit the half-open window [start, end) of hits shown per page.
    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);
    while (true) {
        // Inner do/while keeps printing pages until the window reaches the
        // total hit count; it returns outright on quit/EOF conditions.
        do {
            if (end > hits.length) {
                // Paged past what was collected: offer to fetch all hits.
                System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                        + " total matching documents collected.");
                System.out.println("Collect more (y/n) ?");
                String line = in.readLine();
                if (line.length() == 0)
                    return;
                if (line.charAt(0) == 'n') {
                    return;
                }
                hits = searcher.search(query, numTotalHits).scoreDocs;
            }
            end = Math.min(hits.length, start + hitsPerPage);
            for (int i = start; i < end; ++i) {
                Document doc = searcher.doc(hits[i].doc);
                String path = doc.get("path");
                //String modifyDate=doc.get("modified");
                System.out.println(
                        "doc=" + hits[i].doc + " score=" + hits[i].score + "------" + (i + 1) + ". " + path);
            }
            if (!(interactive))
                return;
            if (end == 0)
                return;
        } while (numTotalHits < end);
        // Interactive navigation: previous page / next page / quit / jump.
        boolean quit = false;
        while (true) {
            System.out.print("Press ");
            if (start - hitsPerPage >= 0)
                System.out.print("(p)revious page, ");
            if (start + hitsPerPage < numTotalHits)
                System.out.print("(n)ext page, ");
            System.out.println("(q)uit or enter number to jump to a page.");
            String line = in.readLine();
            if ((line.length() == 0) || (line.charAt(0) == 'q')) {
                quit = true;
                break;
            }
            if (line.charAt(0) == 'p') {
                start = Math.max(0, start - hitsPerPage);
                break;
            }
            if (line.charAt(0) == 'n') {
                if (start + hitsPerPage >= numTotalHits)
                    break;
                start += hitsPerPage;
                break;
            }
            // Anything else is interpreted as a 1-based page number.
            // NOTE(review): a non-numeric entry throws NumberFormatException here.
            int page = Integer.parseInt(line);
            if ((page - 1) * hitsPerPage < numTotalHits) {
                start = (page - 1) * hitsPerPage;
                break;
            }
            System.out.println("No such page");
        }
        if (quit)
            return;
        end = Math.min(numTotalHits, start + hitsPerPage);
    }
}
From source file:collene.Freedb.java
License:Apache License
public static void DoSearch(Directory directory) throws Exception { out.println("I think these are the files:"); for (String s : directory.listAll()) { out.println(s);/*from w w w. j a va2 s. c o m*/ } IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9); QueryParser parser = new QueryParser(Version.LUCENE_4_9, "any", analyzer); for (int i = 0; i < 5; i++) { long searchStart = System.currentTimeMillis(); Query query = parser.parse("morrissey"); //Query query = parser.parse("Dance"); TopDocs docs = searcher.search(query, 10); long searchEnd = System.currentTimeMillis(); out.println(String.format("%s %d total hits in %d", directory.getClass().getSimpleName(), docs.totalHits, searchEnd - searchStart)); long lookupStart = System.currentTimeMillis(); for (ScoreDoc d : docs.scoreDocs) { Document doc = searcher.doc(d.doc); out.println(String.format("%d %.2f %d %s", d.doc, d.score, d.shardIndex, doc.getField("any").stringValue())); } long lookupEnd = System.currentTimeMillis(); out.println(String.format("Document lookup took %d ms for %d documents", lookupEnd - lookupStart, docs.scoreDocs.length)); } directory.close(); }
From source file:com.adanac.module.blog.search.LuceneHelper.java
License:Apache License
private static List<Map<String, String>> search(String searchText, String path, String title, LoadQuery loadQuery) {/*w ww . j av a 2 s . c om*/ try { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(INDEX_PATH + path))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new SmartChineseAnalyzer(); QueryParser parser = new QueryParser("indexedContent", analyzer); Query query = parser.parse(searchText); TopDocs resultDocs = searcher.search(query, 100); ScoreDoc[] scoreDocs = resultDocs.scoreDocs; // SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<span style=\"color:red;\">", "</span>"); Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(150)); List<Map<String, String>> result = new ArrayList<>(); List<Integer> idList = new ArrayList<>(); for (int i = 0; i < scoreDocs.length; i++) { Document doc = searcher.doc(scoreDocs[i].doc); Integer id = Integer.valueOf(doc.get("id")); if (!idList.contains(id)) { String indexedContent = doc.get("indexedContent"); TokenStream tokenStream = analyzer.tokenStream("indexedContent", indexedContent); Map<String, String> data = loadQuery.getById(id); String highlighterString = highlighter.getBestFragment(tokenStream, indexedContent); if (highlighterString.contains(SEPARATOR)) { String[] array = highlighterString.split(SEPARATOR); data.put(title, array[0]); if (array.length > 1) { data.put("summary", array[1]); } } else { data.put("summary", highlighterString); } result.add(data); idList.add(id); } } return result; } catch (Exception e) { logger.error("search failed ...", e); } return new ArrayList<>(); }
From source file:com.agiletec.plugins.jacms.aps.system.services.searchengine.SearcherDAO.java
License:Open Source License
/**
 * Searches for a list of content identifiers matching the given word in the
 * current language. A multi-word search string such as "Venice Amsterdam" is
 * treated as "Venice OR Amsterdam".
 *
 * @param langCode the current language code (also the indexed field searched).
 * @param word the word to search for.
 * @param allowedGroups the groups allowed to view the results. If the
 *        collection is null or empty, the search is restricted to content
 *        referenced by the free-access group; if it contains the
 *        administrators group, the resulting content ids are not filtered by
 *        group at all.
 * @return the list of matching content identifiers.
 * @throws ApsSystemException on index access or query parsing errors.
 */
public List<String> searchContentsId(String langCode, String word, Collection<String> allowedGroups)
        throws ApsSystemException {
    List<String> contentsId = new ArrayList<String>();
    IndexSearcher searcher = null;
    try {
        searcher = this.getSearcher();
        // The language code doubles as the field name holding localized text.
        QueryParser parser = new QueryParser(Version.LUCENE_30, langCode, this.getAnalyzer());
        String queryString = this.createQueryString(langCode, word, allowedGroups);
        Query query = parser.parse(queryString);
        // Hard cap on the number of collected hits.
        int maxSearchLength = 1000;
        TopDocs topDocs = searcher.search(query, null, maxSearchLength);
        ScoreDoc[] scoreDoc = topDocs.scoreDocs;
        if (scoreDoc.length > 0) {
            for (int index = 0; index < scoreDoc.length; index++) {
                ScoreDoc sDoc = scoreDoc[index];
                Document doc = searcher.doc(sDoc.doc);
                contentsId.add(doc.get(IIndexerDAO.CONTENT_ID_FIELD_NAME));
            }
        }
    } catch (IOException e) {
        // Message text intentionally left in Italian (runtime behavior).
        throw new ApsSystemException("Errore in estrazione " + "documento in base ad indice", e);
    } catch (ParseException e) {
        throw new ApsSystemException("Errore parsing nella ricerca", e);
    } finally {
        // Always return the searcher to the pool/owner.
        this.releaseSearcher(searcher);
    }
    return contentsId;
}
From source file:com.aliasi.lingmed.medline.IndexMedline.java
License:Lingpipe license
private String getLastUpdate(File index) throws IOException { System.out.println("Got index" + index); IndexReader reader = null;/*from w w w .j a v a 2 s . co m*/ IndexSearcher searcher = null; try { if (isNewDirectory(mIndex)) return LOW_SORT_STRING; Directory fsDir = FSDirectory.open(mIndex); reader = IndexReader.open(fsDir); searcher = new IndexSearcher(reader); Term term = new Term(Fields.MEDLINE_DIST_FIELD, Fields.MEDLINE_DIST_VALUE); SortField sortField = new SortField(Fields.MEDLINE_FILE_FIELD, SortField.STRING); Sort sort = new Sort(sortField); Query query = new TermQuery(term); TopFieldDocs results = searcher.search(query, null, 1, sort); if (results.totalHits == 0) { searcher.close(); reader.close(); return LOW_SORT_STRING; } // if (mLogger.isDebugEnabled()) // mLogger.debug("num MEDLINE_FILE docs: " + results.totalHits); Document d = searcher.doc(results.scoreDocs[0].doc); return d.get(Fields.MEDLINE_FILE_FIELD); } finally { if (searcher != null) searcher.close(); if (reader != null) reader.close(); } }