List of usage examples for org.apache.lucene.search IndexSearcher doc
public Document doc(int docID) throws IOException
Sugar for .getIndexReader().document(docID) — loads the stored fields of the document with the given ID.
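Before the collected examples, here is a minimal, self-contained sketch of the pattern they all share: run a query, then call searcher.doc(scoreDoc.doc) on each hit to load its stored fields. It targets the Lucene 5.x-era API used by most examples below; the index path "index" and the field names "contents" and "title" are illustrative assumptions, not taken from any one example.

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;

public class IndexSearcherDocExample {
    public static void main(String[] args) throws Exception {
        // Assumed index location and field names for illustration only
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("index")))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Query query = new QueryParser("contents", new StandardAnalyzer()).parse("lucene");
            TopDocs topDocs = searcher.search(query, 10);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // doc(int) loads the hit's stored fields; it is sugar for
                // searcher.getIndexReader().document(scoreDoc.doc)
                Document doc = searcher.doc(scoreDoc.doc);
                System.out.println(doc.get("title") + " (score=" + scoreDoc.score + ")");
            }
        }
    }
}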
From source file:coreservlets.consolesearch.java
License:Apache License
/**
 * This demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 *
 * When the query is executed for the first time, only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, the query
 * is executed another time and all hits are collected.
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;
    System.out.println("hitsPerPage is: " + hitsPerPage);

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        System.out.println("now hits length is: " + hits.length);
        System.out.println("now end is: " + end);
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }
            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String url = doc.get("url");
            if (url != null) {
                System.out.println((i + 1) + ". " + url);
                String title = doc.get("title");
                if (title != null) {
                    System.out.println("   Title: " + title);
                    String origstr = doc.getField("contents").stringValue();
                    System.out.println(origstr);
                }
            } else {
                System.out.println((i + 1) + ". " + "No url for this document");
            }
        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}
From source file:cs571.proj1.SearchFiles.java
License:Apache License
/**
 * This demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 *
 * When the query is executed for the first time, only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, the query
 * is executed another time and all hits are collected.
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }
            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String docID = doc.get("docID");
            if (docID != null) {
                System.out.println((i + 1) + " " + docID);
            } else {
                System.out.println((i + 1) + ". " + "No docID for this document");
            }
        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}
From source file:csdn.lucene.first.version.Searcher.java
License:Apache License
protected ArrayList<String> search(String indexDir, String q) {
    // Lucene 5.5: FSDirectory.open() takes a java.nio.file.Path instead of a File
    java.nio.file.Path pathA = Paths.get(indexDir);
    FSDirectory dir;
    IndexSearcher is;
    try {
        dir = FSDirectory.open(pathA);
        DirectoryReader dReader = DirectoryReader.open(dir);
        is = new IndexSearcher(dReader);
        QueryParser parser = new QueryParser("contents", analyzer);
        Query query = parser.parse(q);
        long start = System.currentTimeMillis();
        // is.search(): finds the top n hits for the query.
        // TopDocs: represents hits returned by IndexSearcher.search(Query, int).
        TopDocs hits = is.search(query, 10); // 5
        long end = System.currentTimeMillis();
        System.err.println("Found " + hits.totalHits +    // 6
                " document(s) (in " + (end - start) +     // 6
                " milliseconds) that matched query '" +   // 6
                q + "':");                                // 6
        // scoreDocs: the top hits for the query.
        for (ScoreDoc scoreDoc : hits.scoreDocs) {
            Document doc = is.doc(scoreDoc.doc);
            hit_ids.add(doc.get("fieldname"));
            hit_paths.add(doc.get("fullpath"));
        }
        dReader.close();
        dir.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
    return hit_ids;
}
From source file:cz.muni.fi.japanesedictionary.engine.FragmentListAsyncTask.java
License:Open Source License
/**
 * Loads translation using Lucene.
 */
@Override
protected List<Translation> doInBackground(String... params) {
    String expression = params[0];
    String part = params[1];

    SharedPreferences settings = mContext.getSharedPreferences(ParserService.DICTIONARY_PREFERENCES, 0);
    String pathToDictionary = settings.getString(Const.PREF_JMDICT_PATH, null);

    SharedPreferences sharedPrefs = PreferenceManager.getDefaultSharedPreferences(mContext);
    final boolean englishBool = sharedPrefs.getBoolean("language_english", false);
    final boolean frenchBool = sharedPrefs.getBoolean("language_french", false);
    final boolean dutchBool = sharedPrefs.getBoolean("language_dutch", false);
    final boolean germanBool = sharedPrefs.getBoolean("language_german", false);
    final boolean russianBool = sharedPrefs.getBoolean("language_russian", false);
    final boolean searchOnlyFavorised = sharedPrefs.getBoolean("search_only_favorite", false);
    final boolean searchDeinflected = sharedPrefs.getBoolean("search_deinflected", false);

    final List<Translation> translations = new ArrayList<>();

    if (expression == null) {
        // first run
        Log.i(LOG_TAG, "First run - last 10 translations");
        GlossaryReaderContract database = new GlossaryReaderContract(mContext);
        List<Translation> translationsTemp = database.getLastTranslations(10);
        database.close();
        return translationsTemp;
    }
    if (pathToDictionary == null) {
        Log.e(LOG_TAG, "No path to jmdict dictionary");
        return null;
    }
    File file = new File(pathToDictionary);
    if (!file.exists() || !file.canRead()) {
        Log.e(LOG_TAG, "Can't read jmdict dictionary directory");
        return null;
    }
    if (expression.length() < 1) {
        Log.w(LOG_TAG, "No expression to translate");
        return null;
    }

    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_36);
    IndexReader reader;
    try {
        final String search;
        final String hiragana;
        boolean onlyReb = false;
        if (Pattern.matches("\\p{Latin}*", expression)) {
            // only romaji
            onlyReb = true;
            Log.i(LOG_TAG, "Only latin letters, converting to hiragana.");
            expression = TranscriptionConverter.kunreiToHepburn(expression);
            expression = RomanizationEnum.Hepburn.toHiragana(expression);
        }
        hiragana = expression;
        expression = insertSpaces(expression);
        switch (part) {
        case "end":
            search = "\"" + expression + "lucenematch\"";
            break;
        case "beginning":
            search = "\"lucenematch " + expression + "\"";
            break;
        case "middle":
            search = "\"" + expression + "\"";
            break;
        default:
            if (searchDeinflected) {
                StringBuilder sb = new StringBuilder("\"lucenematch " + expression + "lucenematch\"");
                for (Predicate predicate : Deconjugator.deconjugate(hiragana)) {
                    if (predicate.isSuru()) {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND (pos:vs OR pos:vs-c OR pos:vs-s OR pos:vs-i))");
                    } else if (predicate.isKuru()) {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND pos:vk)");
                    } else if (predicate.isIku()) {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND pos:v5k-s)");
                    } else if (predicate.isIAdjective()) {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND pos:adj-i)");
                    } else {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND (pos:v1 OR pos:v2 OR pos:v5 OR pos:vz OR pos:vi OR pos:vn OR pos:vr))");
                    }
                }
                search = sb.toString();
            } else {
                search = "\"lucenematch " + expression + "lucenematch\"";
            }
        }
        Log.i(LOG_TAG, " Searching for: " + search);

        Query q;
        if (onlyReb) {
            q = (new QueryParser(Version.LUCENE_36, "index_japanese_reb", analyzer)).parse(search);
        } else {
            StandardQueryParser parser = new StandardQueryParser(analyzer);
            q = parser.parse(search, "japanese");
        }

        Directory dir = FSDirectory.open(file);
        reader = IndexReader.open(dir);
        final IndexSearcher searcher = new IndexSearcher(reader);

        Collector collector = new Collector() {
            int max = 1000;
            int count = 0;
            private int docBase;

            @Override
            public boolean acceptsDocsOutOfOrder() {
                return true;
            }

            @Override
            public void collect(int docID) throws IOException {
                Document d = searcher.doc(docID + docBase);
                Translation translation = new Translation();
                String prioritized = d.get("prioritized");
                if (searchOnlyFavorised && prioritized == null) {
                    return;
                }
                if (prioritized != null) { // is prioritized
                    translation.setPrioritized(true);
                }
                String ruby = d.get("ruby");
                if (ruby != null && ruby.length() > 0) {
                    translation.setRuby(ruby);
                }
                String japanese_keb = d.get("japanese_keb");
                if (japanese_keb != null && japanese_keb.length() != 0) {
                    translation.parseJapaneseKeb(japanese_keb);
                }
                String japanese_reb = d.get("japanese_reb");
                if (japanese_reb != null && japanese_reb.length() != 0) {
                    translation.parseJapaneseReb(japanese_reb);
                }
                String english = d.get("english");
                if (english != null && english.length() != 0) {
                    translation.parseEnglish(english);
                }
                String french = d.get("french");
                if (french != null && french.length() != 0) {
                    translation.parseFrench(french);
                }
                String dutch = d.get("dutch");
                if (dutch != null && dutch.length() != 0) {
                    translation.parseDutch(dutch);
                }
                String german = d.get("german");
                if (german != null && german.length() != 0) {
                    translation.parseGerman(german);
                }
                String russian = d.get("russian");
                if (russian != null && russian.length() != 0) {
                    translation.parseRussian(russian);
                }
                if ((englishBool && translation.getEnglishSense() != null)
                        || (dutchBool && translation.getDutchSense() != null)
                        || (germanBool && translation.getGermanSense() != null)
                        || (frenchBool && translation.getFrenchSense() != null)
                        || (russianBool && translation.getRussianSense() != null)) {
                    count++;
                    if (count < max) {
                        if (!FragmentListAsyncTask.this.isCancelled()) {
                            FragmentListAsyncTask.this.publishProgress(translation);
                            translations.add(translation);
                        } else {
                            translations.clear();
                            throw new IOException("Loader canceled");
                        }
                    } else {
                        throw new IOException("Max exceeded");
                    }
                }
            }

            @Override
            public void setNextReader(IndexReader reader, int docBas) throws IOException {
                docBase = docBas;
            }

            @Override
            public void setScorer(Scorer arg0) throws IOException {
            }
        };

        searcher.search(q, collector);
        reader.close();
    } catch (IOException ex) {
        Log.e(LOG_TAG, "IO Exception: " + ex.toString());
        return translations;
    } catch (Exception ex) {
        Log.e(LOG_TAG, "Exception: " + ex.toString());
        return null;
    }
    return translations.isEmpty() ? null : translations;
}
From source file:de.anycook.db.lucene.FulltextIndex.java
License:Open Source License
public Set<String> search(String q) throws IOException {
    Set<String> recipes = new LinkedHashSet<>();
    String[] fields = new String[] { "description", "steps" };
    logger.debug(String.format("searching for %s", q));
    try (IndexReader reader = DirectoryReader.open(index)) {
        int hitsPerPage = 1000;
        IndexSearcher searcher = new IndexSearcher(reader);
        Query query = new MultiFieldQueryParser(fields, analyzer).parse(q);
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, null);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (ScoreDoc hit : hits) {
            Document d = searcher.doc(hit.doc);
            recipes.add(d.get("title"));
        }
    } catch (CorruptIndexException | ParseException e) {
        logger.error(e);
    }
    logger.debug(String.format("found %d results", recipes.size()));
    return recipes;
}
From source file:de.berlios.jhelpdesk.utils.LuceneIndexer.java
License:Open Source License
public List<Article> search(String searchQuery) {
    try {
        Query query = parser.parse(searchQuery);
        Directory directory = FSDirectory.open(new File(indexDirectory));
        IndexSearcher indexSearcher = new IndexSearcher(directory);
        TopDocs res = indexSearcher.search(query, DEFAULT_SEARCH_RESULT_LIMIT);
        List<Article> result = new ArrayList<Article>();
        for (ScoreDoc scoreDoc : res.scoreDocs) {
            Document document = indexSearcher.doc(scoreDoc.doc);
            result.add(documentToArticle(document));
        }
        indexSearcher.close();
        return result;
    } catch (Exception ex) {
        log.error(ex.getMessage(), ex);
        throw new RuntimeException(ex);
    }
}
From source file:de.cosmocode.lucene.fragments.query.AbstractLuceneQueryTestFragment.java
License:Apache License
/**
 * Searches for the given query and returns a list that has at most "max" items.
 *
 * @param query the query to search for
 * @param max the maximum number of items in the returned list
 * @return a list with the names of the found documents
 * @throws IOException if lucene throws an exception
 */
protected List<String> search(final Query query, final int max) throws IOException {
    final List<String> docList = new LinkedList<String>();
    final IndexSearcher searcher = new IndexSearcher(IndexHelper.DIRECTORY);
    final TopDocs docs = searcher.search(query, max);
    if (docs.totalHits > 0) {
        for (final ScoreDoc doc : docs.scoreDocs) {
            docList.add(searcher.doc(doc.doc).get("name"));
        }
    }
    return docList;
}
From source file:de.elbe5.cms.search.SearchBean.java
License:Open Source License
public void searchContent(ContentSearchResultData result) {
    result.getResults().clear();
    String[] fieldNames = result.getFieldNames();
    ScoreDoc[] hits = null;
    float maxScore = 0f;
    try {
        String indexPath = ApplicationPath.getAppPath() + "contentindex";
        ensureDirectory(indexPath);
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)));
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();
        MultiFieldQueryParser parser = new MultiFieldQueryParser(fieldNames, analyzer);
        String pattern = result.getPattern();
        pattern = pattern.trim();
        Query query = null;
        if (pattern.length() != 0) {
            query = parser.parse(pattern);
            //Log.log("Searching for: " + query.toString());
            TopDocs topDocs = searcher.search(query, result.getMaxSearchResults());
            hits = topDocs.scoreDocs;
            maxScore = topDocs.getMaxScore();
        }
        if (hits != null) {
            for (ScoreDoc hit : hits) {
                Document doc = searcher.doc(hit.doc);
                ContentSearchData data = null;
                String type = doc.get("type");
                switch (type) {
                case SiteSearchData.TYPE:
                    data = new SiteSearchData();
                    break;
                case PageSearchData.TYPE:
                    data = new PageSearchData();
                    break;
                case FileSearchData.TYPE:
                    data = new FileSearchData();
                    break;
                }
                assert (data != null);
                data.setDoc(doc);
                data.setScore(maxScore <= 1f ? hit.score : hit.score / maxScore);
                data.evaluateDoc();
                data.setContexts(query, analyzer);
                result.getResults().add(data);
            }
        }
        reader.close();
    } catch (Exception ignore) {
    }
}
From source file:de.elbe5.cms.search.SearchBean.java
License:Open Source License
public void searchUsers(UserSearchResultData result) {
    result.getResults().clear();
    String[] fieldNames = result.getFieldNames();
    ScoreDoc[] hits = null;
    float maxScore = 0f;
    try {
        String indexPath = ApplicationPath.getAppPath() + "userindex";
        ensureDirectory(indexPath);
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)));
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();
        MultiFieldQueryParser parser = new MultiFieldQueryParser(fieldNames, analyzer);
        String pattern = result.getPattern();
        pattern = pattern.trim();
        Query query = null;
        if (pattern.length() != 0) {
            query = parser.parse(pattern);
            //Log.log("Searching for: " + query.toString());
            TopDocs topDocs = searcher.search(query, result.getMaxSearchResults());
            hits = topDocs.scoreDocs;
            maxScore = topDocs.getMaxScore();
        }
        if (hits != null) {
            for (ScoreDoc hit : hits) {
                Document doc = searcher.doc(hit.doc);
                UserSearchData data = new UserSearchData();
                data.setDoc(doc);
                data.setScore(maxScore <= 1f ? hit.score : hit.score / maxScore);
                data.evaluateDoc();
                data.setContexts(query, analyzer);
                result.getResults().add(data);
            }
        }
        reader.close();
    } catch (Exception ignore) {
    }
}
From source file:de.hsmannheim.ss15.alr.searchengine.DefaultLuceneController.java
public List<StoredDocument> doSearch(String queryString) throws IOException, ParseException {
    String field = "contents";
    String queries = null;
    boolean raw = false;
    int hitsPerPage = 10;

    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexDir)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer();

    BufferedReader in = null;
    if (queries != null) {
        in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8);
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
    }
    QueryParser parser = new QueryParser(field, analyzer);
    Query query = parser.parse(queryString);
    Highlighter highlighter = new Highlighter(new QueryScorer(query));

    // First count all hits, then fetch every matching document
    TotalHitCountCollector collector = new TotalHitCountCollector();
    searcher.search(query, collector);
    TopDocs topDocs = searcher.search(query, Math.max(1, collector.getTotalHits()));

    List<StoredDocument> results = new ArrayList<>();
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        StoredDocument doc = searcher.doc(scoreDoc.doc);
        try {
            File file = new File(doc.get("path"));
            BufferedReader docReader = new BufferedReader(
                    new InputStreamReader(Files.newInputStream(file.toPath()), StandardCharsets.UTF_8));
            List<String> lines = new ArrayList<>();
            while (docReader.ready()) {
                lines.add(docReader.readLine());
            }
            // drop the first three lines of the file
            lines.remove(0);
            lines.remove(0);
            lines.remove(0);
            String content = "";
            for (String s : lines) {
                content = content + s;
            }
            String highLight = highlighter.getBestFragment(analyzer, null, content);
            if (highLight == null) {
                LOGGER.warn("No Highlight found");
            } else {
                doc.add(new TextField("highlight", highLight, Field.Store.YES));
            }
        } catch (InvalidTokenOffsetsException ex) {
            LOGGER.warn("No Highlight found");
        }
        results.add(doc);
    }
    reader.close();
    return results;
}