List of usage examples for org.apache.lucene.search.Collector
From source file:aplicacion.sistema.indexer.test.SearchFiles.java
License:Apache License
/** * This method uses a custom HitCollector implementation which simply prints out * the docId and score of every matching document. * // w w w.ja v a 2 s. c o m * This simulates the streaming search use case, where all hits are supposed to * be processed, regardless of their relevance. */ public static void doStreamingSearch(final Searcher searcher, Query query) throws IOException { Collector streamingHitCollector = new Collector() { private Scorer scorer; private int docBase; // simply print docId and score of every matching document @Override public void collect(int doc) throws IOException { System.out.println("doc=" + doc + docBase + " score=" + scorer.score()); } @Override public boolean acceptsDocsOutOfOrder() { return true; } @Override public void setNextReader(IndexReader reader, int docBase) throws IOException { this.docBase = docBase; } @Override public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; } }; searcher.search(query, streamingHitCollector); }
From source file:cc.osint.graphd.graph.Graph.java
License:Apache License
public List<JSONObject> query(IndexSearcher indexSearcher, String queryStr) throws Exception { long start_t = System.currentTimeMillis(); final List<JSONObject> results = new ArrayList<JSONObject>(); QueryParser qp = new QueryParser(Version.LUCENE_31, KEY_FIELD, analyzer); qp.setDefaultOperator(org.apache.lucene.queryParser.QueryParser.Operator.AND); qp.setAllowLeadingWildcard(true);/*from ww w . ja va 2 s . co m*/ Query query = qp.parse(queryStr); org.apache.lucene.search.Filter filter = new org.apache.lucene.search.CachingWrapperFilter( new QueryWrapperFilter(query)); indexSearcher.search(new MatchAllDocsQuery(), filter, new Collector() { private int docBase; IndexReader reader; // ignore scoring public void setScorer(Scorer scorer) { } // accept docs out of order public boolean acceptsDocsOutOfOrder() { return true; } public void collect(int doc) { try { Document d = reader.document(doc); JSONObject result = new JSONObject(); for (Fieldable f : d.getFields()) { result.put(f.name(), d.get(f.name())); } results.add(result); } catch (Exception ex) { ex.printStackTrace(); } } public void setNextReader(IndexReader reader, int docBase) { this.reader = reader; this.docBase = docBase; } }); long end_t = System.currentTimeMillis(); //log.info("query: hits.scoreDocs.length = " + results.size() + " (" + (end_t-start_t) + "ms)"); return results; }
From source file:com.rubenlaguna.en4j.searchlucene.NoteFinderLuceneImpl.java
License:Open Source License
public Collection<Note> find(String searchText) { if ("".equals(searchText.trim())) { return Collections.EMPTY_LIST; }//w w w .j a va2 s . co m long start = System.currentTimeMillis(); searchText = searchText.trim(); String patternStr = "\\s+"; String replaceStr = "* "; Pattern pattern = Pattern.compile(patternStr); Matcher matcher = pattern.matcher(searchText); searchText = matcher.replaceAll(replaceStr); if (Pattern.matches(".*\\w$", searchText)) { searchText = searchText + "*"; } LOG.info("search text:" + searchText); final Collection<Note> toReturn = new ArrayList<Note>(); try { IndexReader newReader = reader.reopen(); if (newReader != reader) { reader.close(); } reader = newReader; LOG.info("using index version: " + reader.getVersion()); final IndexSearcher searcher = new IndexSearcher(reader); final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29); QueryParser parser = new CustomQueryParser("all", analyzer); parser.setDefaultOperator(QueryParser.Operator.AND); Query query = parser.parse(searchText); LOG.info("query =" + query.toString()); //search the query Collector collector = new Collector() { private int docBase = 0; @Override public void setScorer(Scorer scorer) throws IOException { } @Override public void collect(int doc) throws IOException { int scoreId = doc + docBase; Document document = searcher.doc(scoreId); final String stringValue = document.getField("id").stringValue(); int docId = Integer.parseInt(stringValue); LOG.fine("doc id " + stringValue + " matches the search."); toReturn.add(nr.get(docId, false)); } @Override public void setNextReader(IndexReader reader, int docBase) throws IOException { this.docBase = docBase; } @Override public boolean acceptsDocsOutOfOrder() { return true; } }; searcher.search(query, collector); searcher.close(); } catch (ParseException ex) { Exceptions.printStackTrace(ex); } catch (CorruptIndexException ex) { Exceptions.printStackTrace(ex); } catch (IOException ex) { Exceptions.printStackTrace(ex); } 
catch (IllegalStateException ex) { LOG.info("caught " + ex.getMessage() + ". Most likely the app is shutting down"); } long delta = System.currentTimeMillis() - start; Installer.mbean.sampleSearchTime(delta); LOG.info("find took " + delta / 1000.0 + " secs. " + toReturn.size() + " results found"); return toReturn; }
From source file:com.scsb.crpro.lucene.SearchFiles.java
License:Apache License
/** * This method uses a custom HitCollector implementation which simply prints out * the docId and score of every matching document. * //from www .jav a 2s . c om * This simulates the streaming search use case, where all hits are supposed to * be processed, regardless of their relevance. */ public static void doStreamingSearch(final Searcher searcher, Query query) throws IOException { Collector streamingHitCollector = new Collector() { private Scorer scorer; private int docBase; // simply print docId and score of every matching document public void collect(int doc) throws IOException { System.out.println("doc=" + doc + docBase + " score=" + scorer.score()); } public boolean acceptsDocsOutOfOrder() { return true; } public void setNextReader(IndexReader reader, int docBase) throws IOException { this.docBase = docBase; } public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; } }; searcher.search(query, streamingHitCollector); }
From source file:cz.muni.fi.japanesedictionary.engine.FragmentListAsyncTask.java
License:Open Source License
/**
 * Loads translations from the JMdict Lucene index in the background.
 *
 * params[0] is the expression to look up, params[1] the match mode ("end",
 * "beginning", "middle", or anything else for whole-word / deinflected match).
 * Latin-only input is first converted to hiragana. Matches are published
 * incrementally via publishProgress and also collected into the returned list.
 *
 * Returns the last 10 stored translations when no expression is given, the
 * matching translations otherwise, or null on error / no results.
 */
@Override
protected List<Translation> doInBackground(String... params) {
    String expression = params[0];
    String part = params[1];
    SharedPreferences settings = mContext.getSharedPreferences(ParserService.DICTIONARY_PREFERENCES, 0);
    String pathToDictionary = settings.getString(Const.PREF_JMDICT_PATH, null);
    SharedPreferences sharedPrefs = PreferenceManager.getDefaultSharedPreferences(mContext);
    // Which sense languages the user has enabled; a hit is only kept if it has
    // a sense in at least one enabled language (see the collector below).
    final boolean englishBool = sharedPrefs.getBoolean("language_english", false);
    final boolean frenchBool = sharedPrefs.getBoolean("language_french", false);
    final boolean dutchBool = sharedPrefs.getBoolean("language_dutch", false);
    final boolean germanBool = sharedPrefs.getBoolean("language_german", false);
    final boolean russianBool = sharedPrefs.getBoolean("language_russian", false);
    final boolean searchOnlyFavorised = sharedPrefs.getBoolean("search_only_favorite", false);
    final boolean searchDeinflected = sharedPrefs.getBoolean("search_deinflected", false);
    final List<Translation> translations = new ArrayList<>();
    if (expression == null) { // first run: show the 10 most recent lookups
        Log.i(LOG_TAG, "First run - last 10 translations ");
        GlossaryReaderContract database = new GlossaryReaderContract(mContext);
        List<Translation> translationsTemp = database.getLastTranslations(10);
        database.close();
        return translationsTemp;
    }
    if (pathToDictionary == null) {
        Log.e(LOG_TAG, "No path to jmdict dictionary");
        return null;
    }
    File file = new File(pathToDictionary);
    if (!file.exists() || !file.canRead()) {
        Log.e(LOG_TAG, "Can't read jmdict dictionary directory");
        return null;
    }
    if (expression.length() < 1) {
        Log.w(LOG_TAG, "No expression to translate");
        return null;
    }
    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_36);
    IndexReader reader;
    try {
        final String search;
        final String hiragana;
        boolean onlyReb = false;
        if (Pattern.matches("\\p{Latin}*", expression)) { // only romaji input
            // Latin letters: romaji -> hiragana, then search readings only.
            onlyReb = true;
            // NOTE(review): the string literal below appears garbled by text
            // extraction (a raw line break inside a Java string is invalid);
            // the original message should be restored — TODO confirm.
            Log.i(LOG_TAG, "Only latin letters, converting to hiragana. 
");
            expression = TranscriptionConverter.kunreiToHepburn(expression);
            expression = RomanizationEnum.Hepburn.toHiragana(expression);
        }
        hiragana = expression;
        // insertSpaces prepares the expression for the CJK bigram analyzer;
        // "lucenematch" acts as an anchor token marking word boundaries.
        expression = insertSpaces(expression);
        switch (part) {
        case "end":
            search = "\"" + expression + "lucenematch\"";
            break;
        case "beginning":
            search = "\"lucenematch " + expression + "\"";
            break;
        case "middle":
            search = "\"" + expression + "\"";
            break;
        default:
            if (searchDeinflected) {
                // Add one OR clause per deconjugated candidate, constrained to
                // the part-of-speech tags that candidate's conjugation implies.
                StringBuilder sb = new StringBuilder("\"lucenematch " + expression + "lucenematch\"");
                for (Predicate predicate : Deconjugator.deconjugate(hiragana)) {
                    if (predicate.isSuru()) {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND (pos:vs OR pos:vs-c OR pos:vs-s OR pos:vs-i))");
                    } else if (predicate.isKuru()) {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND pos:vk)");
                    } else if (predicate.isIku()) {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND pos:v5k-s)");
                    } else if (predicate.isIAdjective()) {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND pos:adj-i)");
                    } else
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND (pos:v1 OR pos:v2 OR pos:v5 OR pos:vz OR pos:vi OR pos:vn OR pos:vr))");
                }
                search = sb.toString();
            } else {
                search = "\"lucenematch " + expression + "lucenematch\"";
            }
        }
        Log.i(LOG_TAG, " Searching for: " + search);
        Query q;
        if (onlyReb) {
            // Romaji input: match readings (reb) only.
            q = (new QueryParser(Version.LUCENE_36, "index_japanese_reb", analyzer)).parse(search);
        } else {
            StandardQueryParser parser = new StandardQueryParser(analyzer);
            q = parser.parse(search, "japanese");
        }
        Directory dir = FSDirectory.open(file);
        reader = IndexReader.open(dir);
        final IndexSearcher searcher = new IndexSearcher(reader);
        // Streams hits, converting stored fields into Translation objects and
        // publishing each one as it is found. Aborts via IOException when the
        // task is cancelled or the hit cap is reached.
        Collector collector = new Collector() {
            int max = 1000; // hard cap on published results
            int count = 0;
            private int docBase;

            @Override
            public boolean acceptsDocsOutOfOrder() {
                return true;
            }

            @Override
            public void collect(int docID) throws IOException {
                // docID is segment-relative; add docBase for the top-level id.
                Document d = searcher.doc(docID + docBase);
                Translation translation = new Translation();
                String prioritized = d.get("prioritized");
                if (searchOnlyFavorised && prioritized == null) {
                    return; // favourites-only mode: skip unprioritized entries
                }
                if (prioritized != null) { //is prioritized
                    translation.setPrioritized(true);
                }
                String ruby = d.get("ruby");
                if (ruby != null && ruby.length() > 0) {
                    translation.setRuby(ruby);
                }
                String japanese_keb = d.get("japanese_keb");
                if (japanese_keb != null && japanese_keb.length() != 0) {
                    translation.parseJapaneseKeb(japanese_keb);
                }
                String japanese_reb = d.get("japanese_reb");
                if (japanese_reb != null && japanese_reb.length() != 0) {
                    translation.parseJapaneseReb(japanese_reb);
                }
                String english = d.get("english");
                if (english != null && english.length() != 0) {
                    translation.parseEnglish(english);
                }
                String french = d.get("french");
                if (french != null && french.length() != 0) {
                    translation.parseFrench(french);
                }
                String dutch = d.get("dutch");
                if (dutch != null && dutch.length() != 0) {
                    translation.parseDutch(dutch);
                }
                String german = d.get("german");
                if (german != null && german.length() != 0) {
                    translation.parseGerman(german);
                }
                String russian = d.get("russian");
                if (russian != null && russian.length() != 0) {
                    translation.parseRussian(russian);
                }
                // Keep the hit only if it has a sense in an enabled language.
                if ((englishBool && translation.getEnglishSense() != null)
                        || (dutchBool && translation.getDutchSense() != null)
                        || (germanBool && translation.getGermanSense() != null)
                        || (frenchBool && translation.getFrenchSense() != null)
                        || (russianBool && translation.getRussianSense() != null)) {
                    count++;
                    if (count < max) {
                        if (!FragmentListAsyncTask.this.isCancelled()) {
                            FragmentListAsyncTask.this.publishProgress(translation);
                            translations.add(translation);
                        } else {
                            // Cancelled: discard partial results and abort the
                            // search by throwing (caught below as IOException).
                            translations.clear();
                            throw new IOException("Loader canceled");
                        }
                    } else {
                        // Cap reached: abort the search early.
                        throw new IOException("Max exceeded");
                    }
                }
            }

            @Override
            public void setNextReader(IndexReader reader, int docBas) throws IOException {
                docBase = docBas;
            }

            @Override
            public void setScorer(Scorer arg0) throws IOException {
                // scores are not used
            }
        };
        searcher.search(q, collector);
        reader.close();
    } catch (IOException ex) {
        // IOException doubles as the early-abort signal (cancel / cap), so the
        // results gathered so far are returned rather than discarded.
        Log.e(LOG_TAG, "IO Exception: " + ex.toString());
        return translations;
    } catch (Exception ex) {
        Log.e(LOG_TAG, "Exception: " + ex.toString());
        return null;
    }
    return translations.isEmpty() ? null : translations;
}
From source file:de.walware.statet.r.internal.core.rhelp.index.REnvIndexReader.java
License:Open Source License
public List<RHelpTopicEntry> getPackageTopics(final IRPkgHelp pkgHelp) { final List<RHelpTopicEntry> list = new ArrayList<>(64); try {/* w w w. java2s . co m*/ final BooleanQuery q = new BooleanQuery(true); q.add(DOCTYPE_PAGE_QUERY, Occur.MUST); q.add(new TermQuery(new Term(PACKAGE_FIELD_NAME, pkgHelp.getName())), Occur.MUST); this.indexSearcher.search(q, new Collector() { private Scorer scorer; private AtomicReader reader; private int docBase; @Override public void setScorer(final Scorer scorer) throws IOException { this.scorer = scorer; } @Override public boolean acceptsDocsOutOfOrder() { return true; } @Override public void setNextReader(final AtomicReaderContext context) throws IOException { this.reader = context.reader(); this.docBase = context.docBase; } @Override public void collect(final int doc) throws IOException { if (this.scorer.score() > 0.0f) { // TODO: reader#document not recommend final Document document = this.reader.document(doc, LOAD_PKG_TOPICS_SELECTOR); final String pageName = document.get(PAGE_FIELD_NAME); final IRHelpPage page = pkgHelp.getHelpPage(pageName); final String[] topics = document.getValues(ALIAS_FIELD_NAME); for (int i = 0; i < topics.length; i++) { list.add(new RHelpTopicEntry(topics[i], page)); } } } }); Collections.sort(list); return list; } catch (final Exception e) { RCorePlugin.log( new Status(IStatus.ERROR, RCore.PLUGIN_ID, -1, NLS.bind("An error occurred in search: {0}.", //$NON-NLS-1$ getPackageTopicsDescription(pkgHelp.getName())), e)); throw new RuntimeException("R help index search error."); } }
From source file:edu.ku.brc.specify.tools.LocalizerSearchHelper.java
License:Open Source License
/** * This method uses a custom HitCollector implementation which simply prints out * the docId and score of every matching document. * //from ww w . ja v a2s. c o m * This simulates the streaming search use case, where all hits are supposed to * be processed, regardless of their relevance. */ public void doStreamingSearch(final IndexSearcher searcher, final Query query) throws IOException { Collector streamingHitCollector = new Collector() { private Scorer scorer; /* (non-Javadoc) * @see org.apache.lucene.search.Collector#setNextReader(org.apache.lucene.index.AtomicReaderContext) */ @Override public void setNextReader(AtomicReaderContext arg0) throws IOException { this.docBase = arg0.docBase; } private int docBase; // simply print docId and score of every matching document @Override public void collect(int doc) throws IOException { log.debug("doc=" + doc + docBase + " score=" + scorer.score()); } @Override public boolean acceptsDocsOutOfOrder() { return true; } @Override public void setScorer(final Scorer scorer) throws IOException { this.scorer = scorer; } }; searcher.search(query, streamingHitCollector); }
From source file:edu.ku.brc.specify.tools.schemalocale.LocalizerApp.java
License:Open Source License
/** * This method uses a custom HitCollector implementation which simply prints out * the docId and score of every matching document. * // www . j a va 2s .c o m * This simulates the streaming search use case, where all hits are supposed to * be processed, regardless of their relevance. */ public void doStreamingSearch(final IndexSearcher searcher, Query query) throws IOException { Collector streamingHitCollector = new Collector() { private Scorer scorer; private int docBase; // simply print docId and score of every matching document @Override public void collect(int doc) throws IOException { System.out.println("doc=" + doc + docBase + " score=" + scorer.score()); } /* (non-Javadoc) * @see org.apache.lucene.search.Collector#setNextReader(org.apache.lucene.index.AtomicReaderContext) */ @Override public void setNextReader(AtomicReaderContext arg0) throws IOException { this.docBase = arg0.docBase; } @Override public boolean acceptsDocsOutOfOrder() { return true; } @Override public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; } }; searcher.search(query, streamingHitCollector); }
From source file:edu.wayne.cs.severe.ir4se.lucene.SearchFiles.java
License:Apache License
/** * This method uses a custom HitCollector implementation which simply prints * out the docId and score of every matching document. * /*from w w w.j av a 2s .c o m*/ * This simulates the streaming search use case, where all hits are supposed * to be processed, regardless of their relevance. */ public static void doStreamingSearch(final IndexSearcher searcher, Query query) throws IOException { Collector streamingHitCollector = new Collector() { private Scorer scorer; private int i = 0; private int docBase; // simply print docId and score of every matching document public void collect(int docNumber) throws IOException { try { } catch (Exception e) { e.printStackTrace(); } } public boolean acceptsDocsOutOfOrder() { return true; } public void setNextReader(IndexReader reader, int docBase) throws IOException { this.docBase = docBase; } public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; } }; searcher.search(query, streamingHitCollector); }
From source file:gov.nasa.ensemble.core.plan.editor.search.PlanSearcher.java
License:Open Source License
/** * Search function to populate results vector based on vector of queries. * //from ww w. j av a2 s . c om * @return results from search */ public Vector<Integer> zearch() { clearResults(); long t0 = System.currentTimeMillis(); if (queries == null || queries.size() == 0) { int max = 0; try { max = searcher.maxDoc(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } for (int i = 0; i < max; i++) { results.add(new Integer(i)); } return results; } BooleanQuery bQuery = new BooleanQuery(); int size = queries.size(); for (int i = 0; i < size; i++) { switch (booleanOps[i]) { case PlanSearchInput.AND: bQuery.add(queries.get(i), BooleanClause.Occur.MUST); break; case PlanSearchInput.OR: bQuery.add(queries.get(i), BooleanClause.Occur.SHOULD); break; case PlanSearchInput.NOT: bQuery.add(queries.get(i), BooleanClause.Occur.MUST_NOT); break; } } try { searcher.search(bQuery, new Collector() { @Override public void setScorer(Scorer scorer) throws IOException { // TODO Auto-generated method stub } @Override public void collect(int doc) throws IOException { results.add(new Integer(doc)); } @Override public void setNextReader(IndexReader reader, int docBase) throws IOException { // TODO Auto-generated method stub } @Override public boolean acceptsDocsOutOfOrder() { // TODO Auto-generated method stub return false; } }); } catch (Exception e) { System.out.println("bad search: " + e.getMessage()); } long t1 = System.currentTimeMillis(); LogUtil.debug("search for " + bQuery + " took: " + String.valueOf((double) (t1 - t0) / 1000) + " seconds"); return results; }