Example usage for org.apache.lucene.search Collector Collector

List of usage examples for org.apache.lucene.search Collector Collector

Introduction

On this page you can find example usage for org.apache.lucene.search Collector.

Prototype

Collector

Source Link

Usage

From source file:aplicacion.sistema.indexer.test.SearchFiles.java

License:Apache License

/**
 * Runs a streaming search with a custom {@link Collector} that simply prints
 * the global docId and score of every matching document.
 *
 * This simulates the streaming search use case, where all hits are supposed to
 * be processed, regardless of their relevance.
 *
 * @param searcher searcher to execute the query against
 * @param query    query to run
 * @throws IOException if index access fails
 */
public static void doStreamingSearch(final Searcher searcher, Query query) throws IOException {
    Collector streamingHitCollector = new Collector() {
        private Scorer scorer;
        private int docBase;

        // Print the global doc id and score of every matching document.
        @Override
        public void collect(int doc) throws IOException {
            // BUG FIX: the original wrote "doc=" + doc + docBase, which
            // string-concatenates the two numbers; the global id is their sum.
            System.out.println("doc=" + (doc + docBase) + " score=" + scorer.score());
        }

        @Override
        public boolean acceptsDocsOutOfOrder() {
            // Order does not matter when only printing; allows faster scorers.
            return true;
        }

        @Override
        public void setNextReader(IndexReader reader, int docBase) throws IOException {
            this.docBase = docBase;
        }

        @Override
        public void setScorer(Scorer scorer) throws IOException {
            this.scorer = scorer;
        }

    };

    searcher.search(query, streamingHitCollector);
}

From source file:cc.osint.graphd.graph.Graph.java

License:Apache License

/**
 * Executes a Lucene query against the graph index and returns every matching
 * document as a JSONObject containing all of its stored fields.
 *
 * @param indexSearcher searcher over the graph index
 * @param queryStr      raw query string (AND is the default operator,
 *                      leading wildcards are permitted)
 * @return one JSONObject per matching document
 * @throws Exception on parse or index errors
 */
public List<JSONObject> query(IndexSearcher indexSearcher, String queryStr) throws Exception {
    long start_t = System.currentTimeMillis();
    final List<JSONObject> matches = new ArrayList<JSONObject>();

    // Parse with AND as the default operator; leading wildcards are allowed.
    QueryParser parser = new QueryParser(Version.LUCENE_31, KEY_FIELD, analyzer);
    parser.setDefaultOperator(org.apache.lucene.queryParser.QueryParser.Operator.AND);
    parser.setAllowLeadingWildcard(true);
    Query parsed = parser.parse(queryStr);

    // Apply the parsed query as a cached filter over a match-all query,
    // so no scoring work is done at all.
    org.apache.lucene.search.Filter queryFilter = new org.apache.lucene.search.CachingWrapperFilter(
            new QueryWrapperFilter(parsed));

    indexSearcher.search(new MatchAllDocsQuery(), queryFilter, new Collector() {
        private int segmentBase;
        IndexReader segmentReader;

        // Scoring is irrelevant for a filter-only search.
        public void setScorer(Scorer scorer) {
        }

        // Hit order does not matter; accepting out-of-order hits lets
        // Lucene choose the faster collection path.
        public boolean acceptsDocsOutOfOrder() {
            return true;
        }

        public void collect(int doc) {
            try {
                // doc is segment-relative, so it is resolved against the
                // current segment reader directly.
                Document hit = segmentReader.document(doc);
                JSONObject json = new JSONObject();
                for (Fieldable field : hit.getFields()) {
                    json.put(field.name(), hit.get(field.name()));
                }
                matches.add(json);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }

        public void setNextReader(IndexReader reader, int docBase) {
            this.segmentReader = reader;
            this.segmentBase = docBase;
        }
    });
    long end_t = System.currentTimeMillis();
    //log.info("query: hits.scoreDocs.length = " + matches.size() + " (" + (end_t-start_t) + "ms)");
    return matches;
}

From source file:com.rubenlaguna.en4j.searchlucene.NoteFinderLuceneImpl.java

License:Open Source License

/**
 * Searches the Lucene index for notes matching {@code searchText}.
 *
 * Whitespace between terms is replaced by "* " and a trailing "*" is
 * appended when the text ends in a word character, so every term becomes a
 * prefix query. Matching documents' stored "id" fields are resolved to
 * Note objects through {@code nr}.
 *
 * @param searchText raw user query; blank input yields an empty list
 * @return the matching notes (possibly empty, never null)
 */
public Collection<Note> find(String searchText) {
    if ("".equals(searchText.trim())) {
        return Collections.EMPTY_LIST;
    }
    long start = System.currentTimeMillis();
    searchText = searchText.trim();
    // "foo bar" -> "foo* bar" -> "foo* bar*": each term is a prefix search.
    String patternStr = "\\s+";
    String replaceStr = "* ";
    Pattern pattern = Pattern.compile(patternStr);
    Matcher matcher = pattern.matcher(searchText);
    searchText = matcher.replaceAll(replaceStr);
    if (Pattern.matches(".*\\w$", searchText)) {
        searchText = searchText + "*";
    }

    LOG.info("search text:" + searchText);
    final Collection<Note> toReturn = new ArrayList<Note>();

    try {
        // Pick up index changes since the reader was opened; the stale
        // reader is closed only when reopen() actually produced a new one.
        IndexReader newReader = reader.reopen();
        if (newReader != reader) {
            reader.close();
        }
        reader = newReader;
        LOG.info("using index version: " + reader.getVersion());
        final IndexSearcher searcher = new IndexSearcher(reader);
        try {
            final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);
            QueryParser parser = new CustomQueryParser("all", analyzer);
            parser.setDefaultOperator(QueryParser.Operator.AND);

            Query query = parser.parse(searchText);
            LOG.info("query =" + query.toString());
            // Stream every hit, resolving the stored "id" field to a Note.
            Collector collector = new Collector() {

                private int docBase = 0;

                @Override
                public void setScorer(Scorer scorer) throws IOException {
                    // Scores are not used.
                }

                @Override
                public void collect(int doc) throws IOException {
                    // doc is segment-relative; adding docBase yields the
                    // searcher-global document id expected by searcher.doc().
                    int scoreId = doc + docBase;
                    Document document = searcher.doc(scoreId);
                    final String stringValue = document.getField("id").stringValue();
                    int docId = Integer.parseInt(stringValue);
                    LOG.fine("doc id " + stringValue + " matches the search.");
                    toReturn.add(nr.get(docId, false));
                }

                @Override
                public void setNextReader(IndexReader reader, int docBase) throws IOException {
                    this.docBase = docBase;
                }

                @Override
                public boolean acceptsDocsOutOfOrder() {
                    return true;
                }
            };
            searcher.search(query, collector);
        } finally {
            // BUG FIX: the searcher used to leak when parse()/search() threw;
            // close it on every path (this does not close the shared reader).
            searcher.close();
        }
    } catch (ParseException ex) {
        Exceptions.printStackTrace(ex);
    } catch (CorruptIndexException ex) {
        Exceptions.printStackTrace(ex);
    } catch (IOException ex) {
        Exceptions.printStackTrace(ex);
    } catch (IllegalStateException ex) {
        LOG.info("caught " + ex.getMessage() + ". Most likely the app is shutting down");
    }
    long delta = System.currentTimeMillis() - start;
    Installer.mbean.sampleSearchTime(delta);
    LOG.info("find took " + delta / 1000.0 + " secs. " + toReturn.size() + " results found");
    return toReturn;
}

From source file:com.scsb.crpro.lucene.SearchFiles.java

License:Apache License

/**
 * Runs a streaming search with a custom {@link Collector} that simply prints
 * the global docId and score of every matching document.
 *
 * This simulates the streaming search use case, where all hits are supposed to
 * be processed, regardless of their relevance.
 *
 * @param searcher searcher to execute the query against
 * @param query    query to run
 * @throws IOException if index access fails
 */
public static void doStreamingSearch(final Searcher searcher, Query query) throws IOException {
    Collector streamingHitCollector = new Collector() {
        private Scorer scorer;
        private int docBase;

        // Print the global doc id and score of every matching document.
        @Override
        public void collect(int doc) throws IOException {
            // BUG FIX: "doc=" + doc + docBase concatenated the two numbers
            // as strings; the global id is their arithmetic sum.
            System.out.println("doc=" + (doc + docBase) + " score=" + scorer.score());
        }

        @Override
        public boolean acceptsDocsOutOfOrder() {
            // Order does not matter when only printing.
            return true;
        }

        @Override
        public void setNextReader(IndexReader reader, int docBase) throws IOException {
            this.docBase = docBase;
        }

        @Override
        public void setScorer(Scorer scorer) throws IOException {
            this.scorer = scorer;
        }

    };

    searcher.search(query, streamingHitCollector);
}

From source file:cz.muni.fi.japanesedictionary.engine.FragmentListAsyncTask.java

License:Open Source License

/**
 * Background worker: looks up translations of params[0] (the expression) in
 * the JMdict Lucene index, anchored according to params[1] ("part":
 * "beginning", "middle", "end", or anything else for an exact match).
 *
 * Returns the last 10 stored translations when the expression is null
 * (first run); null when the dictionary is missing/unreadable, the input is
 * empty, or nothing matched; otherwise the matches, which are also streamed
 * to the UI via publishProgress as they are collected.
 */
@Override
protected List<Translation> doInBackground(String... params) {
    String expression = params[0];
    String part = params[1];

    SharedPreferences settings = mContext.getSharedPreferences(ParserService.DICTIONARY_PREFERENCES, 0);
    String pathToDictionary = settings.getString(Const.PREF_JMDICT_PATH, null);
    SharedPreferences sharedPrefs = PreferenceManager.getDefaultSharedPreferences(mContext);
    // Target languages enabled by the user; a hit is kept only if it has a
    // sense in at least one enabled language (see the filter in collect()).
    final boolean englishBool = sharedPrefs.getBoolean("language_english", false);
    final boolean frenchBool = sharedPrefs.getBoolean("language_french", false);
    final boolean dutchBool = sharedPrefs.getBoolean("language_dutch", false);
    final boolean germanBool = sharedPrefs.getBoolean("language_german", false);
    final boolean russianBool = sharedPrefs.getBoolean("language_russian", false);
    final boolean searchOnlyFavorised = sharedPrefs.getBoolean("search_only_favorite", false);
    final boolean searchDeinflected = sharedPrefs.getBoolean("search_deinflected", false);

    final List<Translation> translations = new ArrayList<>();

    if (expression == null) {
        // first run
        Log.i(LOG_TAG, "First run - last 10 translations ");
        GlossaryReaderContract database = new GlossaryReaderContract(mContext);
        List<Translation> translationsTemp = database.getLastTranslations(10);
        database.close();
        return translationsTemp;
    }

    if (pathToDictionary == null) {
        Log.e(LOG_TAG, "No path to jmdict dictionary");
        return null;
    }
    File file = new File(pathToDictionary);
    if (!file.exists() || !file.canRead()) {
        Log.e(LOG_TAG, "Can't read jmdict dictionary directory");
        return null;
    }

    if (expression.length() < 1) {
        Log.w(LOG_TAG, "No expression to translate");
        return null;
    }
    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_36);

    IndexReader reader;
    try {
        final String search;
        final String hiragana;
        boolean onlyReb = false;

        if (Pattern.matches("\\p{Latin}*", expression)) {
            // only romaji
            // Latin-only input is treated as romaji: normalize
            // kunrei -> Hepburn, then convert to hiragana.
            onlyReb = true;
            Log.i(LOG_TAG, "Only latin letters, converting to hiragana. ");
            expression = TranscriptionConverter.kunreiToHepburn(expression);
            expression = RomanizationEnum.Hepburn.toHiragana(expression);
        }
        hiragana = expression;

        // insertSpaces presumably separates characters so the CJK analyzer
        // tokenizes them individually — TODO confirm against its definition.
        expression = insertSpaces(expression);

        // NOTE(review): "lucenematch" looks like a sentinel token indexed at
        // term boundaries to anchor phrase matches at the start/end of an
        // entry — confirm against the index writer.
        switch (part) {
        case "end":
            search = "\"" + expression + "lucenematch\"";
            break;
        case "beginning":
            search = "\"lucenematch " + expression + "\"";
            break;
        case "middle":
            search = "\"" + expression + "\"";
            break;
        default:
            if (searchDeinflected) {
                // Exact match OR any deconjugated candidate form, each
                // constrained to a part-of-speech compatible with the
                // deconjugation rule that produced it.
                StringBuilder sb = new StringBuilder("\"lucenematch " + expression + "lucenematch\"");
                for (Predicate predicate : Deconjugator.deconjugate(hiragana)) {
                    if (predicate.isSuru()) {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND (pos:vs OR pos:vs-c OR pos:vs-s OR pos:vs-i))");
                    } else if (predicate.isKuru()) {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND pos:vk)");
                    } else if (predicate.isIku()) {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND pos:v5k-s)");
                    } else if (predicate.isIAdjective()) {
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND pos:adj-i)");
                    } else
                        sb.append(" OR ").append("(\"lucenematch ")
                                .append(insertSpaces(predicate.getPredicate()))
                                .append("lucenematch\" AND (pos:v1 OR pos:v2 OR pos:v5 OR pos:vz OR pos:vi OR pos:vn OR pos:vr))");
                }
                search = sb.toString();
            } else {
                search = "\"lucenematch " + expression + "lucenematch\"";
            }
        }
        Log.i(LOG_TAG, " Searching for: " + search);

        Query q;
        if (onlyReb) {
            // Romaji input: search only the reading (reb) field.
            q = (new QueryParser(Version.LUCENE_36, "index_japanese_reb", analyzer)).parse(search);
        } else {
            StandardQueryParser parser = new StandardQueryParser(analyzer);
            q = parser.parse(search, "japanese");
        }

        Directory dir = FSDirectory.open(file);
        reader = IndexReader.open(dir);
        final IndexSearcher searcher = new IndexSearcher(reader);
        Collector collector = new Collector() {
            // Hard cap on accepted hits; reaching it aborts the search via
            // IOException (caught below, partial results are returned).
            int max = 1000;
            int count = 0;
            private int docBase;

            @Override
            public boolean acceptsDocsOutOfOrder() {
                return true;
            }

            @Override
            public void collect(int docID) throws IOException {
                // docID is segment-relative; docBase converts it to the
                // searcher-global id expected by searcher.doc().
                Document d = searcher.doc(docID + docBase);
                Translation translation = new Translation();
                String prioritized = d.get("prioritized");
                if (searchOnlyFavorised && prioritized == null) {
                    // "Favorites only" mode: skip non-prioritized entries.
                    return;
                }
                if (prioritized != null) {
                    //is prioritized
                    translation.setPrioritized(true);
                }

                String ruby = d.get("ruby");

                if (ruby != null && ruby.length() > 0) {
                    translation.setRuby(ruby);
                }

                String japanese_keb = d.get("japanese_keb");
                if (japanese_keb != null && japanese_keb.length() != 0) {
                    translation.parseJapaneseKeb(japanese_keb);
                }

                String japanese_reb = d.get("japanese_reb");
                if (japanese_reb != null && japanese_reb.length() != 0) {
                    translation.parseJapaneseReb(japanese_reb);
                }

                String english = d.get("english");
                if (english != null && english.length() != 0) {
                    translation.parseEnglish(english);
                }

                String french = d.get("french");
                if (french != null && french.length() != 0) {
                    translation.parseFrench(french);
                }

                String dutch = d.get("dutch");
                if (dutch != null && dutch.length() != 0) {
                    translation.parseDutch(dutch);
                }

                String german = d.get("german");
                if (german != null && german.length() != 0) {
                    translation.parseGerman(german);
                }

                String russian = d.get("russian");
                if (russian != null && russian.length() != 0) {
                    translation.parseRussian(russian);
                }

                // Keep the hit only if it has a sense in at least one of the
                // user-enabled languages.
                if ((englishBool && translation.getEnglishSense() != null)
                        || (dutchBool && translation.getDutchSense() != null)
                        || (germanBool && translation.getGermanSense() != null)
                        || (frenchBool && translation.getFrenchSense() != null)
                        || (russianBool && translation.getRussianSense() != null)) {

                    count++;
                    if (count < max) {
                        if (!FragmentListAsyncTask.this.isCancelled()) {
                            // Stream the hit to the UI immediately.
                            FragmentListAsyncTask.this.publishProgress(translation);
                            translations.add(translation);
                        } else {
                            // Cancelled: discard everything and abort the
                            // search by exception (control flow by design).
                            translations.clear();
                            throw new IOException("Loader canceled");
                        }
                    } else {
                        // Cap reached: abort the search by exception.
                        throw new IOException("Max exceeded");
                    }
                }
            }

            @Override
            public void setNextReader(IndexReader reader, int docBas) throws IOException {
                docBase = docBas;
            }

            @Override
            public void setScorer(Scorer arg0) throws IOException {
                // Scores are not used.
            }

        };

        searcher.search(q, collector);
        // NOTE(review): reader is not closed when the search aborts via
        // IOException above — the partial results are returned but the
        // reader leaks; confirm whether this matters here.
        reader.close();
    } catch (IOException ex) {
        // Includes the intentional "Loader canceled"/"Max exceeded" aborts:
        // return whatever was collected so far.
        Log.e(LOG_TAG, "IO Exception:  " + ex.toString());
        return translations;
    } catch (Exception ex) {
        Log.e(LOG_TAG, "Exception: " + ex.toString());
        return null;
    }

    return translations.isEmpty() ? null : translations;
}

From source file:de.walware.statet.r.internal.core.rhelp.index.REnvIndexReader.java

License:Open Source License

/**
 * Collects all help topics (alias entries) of the given package's pages.
 *
 * Searches the index for page documents of the package, maps each stored
 * alias to its {@link IRHelpPage}, and returns the entries sorted.
 *
 * @param pkgHelp the package whose topics are listed
 * @return sorted list of topic entries
 * @throws RuntimeException if the index search fails (original cause attached)
 */
public List<RHelpTopicEntry> getPackageTopics(final IRPkgHelp pkgHelp) {
    final List<RHelpTopicEntry> list = new ArrayList<>(64);
    try {
        final BooleanQuery q = new BooleanQuery(true);
        q.add(DOCTYPE_PAGE_QUERY, Occur.MUST);
        q.add(new TermQuery(new Term(PACKAGE_FIELD_NAME, pkgHelp.getName())), Occur.MUST);
        this.indexSearcher.search(q, new Collector() {

            private Scorer scorer;

            private AtomicReader reader;
            private int docBase;

            @Override
            public void setScorer(final Scorer scorer) throws IOException {
                this.scorer = scorer;
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
                return true;
            }

            @Override
            public void setNextReader(final AtomicReaderContext context) throws IOException {
                this.reader = context.reader();
                this.docBase = context.docBase;
            }

            @Override
            public void collect(final int doc) throws IOException {
                // Skip documents filtered down to zero score.
                if (this.scorer.score() > 0.0f) {
                    // TODO: reader#document not recommend
                    final Document document = this.reader.document(doc, LOAD_PKG_TOPICS_SELECTOR);
                    final String pageName = document.get(PAGE_FIELD_NAME);
                    final IRHelpPage page = pkgHelp.getHelpPage(pageName);
                    // One entry per alias, all pointing at the same page.
                    final String[] topics = document.getValues(ALIAS_FIELD_NAME);
                    for (int i = 0; i < topics.length; i++) {
                        list.add(new RHelpTopicEntry(topics[i], page));
                    }
                }
            }

        });
        Collections.sort(list);
        return list;
    } catch (final Exception e) {
        RCorePlugin.log(
                new Status(IStatus.ERROR, RCore.PLUGIN_ID, -1, NLS.bind("An error occurred in search: {0}.", //$NON-NLS-1$
                        getPackageTopicsDescription(pkgHelp.getName())), e));
        // BUG FIX: preserve the original cause instead of discarding it.
        throw new RuntimeException("R help index search error.", e);
    }
}

From source file:edu.ku.brc.specify.tools.LocalizerSearchHelper.java

License:Open Source License

/**
 * Runs a streaming search with a custom {@link Collector} that logs the
 * global docId and score of every matching document.
 *
 * This simulates the streaming search use case, where all hits are supposed to
 * be processed, regardless of their relevance.
 *
 * @param searcher searcher to execute the query against
 * @param query    query to run
 * @throws IOException if index access fails
 */
public void doStreamingSearch(final IndexSearcher searcher, final Query query) throws IOException {
    Collector streamingHitCollector = new Collector() {
        private Scorer scorer;
        private int docBase;

        /* (non-Javadoc)
        * @see org.apache.lucene.search.Collector#setNextReader(org.apache.lucene.index.AtomicReaderContext)
        */
        @Override
        public void setNextReader(AtomicReaderContext arg0) throws IOException {
            this.docBase = arg0.docBase;
        }

        // Log the global doc id and score of every matching document.
        @Override
        public void collect(int doc) throws IOException {
            // BUG FIX: "doc=" + doc + docBase concatenated the two numbers
            // as strings; the global id is their arithmetic sum.
            log.debug("doc=" + (doc + docBase) + " score=" + scorer.score());
        }

        @Override
        public boolean acceptsDocsOutOfOrder() {
            // Order does not matter when only logging.
            return true;
        }

        @Override
        public void setScorer(final Scorer scorer) throws IOException {
            this.scorer = scorer;
        }

    };

    searcher.search(query, streamingHitCollector);
}

From source file:edu.ku.brc.specify.tools.schemalocale.LocalizerApp.java

License:Open Source License

/**
 * Runs a streaming search with a custom {@link Collector} that simply prints
 * the global docId and score of every matching document.
 *
 * This simulates the streaming search use case, where all hits are supposed to
 * be processed, regardless of their relevance.
 *
 * @param searcher searcher to execute the query against
 * @param query    query to run
 * @throws IOException if index access fails
 */
public void doStreamingSearch(final IndexSearcher searcher, Query query) throws IOException {
    Collector streamingHitCollector = new Collector() {
        private Scorer scorer;
        private int docBase;

        // Print the global doc id and score of every matching document.
        @Override
        public void collect(int doc) throws IOException {
            // BUG FIX: "doc=" + doc + docBase concatenated the two numbers
            // as strings; the global id is their arithmetic sum.
            System.out.println("doc=" + (doc + docBase) + " score=" + scorer.score());
        }

        /* (non-Javadoc)
        * @see org.apache.lucene.search.Collector#setNextReader(org.apache.lucene.index.AtomicReaderContext)
        */
        @Override
        public void setNextReader(AtomicReaderContext arg0) throws IOException {
            this.docBase = arg0.docBase;
        }

        @Override
        public boolean acceptsDocsOutOfOrder() {
            // Order does not matter when only printing.
            return true;
        }

        @Override
        public void setScorer(Scorer scorer) throws IOException {
            this.scorer = scorer;
        }

    };

    searcher.search(query, streamingHitCollector);
}

From source file:edu.wayne.cs.severe.ir4se.lucene.SearchFiles.java

License:Apache License

/**
 * This method uses a custom HitCollector implementation which simply prints
 * out the docId and score of every matching document.
 * /*from w w w.j  av a  2s .c  o m*/
 * This simulates the streaming search use case, where all hits are supposed
 * to be processed, regardless of their relevance.
 */
public static void doStreamingSearch(final IndexSearcher searcher, Query query) throws IOException {
    Collector streamingHitCollector = new Collector() {
        private Scorer scorer;
        private int i = 0;
        private int docBase;

        // simply print docId and score of every matching document
        public void collect(int docNumber) throws IOException {
            try {

            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        public boolean acceptsDocsOutOfOrder() {
            return true;
        }

        public void setNextReader(IndexReader reader, int docBase) throws IOException {
            this.docBase = docBase;
        }

        public void setScorer(Scorer scorer) throws IOException {
            this.scorer = scorer;
        }
    };

    searcher.search(query, streamingHitCollector);

}

From source file:gov.nasa.ensemble.core.plan.editor.search.PlanSearcher.java

License:Open Source License

/**
 * Search function to populate the results vector based on the vector of
 * queries.
 *
 * With no queries, every document id in the index is returned. Otherwise the
 * queries are combined into one BooleanQuery using the per-query boolean
 * operators (AND -> MUST, OR -> SHOULD, NOT -> MUST_NOT) and executed.
 *
 * @return results from search (document ids)
 */
public Vector<Integer> zearch() {
    clearResults();
    long t0 = System.currentTimeMillis();
    if (queries == null || queries.isEmpty()) {
        // No criteria: match everything by enumerating all doc ids.
        int max = 0;

        try {
            max = searcher.maxDoc();
        } catch (IOException e) {
            e.printStackTrace();
        }

        for (int i = 0; i < max; i++) {
            results.add(i); // autoboxing instead of deprecated new Integer(i)
        }

        return results;
    }

    // Combine the individual queries with their boolean operators.
    BooleanQuery bQuery = new BooleanQuery();
    int size = queries.size();

    for (int i = 0; i < size; i++) {
        switch (booleanOps[i]) {
        case PlanSearchInput.AND:
            bQuery.add(queries.get(i), BooleanClause.Occur.MUST);
            break;
        case PlanSearchInput.OR:
            bQuery.add(queries.get(i), BooleanClause.Occur.SHOULD);
            break;
        case PlanSearchInput.NOT:
            bQuery.add(queries.get(i), BooleanClause.Occur.MUST_NOT);
            break;
        }
    }

    try {
        searcher.search(bQuery, new Collector() {
            @Override
            public void setScorer(Scorer scorer) throws IOException {
                // Scores are not used.
            }

            @Override
            public void collect(int doc) throws IOException {
                results.add(doc); // autoboxing instead of new Integer(doc)
            }

            @Override
            public void setNextReader(IndexReader reader, int docBase) throws IOException {
                // Segment base is not needed: doc ids are collected as-is.
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
                return false;
            }
        });

    } catch (Exception e) {
        System.out.println("bad search: " + e.getMessage());
    }

    long t1 = System.currentTimeMillis();

    LogUtil.debug("search for " + bQuery + " took: " + String.valueOf((double) (t1 - t0) / 1000) + " seconds");

    return results;
}