Example usage for org.apache.lucene.search IndexSearcher search

Introduction

This page collects example usages of the search method of org.apache.lucene.search.IndexSearcher.

Prototype

protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException

Source Link

Document

Lower-level search API.

Usage

From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.PersonalAndTrendingRecommender.java

License:Apache License

/**
 * Extends the recommendations of the superclass with "personal" results
 * obtained by searching the index with the user's rated terms.
 *
 * <p>{@code count} is halved (integer division) before it is used; the halved
 * value is passed to {@code super.recommend} and is also used for the
 * personal search, whose hits are appended to the superclass result list.
 *
 * @param userid id of the user whose ratings drive the personal query
 * @param start  offset of the first hit to return from the personal search
 * @param count  requested number of items; half of it is requested from each source
 * @return the list returned by {@code super.recommend} with the personal hits appended
 * @throws RecommendationException if the ratings cannot be read or the index search fails
 */
@Override
public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException {
    count = count / 2;

    List<RecommendedNewsItem> results = super.recommend(userid, start, count);

    IndexSearcher searcher = null;
    try {
        Map<String, Double> terms = ratingsDao.getRatings(userid);
        Query query = buildQuery(terms);
        // Collect enough hits to cover everything up to the end of the requested page.
        int hitsPerPage = start + count;

        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);

        // Skip items already present in the superclass results and restrict by timestamp.
        // 1000 * 60 * 60 * 24 = 86,400,000 — presumably one day in milliseconds; confirm
        // against RecentFilter.
        Filter f1 = new UniqueResultsFilter(results);
        Filter f2 = new RecentFilter("timestamp", 1000 * 60 * 60 * 24);
        Filter f = new ChainedFilter(new Filter[] { f1, f2 }, ChainedFilter.AND);

        // Refresh before acquiring so that this search can see the refreshed index
        // (same order as TrendingTopicRecommender).
        manager.maybeRefresh();
        searcher = manager.acquire();
        searcher.search(query, f, collector);

        ScoreDoc[] hits = collector.topDocs(start, count).scoreDocs;

        for (ScoreDoc s : hits) {
            int docId = s.doc;
            Document d = searcher.doc(docId);
            RecommendedNewsItem item = toNewsitem(d, docId, s.score, "personal");
            results.add(item);
        }
    } catch (RatingsDaoException | IOException ex) {
        logger.error(ex);
        throw new RecommendationException(ex);
    } finally {
        // Every acquired searcher must be handed back to the manager, otherwise
        // its reference is never released.
        if (searcher != null) {
            try {
                manager.release(searcher);
            } catch (IOException ex) {
                logger.error(ex);
            }
        }
    }
    return results;
}

From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.TopNRecommender.java

License:Apache License

/**
 * Recommends items by searching the index for the most-seen article ids,
 * filtered through a {@code SeenArticlesFilter} built for the given user.
 *
 * @param userid id of the user passed to the seen-articles filter
 * @param start  index of the first hit to return
 * @param count  maximum number of items to return
 * @return the hits in the range {@code [start, min(start + count, hits.length))},
 *         converted to news items tagged "topN"; empty when {@code start} lies
 *         beyond the number of hits
 * @throws RecommendationException if the views DAO or the index search fails
 */
@Override
public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException {
    IndexSearcher searcher = null;
    try {
        List<Long> ids = viewsDao.getNMostSeenArticles(start, start + count);
        Query query = buildQuery(ids);
        // NOTE(review): the collector keeps at most `count` hits, yet the loop below
        // starts at index `start`. For start > 0 this can skip or drop results —
        // confirm whether getNMostSeenArticles already applies the paging.
        int hitsPerPage = count;

        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);

        Filter filter = new SeenArticlesFilter(viewsDao, userid);
        searcher = manager.acquire();
        searcher.search(query, filter, collector);

        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        int stop = Math.min(start + count, hits.length);
        // stop - start is negative when start > hits.length; ArrayList rejects a
        // negative capacity, so clamp it to zero and return an empty list instead.
        List<RecommendedNewsItem> results = new ArrayList<>(Math.max(0, stop - start));

        for (int i = start; i < stop; i++) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            results.add(toNewsitem(d, docId, hits[i].score, "topN"));
        }

        return results;

    } catch (ViewsDaoException | IOException ex) {
        throw new RecommendationException(ex);
    } finally {
        // Hand the searcher back to the manager on every exit path.
        if (searcher != null) {
            try {
                manager.release(searcher);
            } catch (IOException ex) {
                logger.error(ex);
            }
        }
    }
}

From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.TrendingTopicRecommender.java

License:Apache License

/**
 * Recommends items that match the current trends, restricted by a
 * {@code RecentFilter} on the "timestamp" field.
 *
 * @param userid id of the requesting user (not used by this implementation)
 * @param start  offset of the first hit to return
 * @param count  maximum number of hits to return
 * @return the requested slice of hits converted to news items tagged "trending"
 * @throws RecommendationException if the trends cannot be read or the index search fails
 */
@Override
public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException {
    IndexSearcher searcher = null;
    try {
        final String[] trendingTerms = trendsDao.getTrends(250);
        final Query trendQuery = buildQuery(trendingTerms);

        // Collect enough hits to cover everything up to the end of the requested page.
        final TopScoreDocCollector topHits = TopScoreDocCollector.create(start + count, true);

        final Filter recentOnly = new RecentFilter("timestamp", 1000 * 60 * 60 * 24);

        manager.maybeRefresh();
        searcher = manager.acquire();
        searcher.search(trendQuery, recentOnly, topHits);

        final ScoreDoc[] page = topHits.topDocs(start, count).scoreDocs;
        final List<RecommendedNewsItem> recommendations = new ArrayList<>(page.length);
        for (int i = 0; i < page.length; i++) {
            final int docId = page[i].doc;
            final Document stored = searcher.doc(docId);
            recommendations.add(toNewsitem(stored, docId, page[i].score, "trending"));
        }
        return recommendations;
    } catch (TrendsDaoException | IOException ex) {
        logger.error(ex);
        throw new RecommendationException(ex);
    } finally {
        // Always give the searcher back to the manager; a failure here is only logged.
        if (searcher != null) {
            try {
                manager.release(searcher);
            } catch (IOException ex) {
                logger.error(ex);
            }
        }
    }
}

From source file:br.andrew.lucene.testing.SearchFiles.java

License:Apache License

/**
 * Simple command-line based search demo.
 *
 * <p>Parses the command-line options, opens the index, then repeatedly reads a
 * query (from {@code -query}, from the {@code -queries} file, or from standard
 * input), parses it and hands it to {@code doPagingSearch}. The loop ends on
 * end of input, on a blank line, or after the single {@code -query} string has
 * been processed.
 *
 * @param args command-line arguments as described by the usage string
 * @throws Exception if the index cannot be opened, a query cannot be parsed or I/O fails
 */
public static void main(final String[] args) throws Exception {
    final String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "index";
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 10;

    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-field".equals(args[i])) {
            field = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        }
    }

    final IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    BufferedReader in = null;
    try {
        final IndexSearcher searcher = new IndexSearcher(reader);
        final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);

        if (queries != null) {
            in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
        } else {
            in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
        }
        final QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);
        while (true) {
            if (queries == null && queryString == null) { // prompt the user
                System.out.println("Enter query: ");
            }

            String line = queryString != null ? queryString : in.readLine();

            // null means end of input; a length can never be negative, so no
            // further check is needed here.
            if (line == null) {
                break;
            }

            line = line.trim();
            if (line.length() == 0) {
                break;
            }

            final Query query = parser.parse(line);
            System.out.println("Searching for: " + query.toString(field));

            if (repeat > 0) { // repeat & time as benchmark
                final Date start = new Date();
                for (int i = 0; i < repeat; i++) {
                    searcher.search(query, null, 100);
                }
                final Date end = new Date();
                System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
            }

            SearchFiles.doPagingSearch(in, searcher, query, hitsPerPage, raw,
                    queries == null && queryString == null);

            if (queryString != null) {
                break;
            }
        }
    } finally {
        // Release the queries file (standard input is left open) and the index
        // reader even when parsing or searching throws.
        try {
            if (in != null && queries != null) {
                in.close();
            }
        } finally {
            reader.close();
        }
    }
}

From source file:bzh.terrevirtuelle.navisu.gazetteer.impl.lucene.GeoNameResolver.java

License:Apache License

/**
 * Looks up candidate locations for every distinct name and then lets
 * {@code pickBestCandidates} choose the resolved entries.
 *
 * <p>Each name is searched as a quoted phrase over the name and
 * alternate-names fields, sorted with {@code populationSort}. Names whose
 * query cannot be parsed are skipped after printing the stack trace.
 *
 * @param locationNames names to resolve; a name that already has candidates is not searched again
 * @param count         passed through to {@code pickBestCandidates}
 * @param reader        index reader to search
 * @return the map filled by {@code pickBestCandidates}
 * @throws IOException if the index search fails
 */
private HashMap<String, List<Location>> resolveEntities(List<String> locationNames, int count,
        IndexReader reader) throws IOException {
    // NOTE(review): hitsPerPage is not declared in this method, so the lowered
    // value presumably outlives this call — confirm that is intended.
    if (locationNames.size() >= 200) {
        hitsPerPage = 5; // avoid heavy computation
    }

    final IndexSearcher searcher = new IndexSearcher(reader);
    final HashMap<String, List<Location>> candidatesByName = new HashMap<String, List<Location>>();

    for (final String name : locationNames) {
        if (candidatesByName.containsKey(name)) {
            continue; // this name has already been looked up
        }
        try {
            // The extra quotes turn the name into a phrase so it is not tokenized on spaces.
            final String phrase = String.format("\"%s\"", name);
            final MultiFieldQueryParser parser = new MultiFieldQueryParser(
                    new String[] { FIELD_NAME_NAME, FIELD_NAME_ALTERNATE_NAMES }, analyzer);
            final Query nameQuery = parser.parse(phrase);

            final Sort byPopulation = new Sort(populationSort);
            // Fetch three times the desired number; they are sorted on code later and trimmed.
            final ScoreDoc[] hits = searcher.search(nameQuery, hitsPerPage * 3, byPopulation).scoreDocs;

            getMatchingCandidates(searcher, candidatesByName, name, hits);
        } catch (org.apache.lucene.queryparser.classic.ParseException e) {
            e.printStackTrace();
        }
    }

    final HashMap<String, List<Location>> resolved = new HashMap<String, List<Location>>();
    pickBestCandidates(resolved, candidatesByName, count);
    return resolved;
}

From source file:cc.osint.graphd.graph.Graph.java

License:Apache License

/**
 * Runs a query string against the index and returns every matching document
 * as a JSON object that maps each field name to {@code Document.get(name)}.
 *
 * <p>The parsed query is applied as a cached filter over a match-all query,
 * so no scoring is used. A document that fails to load or convert is skipped
 * after printing the stack trace.
 *
 * @param indexSearcher searcher to run the query on
 * @param queryStr      query in query-parser syntax; leading wildcards are allowed
 *                      and terms are combined with AND by default
 * @return one JSON object per collected document
 * @throws Exception if the query cannot be parsed or the search fails
 */
public List<JSONObject> query(IndexSearcher indexSearcher, String queryStr) throws Exception {
    // Timing values are kept for the (currently disabled) query log statement.
    long startMillis = System.currentTimeMillis();
    final List<JSONObject> matches = new ArrayList<JSONObject>();

    QueryParser parser = new QueryParser(Version.LUCENE_31, KEY_FIELD, analyzer);
    parser.setDefaultOperator(org.apache.lucene.queryParser.QueryParser.Operator.AND);
    parser.setAllowLeadingWildcard(true);
    Query parsed = parser.parse(queryStr);

    org.apache.lucene.search.Filter cachedFilter = new org.apache.lucene.search.CachingWrapperFilter(
            new QueryWrapperFilter(parsed));

    Collector jsonCollector = new Collector() {
        private int docBase;
        IndexReader reader;

        public void setNextReader(IndexReader nextReader, int nextDocBase) {
            this.reader = nextReader;
            this.docBase = nextDocBase;
        }

        // scores are not needed
        public void setScorer(Scorer scorer) {
        }

        // documents may arrive in any order
        public boolean acceptsDocsOutOfOrder() {
            return true;
        }

        public void collect(int doc) {
            try {
                // The id is resolved against the reader most recently passed to setNextReader.
                Document stored = reader.document(doc);
                JSONObject json = new JSONObject();
                for (Fieldable field : stored.getFields()) {
                    String fieldName = field.name();
                    json.put(fieldName, stored.get(fieldName));
                }
                matches.add(json);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
    };

    indexSearcher.search(new MatchAllDocsQuery(), cachedFilter, jsonCollector);
    long endMillis = System.currentTimeMillis();
    return matches;
}

From source file:cc.twittertools.search.local.RunQueries.java

License:Apache License

/**
 * Runs every topic of a TREC topics file against a local index and prints
 * one result line per hit to standard output (UTF-8).
 *
 * <p>The queries and index options are required; without them the help text
 * is printed and the program exits with -1. Each topic's search is restricted
 * to ids in the range {@code [0, topic.getQueryTweetTime()]}.
 *
 * @param args command-line arguments
 * @throws Exception if the index cannot be opened, a query cannot be parsed or I/O fails
 */
@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    final Options options = new Options();

    // OptionBuilder is used through its static chain, hence the static-access suppression.
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return")
            .create(NUM_RESULTS_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("file containing topics in TREC format").create(QUERIES_OPTION));
    options.addOption(OptionBuilder.withArgName("similarity").hasArg()
            .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION));
    options.addOption(new Option(VERBOSE_OPTION, "print out complete document"));

    CommandLine cmdline = null;
    final CommandLineParser cliParser = new GnuParser();
    try {
        cmdline = cliParser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    final boolean hasRequiredOptions = cmdline.hasOption(QUERIES_OPTION) && cmdline.hasOption(INDEX_OPTION);
    if (!hasRequiredOptions) {
        new HelpFormatter().printHelp(RunQueries.class.getName(), options);
        System.exit(-1);
    }

    final File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION));
    if (!indexLocation.exists()) {
        System.err.println("Error: " + indexLocation + " does not exist!");
        System.exit(-1);
    }

    final String runtag;
    if (cmdline.hasOption(RUNTAG_OPTION)) {
        runtag = cmdline.getOptionValue(RUNTAG_OPTION);
    } else {
        runtag = DEFAULT_RUNTAG;
    }

    final String topicsFile = cmdline.getOptionValue(QUERIES_OPTION);

    int numResults = 1000;
    try {
        if (cmdline.hasOption(NUM_RESULTS_OPTION)) {
            numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION));
        }
    } catch (NumberFormatException e) {
        System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION));
        System.exit(-1);
    }

    final String similarity = cmdline.hasOption(SIMILARITY_OPTION)
            ? cmdline.getOptionValue(SIMILARITY_OPTION)
            : "LM";

    final boolean verbose = cmdline.hasOption(VERBOSE_OPTION);

    final PrintStream out = new PrintStream(System.out, true, "UTF-8");

    final IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation));
    final IndexSearcher searcher = new IndexSearcher(reader);

    // Any other similarity name leaves the searcher's default similarity in place.
    if (similarity.equalsIgnoreCase("BM25")) {
        searcher.setSimilarity(new BM25Similarity());
    } else if (similarity.equalsIgnoreCase("LM")) {
        searcher.setSimilarity(new LMDirichletSimilarity(2500.0f));
    }

    final QueryParser queryParser = new QueryParser(Version.LUCENE_43, StatusField.TEXT.name,
            IndexStatuses.ANALYZER);

    final TrecTopicSet topics = TrecTopicSet.fromFile(new File(topicsFile));
    for (TrecTopic topic : topics) {
        final Query query = queryParser.parse(topic.getQuery());
        final Filter idRange = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L,
                topic.getQueryTweetTime(), true, true);

        final TopDocs rs = searcher.search(query, idRange, numResults);

        // rank is 1-based in the output
        for (int rank = 1; rank <= rs.scoreDocs.length; rank++) {
            final ScoreDoc scoreDoc = rs.scoreDocs[rank - 1];
            final Document hit = searcher.doc(scoreDoc.doc);
            out.println(String.format("%s Q0 %s %d %f %s", topic.getId(),
                    hit.getField(StatusField.ID.name).numericValue(), rank, scoreDoc.score, runtag));
            if (verbose) {
                out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " "));
            }
        }
    }
    reader.close();
    out.close();
}

From source file:cc.twittertools.search.local.SearchStatuses.java

License:Apache License

/**
 * Runs a single query against a local index and prints one result line per
 * hit to standard output (UTF-8).
 *
 * <p>The query and index options are required; without them the help text is
 * printed and the program exits with -1. The search is restricted to ids in
 * the range {@code [0, maxId]}.
 *
 * @param args command-line arguments
 * @throws Exception if the index cannot be opened, the query cannot be parsed or I/O fails
 */
@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    final Options options = new Options();
    // OptionBuilder is used through its static chain, hence the static-access suppression.
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("query id").create(QID_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("query text").create(QUERY_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("maxid").create(MAX_ID_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return")
            .create(NUM_RESULTS_OPTION));
    options.addOption(OptionBuilder.withArgName("similarity").hasArg()
            .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION));
    options.addOption(new Option(VERBOSE_OPTION, "print out complete document"));

    CommandLine cmdline = null;
    final CommandLineParser cliParser = new GnuParser();
    try {
        cmdline = cliParser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    final boolean hasRequiredOptions = cmdline.hasOption(QUERY_OPTION) && cmdline.hasOption(INDEX_OPTION);
    if (!hasRequiredOptions) {
        new HelpFormatter().printHelp(SearchStatuses.class.getName(), options);
        System.exit(-1);
    }

    final File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION));
    if (!indexLocation.exists()) {
        System.err.println("Error: " + indexLocation + " does not exist!");
        System.exit(-1);
    }

    // Each setting starts at its default and is overridden when the option is present.
    String qid = DEFAULT_QID;
    if (cmdline.hasOption(QID_OPTION)) {
        qid = cmdline.getOptionValue(QID_OPTION);
    }
    String queryText = DEFAULT_Q;
    if (cmdline.hasOption(QUERY_OPTION)) {
        queryText = cmdline.getOptionValue(QUERY_OPTION);
    }
    String runtag = DEFAULT_RUNTAG;
    if (cmdline.hasOption(RUNTAG_OPTION)) {
        runtag = cmdline.getOptionValue(RUNTAG_OPTION);
    }
    long maxId = DEFAULT_MAX_ID;
    if (cmdline.hasOption(MAX_ID_OPTION)) {
        maxId = Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION));
    }
    int numResults = DEFAULT_NUM_RESULTS;
    if (cmdline.hasOption(NUM_RESULTS_OPTION)) {
        numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION));
    }
    final boolean verbose = cmdline.hasOption(VERBOSE_OPTION);

    final String similarity = cmdline.hasOption(SIMILARITY_OPTION)
            ? cmdline.getOptionValue(SIMILARITY_OPTION)
            : "LM";

    final PrintStream out = new PrintStream(System.out, true, "UTF-8");

    final IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation));
    final IndexSearcher searcher = new IndexSearcher(reader);

    // Any other similarity name leaves the searcher's default similarity in place.
    if (similarity.equalsIgnoreCase("BM25")) {
        searcher.setSimilarity(new BM25Similarity());
    } else if (similarity.equalsIgnoreCase("LM")) {
        searcher.setSimilarity(new LMDirichletSimilarity(2500.0f));
    }

    final QueryParser queryParser = new QueryParser(Version.LUCENE_43, IndexStatuses.StatusField.TEXT.name,
            IndexStatuses.ANALYZER);
    final Query query = queryParser.parse(queryText);
    final Filter idRange = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, maxId, true, true);

    final TopDocs rs = searcher.search(query, idRange, numResults);

    // rank is 1-based in the output
    for (int rank = 1; rank <= rs.scoreDocs.length; rank++) {
        final ScoreDoc scoreDoc = rs.scoreDocs[rank - 1];
        final Document hit = searcher.doc(scoreDoc.doc);

        out.println(String.format("%s Q0 %s %d %f %s", qid, hit.getField(StatusField.ID.name).numericValue(),
                rank, scoreDoc.score, runtag));
        if (verbose) {
            out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " "));
        }
    }

    reader.close();
    out.close();
}

From source file:ccc.plugins.search.lucene.SimpleLuceneFS.java

License:Open Source License

/**
 * Searches the index for the given terms, restricted by an ACL filter, and
 * passes the searcher and the hits to the handler.
 *
 * <p>I/O and query-parse failures are logged as warnings and not rethrown;
 * the handler is not called in that case. The searcher is always closed.
 *
 * @param searchTerms query text handed to the parser
 * @param maxHits     maximum number of hits to request
 * @param sorter      sort order, or {@code null} to use the unsorted search overload
 * @param userPerms   permissions for the ACL filter, or {@code null} for a new empty {@code ACL}
 * @param sh          handler that receives the searcher and the hits
 */
private void find(final String searchTerms, final int maxHits, final Sort sorter, final ACL userPerms,
        final CapturingHandler sh) {
    IndexSearcher searcher = null;

    try {
        searcher = new IndexSearcher(createDirectory());

        // A missing permission set is replaced by a freshly constructed ACL.
        final ACL effectivePerms = (userPerms != null) ? userPerms : new ACL();

        final TopDocs docs;
        if (sorter != null) {
            docs = searcher.search(createParser().parse(searchTerms),
                    new AclFilter(ACL_FIELD, effectivePerms), maxHits, sorter);
        } else {
            docs = searcher.search(createParser().parse(searchTerms),
                    new AclFilter(ACL_FIELD, effectivePerms), maxHits);
        }

        sh.handle(searcher, docs);
    } catch (final IOException e) {
        LOG.warn("Error performing query.", e);
    } catch (final ParseException e) {
        LOG.warn("Error performing query.", e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (final IOException e) {
                // A failure while closing is deliberately ignored.
                Exceptions.swallow(e);
            }
        }
    }
}

From source file:ch.admin.isb.hermes5.business.search.SearchEngine.java

License:Apache License

/**
 * Searches the index of the given model and language for the user's input.
 *
 * <p>If the index cannot be opened, the index files are restored through
 * {@code restoreIndexFilesFromS3} and opening is retried once. Any exception
 * during the search is logged and an empty list is returned instead.
 *
 * @param searchInput     raw search text entered by the user
 * @param modelIdentifier identifies the model whose index is searched
 * @param lang            language used to select the index and the analyzer
 * @return the search results (at most 1000 hits are requested); empty when no
 *         query string could be built or when an error occurred
 */
public List<SearchResult> search(String searchInput, String modelIdentifier, String lang) {
    DirectoryReader directoryReader = null;

    try {
        final List<SearchResult> found = new ArrayList<SearchResult>();
        final String luceneQuery = buildQueryString(searchInput);
        if (luceneQuery != null) {

            final String indexPath = searchIndexManager.getSearchIndexPath(modelIdentifier, lang);
            try {
                directoryReader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
            } catch (Exception e) {
                // First attempt failed: restore the index files and retry once.
                logger.warn("No index files found at " + indexPath + ". Will try to restore from S3");
                searchIndexManager.restoreIndexFilesFromS3(modelIdentifier, lang);
                directoryReader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
            }

            final Analyzer analyzer = analyserRepository.getAnalyzer(lang);
            final IndexSearcher isearcher = new IndexSearcher(directoryReader);

            final QueryParser parser = new QueryParser(Version.LUCENE_47, "presentationName", analyzer);
            final Query query = parser.parse(luceneQuery);
            final ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
            final HighlighterWrapper highlighter = highlighterRepository.getHighlighter(analyzer, isearcher,
                    query);
            for (ScoreDoc hit : hits) {
                found.add(buildSearchResult(isearcher, highlighter, hit.doc));
            }
        }

        // Logged for every request, including those for which no query string was built.
        searchLogger.info("<{}> returned {} results", searchInput, found.size());

        return found;
    } catch (Exception e) {
        logger.warn("An exception occurred during search, empty result will be returned", e);
        return new ArrayList<SearchResult>();
    } finally {
        if (directoryReader != null) {
            try {
                directoryReader.close();
            } catch (IOException e) {
                logger.debug("unable to close directory reader", e);
            }
        }
    }
}