Example usage for org.apache.lucene.search IndexSearcher search

List of usage examples for org.apache.lucene.search IndexSearcher search

Introduction

On this page you can find example usages of org.apache.lucene.search IndexSearcher search.

Prototype

protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException 

Source Link

Document

Lower-level search API.

Usage

From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.PersonalAndTrendingRecommender.java

License:Apache License

/**
 * Combines the superclass recommendations with items matched against the user's own ratings.
 *
 * <p>The requested {@code count} is halved: one half is requested from {@code super.recommend},
 * the other half is taken from a query built from the ratings returned by {@code ratingsDao}.
 * Items found by the second query are tagged {@code "personal"} and appended to the superclass
 * results.
 *
 * @param userid id of the user whose ratings drive the personal query
 * @param start  offset forwarded to the superclass and used as the first personal hit to return
 * @param count  total number of items requested; each half contributes at most {@code count / 2}
 * @return the superclass results followed by the personal results
 * @throws RecommendationException if the ratings cannot be read or the index search fails
 */
@Override
public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException {
    count = count / 2;

    List<RecommendedNewsItem> results = super.recommend(userid, start, count);

    IndexSearcher searcher = null;
    try {
        Map<String, Double> terms = ratingsDao.getRatings(userid);
        Query query = buildQuery(terms);
        // The collector must hold every hit up to the end of the requested page.
        int hitsPerPage = start + count;

        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);

        // f1 is built from the superclass results (presumably to exclude them - confirm in
        // UniqueResultsFilter); f2 receives 24 hours expressed in milliseconds.
        Filter f1 = new UniqueResultsFilter(results);
        Filter f2 = new RecentFilter("timestamp", 1000 * 60 * 60 * 24);
        Filter f = new ChainedFilter(new Filter[] { f1, f2 }, ChainedFilter.AND);

        // Refresh before acquiring so the searcher handed out can reflect the refresh.
        manager.maybeRefresh();
        searcher = manager.acquire();
        searcher.search(query, f, collector);

        ScoreDoc[] hits = collector.topDocs(start, count).scoreDocs;

        for (ScoreDoc s : hits) {
            int docId = s.doc;
            Document d = searcher.doc(docId);
            RecommendedNewsItem item = toNewsitem(d, docId, s.score, "personal");
            results.add(item);
        }
    } catch (RatingsDaoException | IOException ex) {
        logger.error(ex);
        throw new RecommendationException(ex);
    } finally {
        // Every acquire() must be paired with a release(), otherwise the searcher leaks.
        if (searcher != null) {
            try {
                manager.release(searcher);
            } catch (IOException ex) {
                logger.error(ex);
            }
        }
    }
    return results;
}

From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.TopNRecommender.java

License:Apache License

/**
 * Recommends the most viewed articles, filtered by a {@code SeenArticlesFilter} built for the user.
 *
 * <p>NOTE(review): paging appears to be applied twice - the ids are already fetched for the
 * range {@code start .. start + count}, and the hit array is then sliced from {@code start}
 * again while the collector keeps at most {@code count} hits. Confirm the intended semantics;
 * this method only guarantees that an out-of-range {@code start} yields an empty list instead
 * of an exception.
 *
 * @param userid id of the user passed to the seen-articles filter
 * @param start  index of the first hit to return
 * @param count  maximum number of hits to collect and return
 * @return the recommended items tagged {@code "topN"}; empty when {@code start} is past the hits
 * @throws RecommendationException if the views cannot be read or the index search fails
 */
@Override
public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException {
    IndexSearcher searcher = null;
    try {
        List<Long> ids = viewsDao.getNMostSeenArticles(start, start + count);
        Query query = buildQuery(ids);
        int hitsPerPage = count;

        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);

        Filter filter = new SeenArticlesFilter(viewsDao, userid);
        searcher = manager.acquire();
        searcher.search(query, filter, collector);

        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        int stop = Math.min(start + count, hits.length);
        // stop can be smaller than start; a negative capacity would make ArrayList throw.
        List<RecommendedNewsItem> results = new ArrayList<>(Math.max(0, stop - start));

        for (int i = start; i < stop; i++) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            results.add(toNewsitem(d, docId, hits[i].score, "topN"));
        }

        return results;

    } catch (ViewsDaoException | IOException ex) {
        logger.error(ex);
        throw new RecommendationException(ex);
    } finally {
        if (searcher != null) {
            try {
                manager.release(searcher);
            } catch (IOException ex) {
                logger.error(ex);
            }
        }
    }
}

From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.TrendingTopicRecommender.java

License:Apache License

/**
 * Recommends items matching the current trends.
 *
 * <p>Up to 250 trends are requested from {@code trendsDao} and turned into a query; the search
 * is restricted by a {@code RecentFilter} on the {@code "timestamp"} field and the requested
 * page of hits is converted into items tagged {@code "trending"}.
 *
 * @param userid id of the requesting user (not used by this implementation)
 * @param start  index of the first hit to return
 * @param count  maximum number of hits to return
 * @return the recommended items for the requested page
 * @throws RecommendationException if the trends cannot be read or the index search fails
 */
@Override
public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException {
    IndexSearcher acquired = null;
    try {
        final String[] trendTerms = trendsDao.getTrends(250);
        final Query trendQuery = buildQuery(trendTerms);

        // Collect enough hits to reach the end of the requested page.
        final TopScoreDocCollector topHits = TopScoreDocCollector.create(start + count, true);
        final Filter recentOnly = new RecentFilter("timestamp", 1000 * 60 * 60 * 24);

        manager.maybeRefresh();
        acquired = manager.acquire();
        acquired.search(trendQuery, recentOnly, topHits);

        final ScoreDoc[] page = topHits.topDocs(start, count).scoreDocs;
        final List<RecommendedNewsItem> items = new ArrayList<>(page.length);
        for (int i = 0; i < page.length; i++) {
            final ScoreDoc current = page[i];
            final Document stored = acquired.doc(current.doc);
            items.add(toNewsitem(stored, current.doc, current.score, "trending"));
        }
        return items;
    } catch (TrendsDaoException | IOException ex) {
        logger.error(ex);
        throw new RecommendationException(ex);
    } finally {
        // Hand the searcher back to the manager; a failure here is only logged.
        if (acquired != null) {
            try {
                manager.release(acquired);
            } catch (IOException ex) {
                logger.error(ex);
            }
        }
    }
}

From source file:br.andrew.lucene.testing.SearchFiles.java

License:Apache License

/**
 * Simple command-line based search demo.
 *
 * <p>Recognised options: {@code -index dir}, {@code -field f}, {@code -repeat n},
 * {@code -queries file}, {@code -query string}, {@code -raw} and {@code -paging hitsPerPage}.
 * Queries are read from the {@code -queries} file, from {@code -query}, or interactively from
 * standard input; an empty line or end of input stops the loop. With {@code -repeat n} each
 * query is first executed {@code n} times and the elapsed time is printed.
 *
 * @param args command-line arguments as described above
 * @throws Exception if the index cannot be opened or a query cannot be read, parsed or executed
 */
public static void main(final String[] args) throws Exception {
    final String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "index";
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 10;

    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-field".equals(args[i])) {
            field = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        }
    }

    final IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    BufferedReader in = null;
    try {
        final IndexSearcher searcher = new IndexSearcher(reader);
        final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);

        if (queries != null) {
            in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
        } else {
            in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
        }
        final QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);
        while (true) {
            if (queries == null && queryString == null) { // prompt the user
                System.out.println("Enter query: ");
            }

            String line = queryString != null ? queryString : in.readLine();

            // End of input stops the loop.
            if (line == null) {
                break;
            }

            line = line.trim();
            if (line.length() == 0) {
                break;
            }

            final Query query = parser.parse(line);
            System.out.println("Searching for: " + query.toString(field));

            if (repeat > 0) { // repeat & time as benchmark
                final Date start = new Date();
                for (int i = 0; i < repeat; i++) {
                    searcher.search(query, null, 100);
                }
                final Date end = new Date();
                System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
            }

            // The last argument is true only when queries are typed interactively.
            SearchFiles.doPagingSearch(in, searcher, query, hitsPerPage, raw,
                    queries == null && queryString == null);

            // A query given on the command line is executed exactly once.
            if (queryString != null) {
                break;
            }
        }
    } finally {
        try {
            // Close only the reader opened on a query file; standard input is left open.
            if (queries != null && in != null) {
                in.close();
            }
        } finally {
            // Release the index even when parsing or searching failed.
            reader.close();
        }
    }
}

From source file:bzh.terrevirtuelle.navisu.gazetteer.impl.lucene.GeoNameResolver.java

License:Apache License

/**
 * Looks up candidate locations for every distinct name and keeps the best ones.
 *
 * <p>Each name is searched as a quoted phrase over the name and alternate-names fields, sorted
 * by {@code populationSort}. Names whose query cannot be parsed are skipped after printing the
 * stack trace. Note that batches of 200 or more names set the {@code hitsPerPage} field to 5,
 * and the field keeps that value after this method returns.
 *
 * @param locationNames names to resolve; repeated names are searched only once
 * @param count         value forwarded to {@code pickBestCandidates}
 * @param reader        index reader to search
 * @return the map filled by {@code pickBestCandidates}
 * @throws IOException if the index search fails
 */
private HashMap<String, List<Location>> resolveEntities(List<String> locationNames, int count,
        IndexReader reader) throws IOException {
    if (locationNames.size() >= 200) {
        hitsPerPage = 5; // avoid heavy computation
    }
    final IndexSearcher indexSearcher = new IndexSearcher(reader);
    final HashMap<String, List<Location>> candidatesByName = new HashMap<String, List<Location>>();

    for (String locationName : locationNames) {
        // A name that already has candidates does not need a second search.
        if (candidatesByName.containsKey(locationName)) {
            continue;
        }
        try {
            // The name is wrapped in additional quotes (") to avoid query tokenization on space.
            final MultiFieldQueryParser phraseParser = new MultiFieldQueryParser(
                    new String[] { FIELD_NAME_NAME, FIELD_NAME_ALTERNATE_NAMES }, analyzer);
            final Query phraseQuery = phraseParser.parse(String.format("\"%s\"", locationName));

            final Sort byPopulation = new Sort(populationSort);
            // Fetch 3 times the desired number of hits; only the desired number is kept later.
            final ScoreDoc[] topHits = indexSearcher.search(phraseQuery, hitsPerPage * 3, byPopulation).scoreDocs;

            getMatchingCandidates(indexSearcher, candidatesByName, locationName, topHits);
        } catch (org.apache.lucene.queryparser.classic.ParseException e) {
            e.printStackTrace();
        }
    }

    final HashMap<String, List<Location>> bestByName = new HashMap<String, List<Location>>();
    pickBestCandidates(bestByName, candidatesByName, count);
    return bestByName;
}

From source file:cc.osint.graphd.graph.Graph.java

License:Apache License

/**
 * Runs a query string against the index and returns every matching document as JSON.
 *
 * <p>The query is parsed with AND as default operator and leading wildcards allowed, then used
 * as a cached filter over a match-all query. For each collected document, every field name is
 * mapped to the value returned by {@code Document.get} for that name. Documents that fail to
 * load or convert are skipped after printing the stack trace.
 *
 * @param indexSearcher searcher to run the query on
 * @param queryStr      query in query-parser syntax
 * @return one JSON object per matching document, in collection order
 * @throws Exception if the query cannot be parsed or the search fails
 */
public List<JSONObject> query(IndexSearcher indexSearcher, String queryStr) throws Exception {
    final List<JSONObject> matches = new ArrayList<JSONObject>();

    QueryParser parser = new QueryParser(Version.LUCENE_31, KEY_FIELD, analyzer);
    parser.setDefaultOperator(org.apache.lucene.queryParser.QueryParser.Operator.AND);
    parser.setAllowLeadingWildcard(true);
    Query parsed = parser.parse(queryStr);

    org.apache.lucene.search.Filter matchFilter = new org.apache.lucene.search.CachingWrapperFilter(
            new QueryWrapperFilter(parsed));

    indexSearcher.search(new MatchAllDocsQuery(), matchFilter, new Collector() {
        // Reader handed over by the latest setNextReader call; collect() loads documents from it.
        IndexReader currentReader;

        public void setNextReader(IndexReader reader, int docBase) {
            this.currentReader = reader;
        }

        // Scores are not needed, so the scorer is ignored.
        public void setScorer(Scorer scorer) {
        }

        // Documents may be delivered in any order.
        public boolean acceptsDocsOutOfOrder() {
            return true;
        }

        public void collect(int doc) {
            try {
                Document stored = currentReader.document(doc);
                JSONObject json = new JSONObject();
                for (Fieldable field : stored.getFields()) {
                    String fieldName = field.name();
                    json.put(fieldName, stored.get(fieldName));
                }
                matches.add(json);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
    });
    return matches;
}

From source file:cc.twittertools.search.local.RunQueries.java

License:Apache License

/**
 * Runs every topic of a TREC-format topics file against a local index and prints the results.
 *
 * <p>The queries and index options are mandatory; the number of results defaults to 1000 and
 * the similarity to {@code "LM"}. A similarity value other than {@code BM25} or {@code LM}
 * leaves the searcher's similarity untouched. Each hit is printed as
 * {@code <topic id> Q0 <status id> <rank> <score> <runtag>}; with the verbose option the whole
 * document is printed on a following {@code #} line. Results for a topic are restricted to
 * status ids between 0 and the topic's query tweet time, both inclusive.
 *
 * @param args command-line arguments
 * @throws Exception if the index or topics file cannot be read or a query fails
 */
@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return")
            .create(NUM_RESULTS_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("file containing topics in TREC format").create(QUERIES_OPTION));
    options.addOption(OptionBuilder.withArgName("similarity").hasArg()
            .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION));
    options.addOption(new Option(VERBOSE_OPTION, "print out complete document"));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(QUERIES_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(RunQueries.class.getName(), options);
        System.exit(-1);
    }

    File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION));
    if (!indexLocation.exists()) {
        System.err.println("Error: " + indexLocation + " does not exist!");
        System.exit(-1);
    }

    String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG;

    String topicsFile = cmdline.getOptionValue(QUERIES_OPTION);

    int numResults = 1000;
    try {
        if (cmdline.hasOption(NUM_RESULTS_OPTION)) {
            numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION));
        }
    } catch (NumberFormatException e) {
        System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION));
        System.exit(-1);
    }

    String similarity = "LM";
    if (cmdline.hasOption(SIMILARITY_OPTION)) {
        similarity = cmdline.getOptionValue(SIMILARITY_OPTION);
    }

    boolean verbose = cmdline.hasOption(VERBOSE_OPTION);

    PrintStream out = new PrintStream(System.out, true, "UTF-8");

    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation));
    try {
        IndexSearcher searcher = new IndexSearcher(reader);

        if (similarity.equalsIgnoreCase("BM25")) {
            searcher.setSimilarity(new BM25Similarity());
        } else if (similarity.equalsIgnoreCase("LM")) {
            searcher.setSimilarity(new LMDirichletSimilarity(2500.0f));
        }

        QueryParser p = new QueryParser(Version.LUCENE_43, StatusField.TEXT.name, IndexStatuses.ANALYZER);

        TrecTopicSet topics = TrecTopicSet.fromFile(new File(topicsFile));
        for (TrecTopic topic : topics) {
            Query query = p.parse(topic.getQuery());
            Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, topic.getQueryTweetTime(),
                    true, true);

            TopDocs rs = searcher.search(query, filter, numResults);

            // Ranks in the output are 1-based and restart for every topic.
            int i = 1;
            for (ScoreDoc scoreDoc : rs.scoreDocs) {
                Document hit = searcher.doc(scoreDoc.doc);
                out.println(String.format("%s Q0 %s %d %f %s", topic.getId(),
                        hit.getField(StatusField.ID.name).numericValue(), i, scoreDoc.score, runtag));
                if (verbose) {
                    out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " "));
                }
                i++;
            }
        }
    } finally {
        // Release the index and the output stream even when a query fails midway.
        try {
            reader.close();
        } finally {
            out.close();
        }
    }
}

From source file:cc.twittertools.search.local.SearchStatuses.java

License:Apache License

/**
 * Runs a single query against a local index and prints the results in TREC run format.
 *
 * <p>The query and index options are mandatory. The similarity defaults to {@code "LM"}; a
 * value other than {@code BM25} or {@code LM} leaves the searcher's similarity untouched.
 * Results are restricted to status ids between 0 and the max id, both inclusive. Each hit is
 * printed as {@code <qid> Q0 <status id> <rank> <score> <runtag>}; with the verbose option the
 * whole document is printed on a following {@code #} line. A non-numeric max id or number of
 * results is reported on standard error and ends the program with exit code -1.
 *
 * @param args command-line arguments
 * @throws Exception if the index cannot be read or the query cannot be parsed or executed
 */
@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("query id").create(QID_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("query text").create(QUERY_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("maxid").create(MAX_ID_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return")
            .create(NUM_RESULTS_OPTION));
    options.addOption(OptionBuilder.withArgName("similarity").hasArg()
            .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION));
    options.addOption(new Option(VERBOSE_OPTION, "print out complete document"));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(QUERY_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(SearchStatuses.class.getName(), options);
        System.exit(-1);
    }

    File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION));
    if (!indexLocation.exists()) {
        System.err.println("Error: " + indexLocation + " does not exist!");
        System.exit(-1);
    }

    String qid = cmdline.hasOption(QID_OPTION) ? cmdline.getOptionValue(QID_OPTION) : DEFAULT_QID;
    String queryText = cmdline.hasOption(QUERY_OPTION) ? cmdline.getOptionValue(QUERY_OPTION) : DEFAULT_Q;
    String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG;

    // Report malformed numeric options with a readable message instead of a stack trace.
    long maxId = DEFAULT_MAX_ID;
    try {
        if (cmdline.hasOption(MAX_ID_OPTION)) {
            maxId = Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION));
        }
    } catch (NumberFormatException e) {
        System.err.println("Invalid " + MAX_ID_OPTION + ": " + cmdline.getOptionValue(MAX_ID_OPTION));
        System.exit(-1);
    }

    int numResults = DEFAULT_NUM_RESULTS;
    try {
        if (cmdline.hasOption(NUM_RESULTS_OPTION)) {
            numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION));
        }
    } catch (NumberFormatException e) {
        System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION));
        System.exit(-1);
    }

    boolean verbose = cmdline.hasOption(VERBOSE_OPTION);

    String similarity = "LM";
    if (cmdline.hasOption(SIMILARITY_OPTION)) {
        similarity = cmdline.getOptionValue(SIMILARITY_OPTION);
    }

    PrintStream out = new PrintStream(System.out, true, "UTF-8");

    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation));
    try {
        IndexSearcher searcher = new IndexSearcher(reader);

        if (similarity.equalsIgnoreCase("BM25")) {
            searcher.setSimilarity(new BM25Similarity());
        } else if (similarity.equalsIgnoreCase("LM")) {
            searcher.setSimilarity(new LMDirichletSimilarity(2500.0f));
        }

        QueryParser p = new QueryParser(Version.LUCENE_43, IndexStatuses.StatusField.TEXT.name,
                IndexStatuses.ANALYZER);
        Query query = p.parse(queryText);
        Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, maxId, true, true);

        TopDocs rs = searcher.search(query, filter, numResults);

        // Ranks in the output are 1-based.
        int i = 1;
        for (ScoreDoc scoreDoc : rs.scoreDocs) {
            Document hit = searcher.doc(scoreDoc.doc);

            out.println(String.format("%s Q0 %s %d %f %s", qid, hit.getField(StatusField.ID.name).numericValue(), i,
                    scoreDoc.score, runtag));
            if (verbose) {
                out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " "));
            }
            i++;
        }
    } finally {
        // Release the index and the output stream even when the query fails.
        try {
            reader.close();
        } finally {
            out.close();
        }
    }
}

From source file:ccc.plugins.search.lucene.SimpleLuceneFS.java

License:Open Source License

/**
 * Executes a search and passes the searcher and its results to the supplied handler.
 *
 * <p>Results are filtered through an {@code AclFilter}; when no permissions are given a fresh
 * {@code ACL} is used. I/O and parse failures are logged as warnings and the handler is not
 * invoked. The searcher is always closed before returning.
 *
 * @param searchTerms query text handed to the parser
 * @param maxHits     maximum number of hits to retrieve
 * @param sorter      sort order for the results, or {@code null} for the default search
 * @param userPerms   permissions of the searching user, or {@code null}
 * @param sh          handler that receives the searcher and the hits
 */
private void find(final String searchTerms, final int maxHits, final Sort sorter, final ACL userPerms,
        final CapturingHandler sh) {
    IndexSearcher indexSearcher = null;

    try {
        indexSearcher = new IndexSearcher(createDirectory());

        final TopDocs matches;
        if (sorter != null) {
            matches = indexSearcher.search(createParser().parse(searchTerms),
                    new AclFilter(ACL_FIELD, (userPerms != null) ? userPerms : new ACL()), maxHits, sorter);
        } else {
            matches = indexSearcher.search(createParser().parse(searchTerms),
                    new AclFilter(ACL_FIELD, (userPerms != null) ? userPerms : new ACL()), maxHits);
        }

        sh.handle(indexSearcher, matches);
    } catch (final ParseException e) {
        LOG.warn("Error performing query.", e);
    } catch (final IOException e) {
        LOG.warn("Error performing query.", e);
    } finally {
        // Closing is best effort: a failure here is deliberately swallowed.
        if (indexSearcher != null) {
            try {
                indexSearcher.close();
            } catch (final IOException e) {
                Exceptions.swallow(e);
            }
        }
    }
}

From source file:ch.admin.isb.hermes5.business.search.SearchEngine.java

License:Apache License

/**
 * Searches the index of the given model and language for the user's input.
 *
 * <p>If the index cannot be opened, the index files are restored through
 * {@code searchIndexManager.restoreIndexFilesFromS3} and opened once more. At most 1000 hits
 * on the {@code presentationName} field are converted into results. Any failure is logged and
 * turned into an empty result list, so this method never throws.
 *
 * @param searchInput     raw text entered by the user
 * @param modelIdentifier identifier of the model whose index is searched
 * @param lang            language used to select the index and the analyzer
 * @return the search results; empty when no query could be built or an error occurred
 */
public List<SearchResult> search(String searchInput, String modelIdentifier, String lang) {
    DirectoryReader directoryReader = null;

    try {
        List<SearchResult> results = new ArrayList<SearchResult>();
        String queryString = buildQueryString(searchInput);
        if (queryString != null) {

            String searchIndexPath = searchIndexManager.getSearchIndexPath(modelIdentifier, lang);
            try {
                directoryReader = DirectoryReader.open(FSDirectory.open(new File(searchIndexPath)));
            } catch (Exception e) {
                // Keep the cause in the log: any failure to open the index ends up here,
                // not only a missing index.
                logger.warn("No index files found at " + searchIndexPath + ". Will try to restore from S3", e);
                searchIndexManager.restoreIndexFilesFromS3(modelIdentifier, lang);
                directoryReader = DirectoryReader.open(FSDirectory.open(new File(searchIndexPath)));
            }
            Analyzer analyzer = analyserRepository.getAnalyzer(lang);
            IndexSearcher isearcher = new IndexSearcher(directoryReader);

            Query query = new QueryParser(Version.LUCENE_47, "presentationName", analyzer).parse(queryString);
            ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
            HighlighterWrapper highlighter = highlighterRepository.getHighlighter(analyzer, isearcher, query);
            for (ScoreDoc hit : hits) {
                results.add(buildSearchResult(isearcher, highlighter, hit.doc));
            }
        }

        searchLogger.info("<{}> returned {} results", searchInput, results.size());

        return results;
    } catch (Exception e) {
        logger.warn("An exception occurred during search, empty result will be returned", e);
        return new ArrayList<SearchResult>();
    } finally {
        try {
            if (directoryReader != null) {
                directoryReader.close();
            }
        } catch (IOException e) {
            logger.debug("unable to close directory reader", e);
        }
    }
}