Usage examples for the search method of org.apache.lucene.search.IndexSearcher
protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException
From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.PersonalAndTrendingRecommender.java
License:Apache License
@Override public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException { count = count / 2;/*from w ww. j a va 2s . co m*/ List<RecommendedNewsItem> results = super.recommend(userid, start, count); IndexSearcher searcher = null; try { Map<String, Double> terms = ratingsDao.getRatings(userid); Query query = buildQuery(terms); int hitsPerPage = start + count; TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); Filter f1 = new UniqueResultsFilter(results); Filter f2 = new RecentFilter("timestamp", 1000 * 60 * 60 * 24); Filter f = new ChainedFilter(new Filter[] { f1, f2 }, ChainedFilter.AND); searcher = manager.acquire(); manager.maybeRefresh(); searcher.search(query, f, collector); ScoreDoc[] hits = collector.topDocs(start, count).scoreDocs; for (ScoreDoc s : hits) { int docId = s.doc; Document d = searcher.doc(docId); RecommendedNewsItem item = toNewsitem(d, docId, s.score, "personal"); results.add(item); } //Collections.sort(results); } catch (RatingsDaoException | IOException ex) { logger.error(ex); throw new RecommendationException(ex); } return results; }
From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.TopNRecommender.java
License:Apache License
@Override public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException { IndexSearcher searcher = null; try {//from w w w . j av a2s .co m List<Long> ids = viewsDao.getNMostSeenArticles(start, start + count); Query query = buildQuery(ids); int hitsPerPage = count; TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); Filter filter = new SeenArticlesFilter(viewsDao, userid); searcher = manager.acquire(); searcher.search(query, filter, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; int stop = (start + count < hits.length ? start + count : hits.length); List<RecommendedNewsItem> results = new ArrayList<>(stop - start); for (int i = start; i < stop; i++) { int docId = hits[i].doc; Document d = searcher.doc(docId); results.add(toNewsitem(d, docId, hits[i].score, "topN")); } return results; } catch (ViewsDaoException | IOException ex) { throw new RecommendationException(ex); } finally { if (searcher != null) { try { manager.release(searcher); } catch (IOException ex) { logger.error(ex); } searcher = null; } } }
From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.TrendingTopicRecommender.java
License:Apache License
@Override public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException { IndexSearcher searcher = null; try {/*from w w w.ja v a 2 s. c o m*/ String[] trends = trendsDao.getTrends(250); Query query = buildQuery(trends); int hitsPerPage = start + count; TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); //Filter filter = new SeenArticlesFilter(viewsDao, userid); Filter f = new RecentFilter("timestamp", 1000 * 60 * 60 * 24); manager.maybeRefresh(); searcher = manager.acquire(); searcher.search(query, f, collector); ScoreDoc[] hits = collector.topDocs(start, count).scoreDocs; List<RecommendedNewsItem> results = new ArrayList<>(hits.length); for (ScoreDoc hit : hits) { int docId = hit.doc; Document d = searcher.doc(docId); RecommendedNewsItem item = toNewsitem(d, docId, hit.score, "trending"); results.add(item); } return results; } catch (TrendsDaoException | IOException ex) { logger.error(ex); throw new RecommendationException(ex); } finally { try { if (searcher != null) { manager.release(searcher); } } catch (IOException ex) { logger.error(ex); } searcher = null; } }
From source file:br.andrew.lucene.testing.SearchFiles.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(final String[] args) throws Exception { final String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);// w w w .j av a 2s . c o m } String index = "index"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } final IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); final IndexSearcher searcher = new IndexSearcher(reader); final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); } else { in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } final QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? 
queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } final Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark final Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, null, 100); } final Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } SearchFiles.doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } reader.close(); }
From source file:bzh.terrevirtuelle.navisu.gazetteer.impl.lucene.GeoNameResolver.java
License:Apache License
private HashMap<String, List<Location>> resolveEntities(List<String> locationNames, int count, IndexReader reader) throws IOException { if (locationNames.size() >= 200) { hitsPerPage = 5; // avoid heavy computation }//from w w w . j ava 2 s. c o m IndexSearcher searcher = new IndexSearcher(reader); Query q = null; HashMap<String, List<Location>> allCandidates = new HashMap<String, List<Location>>(); for (String name : locationNames) { if (!allCandidates.containsKey(name)) { try { //query is wrapped in additional quotes (") to avoid query tokenization on space q = new MultiFieldQueryParser(new String[] { FIELD_NAME_NAME, FIELD_NAME_ALTERNATE_NAMES }, analyzer).parse(String.format("\"%s\"", name)); Sort sort = new Sort(populationSort); //Fetch 3 times desired values, these will be sorted on code and only desired number will be kept ScoreDoc[] hits = searcher.search(q, hitsPerPage * 3, sort).scoreDocs; getMatchingCandidates(searcher, allCandidates, name, hits); } catch (org.apache.lucene.queryparser.classic.ParseException e) { e.printStackTrace(); } } } HashMap<String, List<Location>> resolvedEntities = new HashMap<String, List<Location>>(); pickBestCandidates(resolvedEntities, allCandidates, count); return resolvedEntities; }
From source file:cc.osint.graphd.graph.Graph.java
License:Apache License
public List<JSONObject> query(IndexSearcher indexSearcher, String queryStr) throws Exception { long start_t = System.currentTimeMillis(); final List<JSONObject> results = new ArrayList<JSONObject>(); QueryParser qp = new QueryParser(Version.LUCENE_31, KEY_FIELD, analyzer); qp.setDefaultOperator(org.apache.lucene.queryParser.QueryParser.Operator.AND); qp.setAllowLeadingWildcard(true);/*from w w w. ja va 2 s . c o m*/ Query query = qp.parse(queryStr); org.apache.lucene.search.Filter filter = new org.apache.lucene.search.CachingWrapperFilter( new QueryWrapperFilter(query)); indexSearcher.search(new MatchAllDocsQuery(), filter, new Collector() { private int docBase; IndexReader reader; // ignore scoring public void setScorer(Scorer scorer) { } // accept docs out of order public boolean acceptsDocsOutOfOrder() { return true; } public void collect(int doc) { try { Document d = reader.document(doc); JSONObject result = new JSONObject(); for (Fieldable f : d.getFields()) { result.put(f.name(), d.get(f.name())); } results.add(result); } catch (Exception ex) { ex.printStackTrace(); } } public void setNextReader(IndexReader reader, int docBase) { this.reader = reader; this.docBase = docBase; } }); long end_t = System.currentTimeMillis(); //log.info("query: hits.scoreDocs.length = " + results.size() + " (" + (end_t-start_t) + "ms)"); return results; }
From source file:cc.twittertools.search.local.RunQueries.java
License:Apache License
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(// w ww . j a v a2s. co m OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return") .create(NUM_RESULTS_OPTION)); options.addOption(OptionBuilder.withArgName("file").hasArg() .withDescription("file containing topics in TREC format").create(QUERIES_OPTION)); options.addOption(OptionBuilder.withArgName("similarity").hasArg() .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION)); options.addOption( OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION)); options.addOption(new Option(VERBOSE_OPTION, "print out complete document")); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(QUERIES_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(RunQueries.class.getName(), options); System.exit(-1); } File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION)); if (!indexLocation.exists()) { System.err.println("Error: " + indexLocation + " does not exist!"); System.exit(-1); } String runtag = cmdline.hasOption(RUNTAG_OPTION) ? 
cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG; String topicsFile = cmdline.getOptionValue(QUERIES_OPTION); int numResults = 1000; try { if (cmdline.hasOption(NUM_RESULTS_OPTION)) { numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION)); } } catch (NumberFormatException e) { System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION)); System.exit(-1); } String similarity = "LM"; if (cmdline.hasOption(SIMILARITY_OPTION)) { similarity = cmdline.getOptionValue(SIMILARITY_OPTION); } boolean verbose = cmdline.hasOption(VERBOSE_OPTION); PrintStream out = new PrintStream(System.out, true, "UTF-8"); IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation)); IndexSearcher searcher = new IndexSearcher(reader); if (similarity.equalsIgnoreCase("BM25")) { searcher.setSimilarity(new BM25Similarity()); } else if (similarity.equalsIgnoreCase("LM")) { searcher.setSimilarity(new LMDirichletSimilarity(2500.0f)); } QueryParser p = new QueryParser(Version.LUCENE_43, StatusField.TEXT.name, IndexStatuses.ANALYZER); TrecTopicSet topics = TrecTopicSet.fromFile(new File(topicsFile)); for (TrecTopic topic : topics) { Query query = p.parse(topic.getQuery()); Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, topic.getQueryTweetTime(), true, true); TopDocs rs = searcher.search(query, filter, numResults); int i = 1; for (ScoreDoc scoreDoc : rs.scoreDocs) { Document hit = searcher.doc(scoreDoc.doc); out.println(String.format("%s Q0 %s %d %f %s", topic.getId(), hit.getField(StatusField.ID.name).numericValue(), i, scoreDoc.score, runtag)); if (verbose) { out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " ")); } i++; } } reader.close(); out.close(); }
From source file:cc.twittertools.search.local.SearchStatuses.java
License:Apache License
/**
 * Runs a single query against a local index and prints the results in TREC
 * run format ({@code qid Q0 docId rank score runtag}).
 *
 * <p>Requires the query and index options. Hits are restricted to status ids
 * in {@code [0, maxId]}. The similarity option selects BM25 or LM (Dirichlet,
 * mu = 2500); any other value leaves the searcher's default similarity in
 * place. A non-numeric max-id or result-count option is reported on standard
 * error and the program exits with status -1.
 *
 * @param args command-line arguments
 * @throws Exception if the index cannot be opened, the query cannot be parsed,
 *                   or searching fails
 */
@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("query id").create(QID_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("query text").create(QUERY_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("maxid").create(MAX_ID_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return")
            .create(NUM_RESULTS_OPTION));
    options.addOption(OptionBuilder.withArgName("similarity").hasArg()
            .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION));
    options.addOption(new Option(VERBOSE_OPTION, "print out complete document"));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(QUERY_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(SearchStatuses.class.getName(), options);
        System.exit(-1);
    }

    File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION));
    if (!indexLocation.exists()) {
        System.err.println("Error: " + indexLocation + " does not exist!");
        System.exit(-1);
    }

    String qid = cmdline.hasOption(QID_OPTION) ? cmdline.getOptionValue(QID_OPTION) : DEFAULT_QID;
    String queryText = cmdline.hasOption(QUERY_OPTION) ? cmdline.getOptionValue(QUERY_OPTION) : DEFAULT_Q;
    String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG;

    // Report malformed numeric options instead of dying with a NumberFormatException stack trace.
    long maxId = DEFAULT_MAX_ID;
    try {
        if (cmdline.hasOption(MAX_ID_OPTION)) {
            maxId = Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION));
        }
    } catch (NumberFormatException e) {
        System.err.println("Invalid " + MAX_ID_OPTION + ": " + cmdline.getOptionValue(MAX_ID_OPTION));
        System.exit(-1);
    }

    int numResults = DEFAULT_NUM_RESULTS;
    try {
        if (cmdline.hasOption(NUM_RESULTS_OPTION)) {
            numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION));
        }
    } catch (NumberFormatException e) {
        System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION));
        System.exit(-1);
    }

    boolean verbose = cmdline.hasOption(VERBOSE_OPTION);

    String similarity = "LM";
    if (cmdline.hasOption(SIMILARITY_OPTION)) {
        similarity = cmdline.getOptionValue(SIMILARITY_OPTION);
    }

    PrintStream out = new PrintStream(System.out, true, "UTF-8");
    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation));
    try {
        IndexSearcher searcher = new IndexSearcher(reader);

        if (similarity.equalsIgnoreCase("BM25")) {
            searcher.setSimilarity(new BM25Similarity());
        } else if (similarity.equalsIgnoreCase("LM")) {
            searcher.setSimilarity(new LMDirichletSimilarity(2500.0f));
        }

        QueryParser p = new QueryParser(Version.LUCENE_43, IndexStatuses.StatusField.TEXT.name,
                IndexStatuses.ANALYZER);
        Query query = p.parse(queryText);
        // Only consider statuses whose id does not exceed maxId.
        Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, maxId, true, true);

        TopDocs rs = searcher.search(query, filter, numResults);

        int rank = 1;
        for (ScoreDoc scoreDoc : rs.scoreDocs) {
            Document hit = searcher.doc(scoreDoc.doc);
            out.println(String.format("%s Q0 %s %d %f %s", qid,
                    hit.getField(StatusField.ID.name).numericValue(), rank, scoreDoc.score, runtag));
            if (verbose) {
                out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " "));
            }
            rank++;
        }
    } finally {
        // Close the index reader and the output stream even when the query fails.
        try {
            reader.close();
        } finally {
            out.close();
        }
    }
}
From source file:ccc.plugins.search.lucene.SimpleLuceneFS.java
License:Open Source License
/**
 * Searches the index and hands the hits to the supplied handler.
 *
 * <p>The search terms are parsed with {@code createParser()} and the results
 * are restricted by an {@code AclFilter} built from {@code userPerms} (an
 * empty {@code ACL} when {@code userPerms} is null). I/O and parse failures
 * are logged as warnings and the handler is not invoked in that case.
 *
 * @param searchTerms query text to parse
 * @param maxHits     maximum number of hits to retrieve
 * @param sorter      sort order, or null for the default ordering
 * @param userPerms   permissions used for filtering, may be null
 * @param sh          handler receiving the searcher and the hits
 */
private void find(final String searchTerms, final int maxHits, final Sort sorter, final ACL userPerms,
        final CapturingHandler sh) {
    IndexSearcher searcher = null;
    try {
        searcher = new IndexSearcher(createDirectory());

        // Parse first, then build the filter: the same order as evaluating them inline as arguments.
        final org.apache.lucene.search.Query parsedTerms = createParser().parse(searchTerms);
        final AclFilter aclFilter = new AclFilter(ACL_FIELD, (userPerms != null) ? userPerms : new ACL());

        final TopDocs docs;
        if (sorter != null) {
            docs = searcher.search(parsedTerms, aclFilter, maxHits, sorter);
        } else {
            docs = searcher.search(parsedTerms, aclFilter, maxHits);
        }

        sh.handle(searcher, docs);
    } catch (final IOException e) {
        LOG.warn("Error performing query.", e);
    } catch (final ParseException e) {
        LOG.warn("Error performing query.", e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (final IOException e) {
                Exceptions.swallow(e);
            }
        }
    }
}
From source file:ch.admin.isb.hermes5.business.search.SearchEngine.java
License:Apache License
/**
 * Searches the index of the given model and language.
 *
 * <p>If {@code buildQueryString} yields no query, an empty list is returned.
 * Otherwise the index directory is opened; when that fails, the index files
 * are restored from S3 and opening is retried once. Up to 1000 hits on the
 * "presentationName" field are converted to {@link SearchResult}s. Any
 * exception is logged as a warning and results in an empty list.
 *
 * @param searchInput     raw user input
 * @param modelIdentifier identifies the model whose index is searched
 * @param lang            language used to select the index and the analyzer
 * @return the search results, never null
 */
public List<SearchResult> search(String searchInput, String modelIdentifier, String lang) {
    DirectoryReader indexReader = null;
    try {
        final List<SearchResult> found = new ArrayList<SearchResult>();
        final String queryString = buildQueryString(searchInput);

        if (queryString != null) {
            final String indexPath = searchIndexManager.getSearchIndexPath(modelIdentifier, lang);
            try {
                indexReader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
            } catch (Exception e) {
                logger.warn("No index files found at " + indexPath + ". Will try to restore from S3");
                searchIndexManager.restoreIndexFilesFromS3(modelIdentifier, lang);
                indexReader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
            }

            final Analyzer languageAnalyzer = analyserRepository.getAnalyzer(lang);
            final IndexSearcher searcher = new IndexSearcher(indexReader);
            final QueryParser nameParser = new QueryParser(Version.LUCENE_47, "presentationName", languageAnalyzer);
            final Query parsedQuery = nameParser.parse(queryString);

            final ScoreDoc[] topHits = searcher.search(parsedQuery, null, 1000).scoreDocs;
            final HighlighterWrapper highlighter = highlighterRepository.getHighlighter(languageAnalyzer, searcher,
                    parsedQuery);
            for (final ScoreDoc hit : topHits) {
                found.add(buildSearchResult(searcher, highlighter, hit.doc));
            }
        }

        searchLogger.info("<{}> returned {} results", searchInput, found.size());
        return found;
    } catch (Exception e) {
        logger.warn("An exception occurred during search, empty result will be returned", e);
        return new ArrayList<SearchResult>();
    } finally {
        // Closing is best effort; a failure here is only logged at debug level.
        try {
            if (indexReader != null) {
                indexReader.close();
            }
        } catch (IOException e) {
            logger.debug("unable to close directory reader", e);
        }
    }
}