List of usage examples for org.apache.lucene.search IndexSearcher IndexSearcher
public IndexSearcher(IndexReaderContext context)
From source file:cc.twittertools.search.api.TrecSearchHandler.java
License:Apache License
public TrecSearchHandler(File indexPath, @Nullable Map<String, String> credentials) throws IOException { Preconditions.checkNotNull(indexPath); Preconditions.checkArgument(indexPath.exists()); // Can be null, in which case we don't check for credentials. this.credentials = credentials; IndexReader reader = DirectoryReader.open(FSDirectory.open(indexPath)); searcher = new IndexSearcher(reader); searcher.setSimilarity(new LMDirichletSimilarity(2500.0f)); }
From source file:cc.twittertools.search.api.TrecSearchThriftServer.java
License:Apache License
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(HELP_OPTION, "show help")); options.addOption(OptionBuilder.withArgName("port").hasArg().withDescription("port").create(PORT_OPTION)); options.addOption(/*from w w w .jav a 2s.c o m*/ OptionBuilder.withArgName("index").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg() .withDescription("max number of threads in thread pool").create(MAX_THREADS_OPTION)); options.addOption(OptionBuilder.withArgName("file").hasArg() .withDescription("file containing access tokens").create(CREDENTIALS_OPTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(TrecSearchThriftServer.class.getName(), options); System.exit(-1); } int port = cmdline.hasOption(PORT_OPTION) ? Integer.parseInt(cmdline.getOptionValue(PORT_OPTION)) : DEFAULT_PORT; int maxThreads = cmdline.hasOption(MAX_THREADS_OPTION) ? 
Integer.parseInt(cmdline.getOptionValue(MAX_THREADS_OPTION)) : DEFAULT_MAX_THREADS; File index = new File(cmdline.getOptionValue(INDEX_OPTION)); Map<String, String> credentials = null; if (cmdline.hasOption(CREDENTIALS_OPTION)) { credentials = Maps.newHashMap(); File cfile = new File(cmdline.getOptionValue(CREDENTIALS_OPTION)); if (!cfile.exists()) { System.err.println("Error: " + cfile + " does not exist!"); System.exit(-1); } for (String s : Files.readLines(cfile, Charsets.UTF_8)) { try { String[] arr = s.split(":"); credentials.put(arr[0], arr[1]); } catch (Exception e) { // Catch any exceptions from parsing file contain access tokens System.err.println("Error reading access tokens from " + cfile + "!"); System.exit(-1); } } } if (!index.exists()) { System.err.println("Error: " + index + " does not exist!"); System.exit(-1); } IndexReader reader = DirectoryReader.open(MMapDirectory.open(index)); IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity(new LMDirichletSimilarity(DEFAULT_MU)); QueryLikelihoodModel qlModel = new QueryLikelihoodModel(DEFAULT_MU); TServerSocket serverSocket = new TServerSocket(port); TrecSearch.Processor<TrecSearch.Iface> searchProcessor = new TrecSearch.Processor<TrecSearch.Iface>( new TrecSearchHandler(searcher, qlModel, credentials)); TThreadPoolServer.Args serverArgs = new TThreadPoolServer.Args(serverSocket); serverArgs.maxWorkerThreads(maxThreads); TServer thriftServer = new TThreadPoolServer( serverArgs.processor(searchProcessor).protocolFactory(new TBinaryProtocol.Factory())); thriftServer.serve(); }
From source file:cc.twittertools.search.local.RunQueries.java
License:Apache License
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(//from ww w . j a v a 2 s . co m OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return") .create(NUM_RESULTS_OPTION)); options.addOption(OptionBuilder.withArgName("file").hasArg() .withDescription("file containing topics in TREC format").create(QUERIES_OPTION)); options.addOption(OptionBuilder.withArgName("similarity").hasArg() .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION)); options.addOption( OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION)); options.addOption(new Option(VERBOSE_OPTION, "print out complete document")); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(QUERIES_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(RunQueries.class.getName(), options); System.exit(-1); } File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION)); if (!indexLocation.exists()) { System.err.println("Error: " + indexLocation + " does not exist!"); System.exit(-1); } String runtag = cmdline.hasOption(RUNTAG_OPTION) ? 
cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG; String topicsFile = cmdline.getOptionValue(QUERIES_OPTION); int numResults = 1000; try { if (cmdline.hasOption(NUM_RESULTS_OPTION)) { numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION)); } } catch (NumberFormatException e) { System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION)); System.exit(-1); } String similarity = "LM"; if (cmdline.hasOption(SIMILARITY_OPTION)) { similarity = cmdline.getOptionValue(SIMILARITY_OPTION); } boolean verbose = cmdline.hasOption(VERBOSE_OPTION); PrintStream out = new PrintStream(System.out, true, "UTF-8"); IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation)); IndexSearcher searcher = new IndexSearcher(reader); if (similarity.equalsIgnoreCase("BM25")) { searcher.setSimilarity(new BM25Similarity()); } else if (similarity.equalsIgnoreCase("LM")) { searcher.setSimilarity(new LMDirichletSimilarity(2500.0f)); } QueryParser p = new QueryParser(Version.LUCENE_43, StatusField.TEXT.name, IndexStatuses.ANALYZER); TrecTopicSet topics = TrecTopicSet.fromFile(new File(topicsFile)); for (TrecTopic topic : topics) { Query query = p.parse(topic.getQuery()); Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, topic.getQueryTweetTime(), true, true); TopDocs rs = searcher.search(query, filter, numResults); int i = 1; for (ScoreDoc scoreDoc : rs.scoreDocs) { Document hit = searcher.doc(scoreDoc.doc); out.println(String.format("%s Q0 %s %d %f %s", topic.getId(), hit.getField(StatusField.ID.name).numericValue(), i, scoreDoc.score, runtag)); if (verbose) { out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " ")); } i++; } } reader.close(); out.close(); }
From source file:cc.twittertools.search.local.SearchStatuses.java
License:Apache License
/**
 * Command-line entry point: runs a single query against a Lucene index and prints the ranked
 * hits to standard output (UTF-8).
 *
 * <p>Each hit is printed with the format {@code "%s Q0 %s %d %f %s"} (query id, document id
 * field, rank starting at 1, score, runtag). The search is restricted by a numeric range filter
 * on the id field from 0 to the max id, inclusive. With the verbose option, the full document is
 * printed on a following line prefixed by {@code "# "}.
 *
 * <p>Usage is printed and the process exits with status -1 when the query or index option is
 * missing. The process also exits with status -1 when the index location does not exist or a
 * numeric option is not a valid number.
 *
 * @param args command-line arguments
 * @throws Exception propagated from opening the index, parsing the query, or searching
 */
@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
  Options options = new Options();
  options.addOption(
      OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION));
  options.addOption(
      OptionBuilder.withArgName("string").hasArg().withDescription("query id").create(QID_OPTION));
  options.addOption(
      OptionBuilder.withArgName("string").hasArg().withDescription("query text").create(QUERY_OPTION));
  options.addOption(
      OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION));
  options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("maxid").create(MAX_ID_OPTION));
  options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return")
      .create(NUM_RESULTS_OPTION));
  options.addOption(OptionBuilder.withArgName("similarity").hasArg()
      .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION));
  options.addOption(new Option(VERBOSE_OPTION, "print out complete document"));

  CommandLine cmdline = null;
  CommandLineParser parser = new GnuParser();
  try {
    cmdline = parser.parse(options, args);
  } catch (ParseException exp) {
    System.err.println("Error parsing command line: " + exp.getMessage());
    System.exit(-1);
  }

  if (!cmdline.hasOption(QUERY_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
    HelpFormatter formatter = new HelpFormatter();
    formatter.printHelp(SearchStatuses.class.getName(), options);
    System.exit(-1);
  }

  File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION));
  if (!indexLocation.exists()) {
    System.err.println("Error: " + indexLocation + " does not exist!");
    System.exit(-1);
  }

  String qid = cmdline.hasOption(QID_OPTION) ? cmdline.getOptionValue(QID_OPTION) : DEFAULT_QID;
  String queryText = cmdline.hasOption(QUERY_OPTION) ? cmdline.getOptionValue(QUERY_OPTION) : DEFAULT_Q;
  String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG;

  // Report malformed numbers as a usage error instead of an uncaught exception.
  long maxId = DEFAULT_MAX_ID;
  try {
    if (cmdline.hasOption(MAX_ID_OPTION)) {
      maxId = Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION));
    }
  } catch (NumberFormatException e) {
    System.err.println("Invalid " + MAX_ID_OPTION + ": " + cmdline.getOptionValue(MAX_ID_OPTION));
    System.exit(-1);
  }

  int numResults = DEFAULT_NUM_RESULTS;
  try {
    if (cmdline.hasOption(NUM_RESULTS_OPTION)) {
      numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION));
    }
  } catch (NumberFormatException e) {
    System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION));
    System.exit(-1);
  }

  boolean verbose = cmdline.hasOption(VERBOSE_OPTION);

  String similarity = "LM";
  if (cmdline.hasOption(SIMILARITY_OPTION)) {
    similarity = cmdline.getOptionValue(SIMILARITY_OPTION);
  }

  PrintStream out = new PrintStream(System.out, true, "UTF-8");

  IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation));
  try {
    IndexSearcher searcher = new IndexSearcher(reader);

    // Any other similarity name leaves the searcher's default similarity in place.
    if (similarity.equalsIgnoreCase("BM25")) {
      searcher.setSimilarity(new BM25Similarity());
    } else if (similarity.equalsIgnoreCase("LM")) {
      searcher.setSimilarity(new LMDirichletSimilarity(2500.0f));
    }

    QueryParser p = new QueryParser(Version.LUCENE_43, IndexStatuses.StatusField.TEXT.name,
        IndexStatuses.ANALYZER);
    Query query = p.parse(queryText);
    Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, maxId, true, true);

    TopDocs rs = searcher.search(query, filter, numResults);

    int i = 1;
    for (ScoreDoc scoreDoc : rs.scoreDocs) {
      Document hit = searcher.doc(scoreDoc.doc);
      out.println(String.format("%s Q0 %s %d %f %s", qid,
          hit.getField(StatusField.ID.name).numericValue(), i, scoreDoc.score, runtag));
      if (verbose) {
        out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " "));
      }
      i++;
    }
  } finally {
    // Release the index even when query parsing or searching fails.
    reader.close();
  }
  out.close();
}
From source file:cc.wikitools.lucene.WikipediaSearcher.java
License:Apache License
/**
 * Sets up the searcher over {@code reader} with {@code LMDirichletSimilarity(2500.0f)} and
 * creates one query parser for the article text field and one for the title field, both using
 * {@code IndexWikipediaDump.ANALYZER}.
 */
protected void init() {
  searcher = new IndexSearcher(reader);
  searcher.setSimilarity(new LMDirichletSimilarity(2500.0f));

  parserArticle = new QueryParser(
      Version.LUCENE_43,
      IndexField.TEXT.name,
      IndexWikipediaDump.ANALYZER);
  parserTitle = new QueryParser(
      Version.LUCENE_43,
      IndexField.TITLE.name,
      IndexWikipediaDump.ANALYZER);
}
From source file:ccc.plugins.search.lucene.SimpleLuceneFS.java
License:Open Source License
/**
 * Runs {@code searchTerms} against the index and passes the searcher and hits to {@code sh}.
 *
 * <p>Results are filtered with an {@code AclFilter} built from {@code userPerms}; a fresh
 * {@code ACL} is used when {@code userPerms} is {@code null}. When {@code sorter} is non-null the
 * sorted search overload is used. I/O and parse failures are logged as warnings and not
 * rethrown. The searcher is closed before returning.
 *
 * @param searchTerms query text handed to the parser
 * @param maxHits maximum number of hits to request
 * @param sorter sort order, or {@code null} for the unsorted search
 * @param userPerms permissions used for filtering, or {@code null}
 * @param sh handler that receives the searcher and the hits
 */
private void find(final String searchTerms, final int maxHits, final Sort sorter, final ACL userPerms,
    final CapturingHandler sh) {
  IndexSearcher searcher = null;
  try {
    searcher = new IndexSearcher(createDirectory());

    final Query parsedQuery = createParser().parse(searchTerms);
    final ACL effectivePerms = (userPerms != null) ? userPerms : new ACL();
    final AclFilter aclFilter = new AclFilter(ACL_FIELD, effectivePerms);

    final TopDocs docs;
    if (sorter != null) {
      docs = searcher.search(parsedQuery, aclFilter, maxHits, sorter);
    } else {
      docs = searcher.search(parsedQuery, aclFilter, maxHits);
    }

    sh.handle(searcher, docs);
  } catch (final IOException e) {
    LOG.warn("Error performing query.", e);
  } catch (final ParseException e) {
    LOG.warn("Error performing query.", e);
  } finally {
    if (null != searcher) {
      try {
        searcher.close();
      } catch (final IOException e) {
        Exceptions.swallow(e);
      }
    }
  }
}
From source file:ccc.plugins.search.lucene.SimpleLuceneFS.java
License:Open Source License
/**
 * Looks up the document identified by {@code uuid}, builds a {@code MoreLikeThis} query for it
 * on {@code DEFAULT_FIELD} (minimum document frequency 2), and passes the searcher and hits to
 * {@code ch}.
 *
 * <p>Does nothing when {@code uuid} is {@code null} or when {@code docNumber} returns -1. I/O
 * failures are logged as warnings and not rethrown. The searcher and the reader are closed
 * before returning.
 *
 * @param uuid identifier of the reference document, or {@code null}
 * @param maxHits maximum number of hits to request
 * @param ch handler that receives the searcher and the hits
 */
private void similar(final String uuid, final int maxHits, final CapturingHandler ch) {
  if (null == uuid) {
    return;
  }

  IndexReader ir = null;
  IndexSearcher searcher = null;
  try {
    ir = IndexReader.open(createDirectory());
    searcher = new IndexSearcher(ir);

    final int docNum = docNumber(uuid, searcher);
    if (docNum != -1) {
      final MoreLikeThis moreLikeThis = new MoreLikeThis(ir);
      moreLikeThis.setFieldNames(new String[] { DEFAULT_FIELD });
      moreLikeThis.setMinDocFreq(2);

      final Query likeQuery = moreLikeThis.like(docNum);
      ch.handle(searcher, searcher.search(likeQuery, maxHits));
    }
  } catch (final IOException e) {
    LOG.warn("Error performing query.", e);
  } finally {
    // Close the searcher first, then the reader it wraps.
    if (null != searcher) {
      try {
        searcher.close();
      } catch (final IOException e) {
        Exceptions.swallow(e);
      }
    }
    if (null != ir) {
      try {
        ir.close();
      } catch (final IOException e) {
        Exceptions.swallow(e);
      }
    }
  }
}
From source file:ch.admin.isb.hermes5.business.search.SearchEngine.java
License:Apache License
public List<SearchResult> search(String searchInput, String modelIdentifier, String lang) { DirectoryReader directoryReader = null; try {// w ww . j a v a 2s . c o m List<SearchResult> results = new ArrayList<SearchResult>(); String queryString = buildQueryString(searchInput); if (queryString != null) { String searchIndexPath = searchIndexManager.getSearchIndexPath(modelIdentifier, lang); try { directoryReader = DirectoryReader.open(FSDirectory.open(new File(searchIndexPath))); } catch (Exception e) { logger.warn("No index files found at " + searchIndexPath + ". Will try to restore from S3"); searchIndexManager.restoreIndexFilesFromS3(modelIdentifier, lang); directoryReader = DirectoryReader.open(FSDirectory.open(new File(searchIndexPath))); } Analyzer analyzer = analyserRepository.getAnalyzer(lang); IndexSearcher isearcher = new IndexSearcher(directoryReader); Query query = new QueryParser(Version.LUCENE_47, "presentationName", analyzer).parse(queryString); ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs; HighlighterWrapper highlighter = highlighterRepository.getHighlighter(analyzer, isearcher, query); for (int i = 0; i < hits.length; i++) { results.add(buildSearchResult(isearcher, highlighter, hits[i].doc)); } } searchLogger.info("<{}> returned {} results", searchInput, results.size()); return results; } catch (Exception e) { logger.warn("An exception occurred during search, empty result will be returned", e); return new ArrayList<SearchResult>(); } finally { try { if (directoryReader != null) { directoryReader.close(); } } catch (IOException e) { logger.debug("unable to close directory reader", e); } } }
From source file:ch.algotrader.rest.index.SecurityIndexer.java
License:Open Source License
public List<SecurityVO> search(String queryStr) throws ParseException { try (IndexReader reader = DirectoryReader.open(index)) { IndexSearcher searcher = new IndexSearcher(reader); QueryParser queryParser = new MultiFieldQueryParser(FIELDS, new StandardAnalyzer()); queryParser.setAllowLeadingWildcard(true); Query query = queryParser.parse(queryStr); TopDocs results = searcher.search(query, 10); return Arrays.asList(results.scoreDocs).stream().map(sd -> searchDocument(searcher, sd)) .mapToLong(d -> d.getField("id").numericValue().longValue()).mapToObj(securityCache::get) .collect(Collectors.toList()); } catch (IOException ioe) { throw new UnrecoverableCoreException("Unexpected I/O error accessing security index", ioe); }// w ww .j ava2 s.com }
From source file:ch.ksfx.web.services.lucene.ObservationSearch.java
License:Open Source License
public void prepare(String allQuery, String scalarValueQuery, Map<String, String> complexValueQuery, Map<String, String> metaDataQuery, Date dateFrom, Date dateTo, String seriesId) { try {// w ww. j a v a 2 s . c o m IndexReader reader = DirectoryReader .open(FSDirectory.open(Paths.get(systemEnvironment.getApplicationIndexfilePath()))); searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); QueryParser parser = new QueryParser("catch_all", analyzer); System.out.println("Complex value query: " + complexValueQuery); String luceneQuery = buildQuery(allQuery, scalarValueQuery, complexValueQuery, metaDataQuery, dateFrom, dateTo, seriesId); System.out.println("Lucene query: " + luceneQuery); query = parser.parse(luceneQuery); } catch (Exception e) { e.printStackTrace(); throw new RuntimeException("Error in Lucene query PREPARE"); } }