List of usage examples for org.apache.lucene.search IndexSearcher doc
public Document doc(int docID) throws IOException
Sugar for .getIndexReader().document(docID): returns the stored fields of the document with the given ID.
From source file:buscador.SearchFiles.java
License:Apache License
/** * This demonstrates a typical paging search scenario, where the search engine presents * pages of size n to the user. The user can then go to the next page if interested in * the next hits./*from ww w.ja v a2 s . c om*/ * * When the query is executed for the first time, then only enough results are collected * to fill 5 result pages. If the user wants to page beyond this limit, then the query * is executed another time and all hits are collected. * */ public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw, boolean interactive) throws IOException { // Collect enough docs to show 5 pages TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; System.out.println(numTotalHits + " total matching documents"); int start = 0; int end = Math.min(numTotalHits, hitsPerPage); while (true) { if (end > hits.length) { System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected."); System.out.println("Collect more (y/n) ?"); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') { break; } hits = searcher.search(query, numTotalHits).scoreDocs; } end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { if (raw) { // output raw format System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); continue; } Document doc = searcher.doc(hits[i].doc); String path = doc.get("path"); if (path != null) { System.out.println((i + 1) + ". " + path); } else { System.out.println((i + 1) + ". " + "No path for this document"); } String modified = doc.get("modified"); if (modified != null) { System.out.println(modified + ": " + new Date(new Long(modified))); } else { System.out.println((i + 1) + ". 
" + "No last modified for this document"); } } if (!interactive || end == 0) { break; } if (numTotalHits >= end) { boolean quit = false; while (true) { System.out.print("Press "); if (start - hitsPerPage >= 0) { System.out.print("(p)revious page, "); } if (start + hitsPerPage < numTotalHits) { System.out.print("(n)ext page, "); } System.out.println("(q)uit or enter number to jump to a page."); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'q') { quit = true; break; } if (line.charAt(0) == 'p') { start = Math.max(0, start - hitsPerPage); break; } else if (line.charAt(0) == 'n') { if (start + hitsPerPage < numTotalHits) { start += hitsPerPage; } break; } else { int page = Integer.parseInt(line); if ((page - 1) * hitsPerPage < numTotalHits) { start = (page - 1) * hitsPerPage; break; } else { System.out.println("No such page"); } } } if (quit) break; end = Math.min(numTotalHits, start + hitsPerPage); } } }
From source file:bzh.terrevirtuelle.navisu.gazetteer.impl.lucene.GeoNameResolver.java
License:Apache License
private void getMatchingCandidates(IndexSearcher searcher, HashMap<String, List<Location>> allCandidates, String name, ScoreDoc[] hits) { List<Location> topHits = new ArrayList<Location>(); for (int i = 0; i < hits.length; ++i) { Location tmpLocObj = new Location(); int docId = hits[i].doc; Document d;/*from w w w. ja v a 2s .c o m*/ try { d = searcher.doc(docId); tmpLocObj.setName(d.get(FIELD_NAME_NAME)); tmpLocObj.setLongitude(d.get(FIELD_NAME_LONGITUDE)); tmpLocObj.setLatitude(d.get(FIELD_NAME_LATITUDE)); //If alternate names are empty put name as actual name //This covers missing data and equals weight for later computation if (d.get(FIELD_NAME_ALTERNATE_NAMES).isEmpty()) { tmpLocObj.setAlternateNames(d.get(FIELD_NAME_NAME)); } else { tmpLocObj.setAlternateNames(d.get(FIELD_NAME_ALTERNATE_NAMES)); } tmpLocObj.setCountryCode(d.get(FIELD_NAME_COUNTRY_CODE)); tmpLocObj.setAdmin1Code(d.get(FIELD_NAME_ADMIN1_CODE)); tmpLocObj.setAdmin2Code(d.get(FIELD_NAME_ADMIN2_CODE)); tmpLocObj.setFeatureCode(d.get(FIELD_NAME_FEATURE_CODE)); } catch (IOException e) { e.printStackTrace(); } topHits.add(tmpLocObj); } //Picking hitsPerPage number of locations from feature code sorted list allCandidates.put(name, pickTopSortedByCode(topHits, hitsPerPage)); }
From source file:ca.mcgill.cs.creco.logic.search.CategorySearch.java
License:Apache License
@Override public List<Category> queryCategories(String pQueryString) { List<Category> searchResult = new ArrayList<Category>(); try {/*from w ww.j a va 2s. c o m*/ DirectoryReader reader = DirectoryReader.open(aDirectory); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector results = TopScoreDocCollector.create(MAX_NUM_RESULTS, true); // Search category names Query categoryNameQuery = new QueryParser(VERSION, CATEGORY_NAME, aAnalyzer).parse(pQueryString); searcher.search(categoryNameQuery, results); // Search flattened text (only product names for now) Query flattenedTextQuery = new QueryParser(VERSION, FLATTENED_TEXT, aAnalyzer).parse(pQueryString); searcher.search(flattenedTextQuery, results); for (ScoreDoc scoredResult : results.topDocs().scoreDocs) { Document doc = searcher.doc(scoredResult.doc); Category resultCategory = aDataStore.getCategory(doc.get(CATEGORY_ID)); if (!searchResult.contains(resultCategory) && resultCategory.getNumberOfProducts() > 0) { searchResult.add(resultCategory); } } } catch (IOException e) { LOG.error(e.getMessage()); } catch (ParseException e) { LOG.error(e.getMessage()); } return searchResult; }
From source file:ca.pgon.freenetknowledge.search.impl.LuceneSearchEngine.java
License:Apache License
@Override public List<SearchResultEntry> searchTerm(String term) { IndexSearcher indexSearcher = null; try {/*w w w . j a v a 2 s . c o m*/ // Init the needed components IndexReader indexReader = IndexReader.open(directory); indexSearcher = new IndexSearcher(indexReader); QueryParser queryParser = new QueryParser(LUCENE_VERSION, INDEX_CONTENT, analyzer); // Create the query Query query; query = queryParser.parse(term); // Get the search result TopDocs topDocs = indexSearcher.search(query, LUCENE_MAX_HITS); ScoreDoc[] scoreDocs = topDocs.scoreDocs; // Change them to urls Map<String, SearchResultEntry> alreadyIn = new HashMap<>(); List<SearchResultEntry> results = new ArrayList<>(); for (ScoreDoc sd : scoreDocs) { // Get the url Document document = indexSearcher.doc(sd.doc); String urlId = document.get(INDEX_FOR_URL); SearchResultEntry sre; if (alreadyIn.containsKey(urlId)) { sre = alreadyIn.get(urlId); } else { sre = new SearchResultEntry(); UrlEntity ue = urlDAO.get(Integer.valueOf(urlId)); sre.urlEntity = ue; if (ue == null) { continue; } if (ue.isError()) { continue; } alreadyIn.put(urlId, sre); results.add(sre); } // Add the description String fullDescription = document.get(INDEX_CONTENT); String partialDescription = SearchTools.getPartAround(fullDescription, SearchTools.findWordPosition(fullDescription, term), LUCENE_MAX_DESCRIPTION_CARACTERS); partialDescription = partialDescription.replace('\n', ' '); partialDescription = partialDescription.replace('\r', ' '); if (!sre.description.contains(partialDescription)) { sre.description.add(partialDescription); } } return results; } catch (ParseException e) { logger.log(Level.SEVERE, "Error while parsing the search term", e); } catch (IOException e) { logger.log(Level.SEVERE, "Error while searching", e); } finally { if (indexSearcher != null) { try { indexSearcher.close(); } catch (IOException e) { } } } return null; }
From source file:calliope.search.AeseSearch.java
License:Open Source License
/** * Search the index for the given expression * @param expr the expression to be parsed * @param langCode the language of the expression and index * @param profile the hit profile (where to start from etc) * @return the result docs/*from www . j a va 2 s .c om*/ */ public static String searchIndex(String expr, String langCode, HitProfile profile) { StringBuilder sb = new StringBuilder(); try { Analyzer analyzer = AeseSearch.createAnalyzer(langCode); DirectoryReader reader = DirectoryReader.open(AeseSearch.index); if (reader != null) { IndexSearcher searcher = new IndexSearcher(reader); QueryParser qp = new QueryParser(Version.LUCENE_45, "text", analyzer); Query q = qp.parse(expr); TopDocs hits = searcher.search(q, AeseSearch.maxHits); ScoreDoc[] docs = hits.scoreDocs; for (int j = profile.from; j < profile.to && j < docs.length; j++) { Document doc = searcher.doc(docs[j].doc); String vid = doc.get(LuceneFields.VID); String docID = doc.get(LuceneFields.DOCID); Highlighter h = new Highlighter(new QueryScorer(q)); String text = getCorTexVersion(docID, vid); sb.append(formatDocID(docID)); sb.append(" "); sb.append(formatVersionID(vid)); sb.append(" "); String frag = h.getBestFragment(analyzer, "text", text); sb.append("<span class=\"found\">"); sb.append(frag); sb.append("</span>\n"); } profile.numHits = docs.length; } reader.close(); } catch (Exception e) { sb.append(e.getMessage()); } return sb.toString(); }
From source file:cc.pp.analyzer.ik.demo.IKAnalyzerDemo.java
License:Apache License
public static void main(String[] args) { //Lucene Document?? String fieldName = "text"; ////from w ww . ja v a 2 s. co m String text = "IK Analyzer???????"; //IKAnalyzer? Analyzer analyzer = new IKAnalyzer(Version.LUCENE_48, true); Directory directory = null; IndexWriter iwriter = null; DirectoryReader ireader = null; IndexSearcher isearcher = null; try { // directory = new RAMDirectory(); //?IndexWriterConfig IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_48, analyzer); iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); iwriter = new IndexWriter(directory, iwConfig); // Document doc = new Document(); doc.add(new LongField("ID", 1000, Field.Store.YES)); doc.add(new TextField(fieldName, text, Field.Store.YES)); iwriter.addDocument(doc); iwriter.close(); //?********************************** //? ireader = DirectoryReader.open(directory); isearcher = new IndexSearcher(ireader); String keyword = "?"; // String keyword = ""; //QueryParser?Query QueryParser qp = new QueryParser(Version.LUCENE_48, fieldName, analyzer); qp.setDefaultOperator(QueryParser.AND_OPERATOR); Query query = qp.parse(keyword); System.out.println("Query = " + query); //?5? TopDocs topDocs = isearcher.search(query, 5); System.out.println("" + topDocs.totalHits); // ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (int i = 0; i < topDocs.totalHits; i++) { Document targetDoc = isearcher.doc(scoreDocs[i].doc); System.out.println("" + targetDoc.toString()); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { e.printStackTrace(); } } if (directory != null) { try { directory.close(); } catch (IOException e) { e.printStackTrace(); } } } }
From source file:cc.twittertools.search.local.RunQueries.java
License:Apache License
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(// ww w . j a v a2 s . c o m OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return") .create(NUM_RESULTS_OPTION)); options.addOption(OptionBuilder.withArgName("file").hasArg() .withDescription("file containing topics in TREC format").create(QUERIES_OPTION)); options.addOption(OptionBuilder.withArgName("similarity").hasArg() .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION)); options.addOption( OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION)); options.addOption(new Option(VERBOSE_OPTION, "print out complete document")); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(QUERIES_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(RunQueries.class.getName(), options); System.exit(-1); } File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION)); if (!indexLocation.exists()) { System.err.println("Error: " + indexLocation + " does not exist!"); System.exit(-1); } String runtag = cmdline.hasOption(RUNTAG_OPTION) ? 
cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG; String topicsFile = cmdline.getOptionValue(QUERIES_OPTION); int numResults = 1000; try { if (cmdline.hasOption(NUM_RESULTS_OPTION)) { numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION)); } } catch (NumberFormatException e) { System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION)); System.exit(-1); } String similarity = "LM"; if (cmdline.hasOption(SIMILARITY_OPTION)) { similarity = cmdline.getOptionValue(SIMILARITY_OPTION); } boolean verbose = cmdline.hasOption(VERBOSE_OPTION); PrintStream out = new PrintStream(System.out, true, "UTF-8"); IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation)); IndexSearcher searcher = new IndexSearcher(reader); if (similarity.equalsIgnoreCase("BM25")) { searcher.setSimilarity(new BM25Similarity()); } else if (similarity.equalsIgnoreCase("LM")) { searcher.setSimilarity(new LMDirichletSimilarity(2500.0f)); } QueryParser p = new QueryParser(Version.LUCENE_43, StatusField.TEXT.name, IndexStatuses.ANALYZER); TrecTopicSet topics = TrecTopicSet.fromFile(new File(topicsFile)); for (TrecTopic topic : topics) { Query query = p.parse(topic.getQuery()); Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, topic.getQueryTweetTime(), true, true); TopDocs rs = searcher.search(query, filter, numResults); int i = 1; for (ScoreDoc scoreDoc : rs.scoreDocs) { Document hit = searcher.doc(scoreDoc.doc); out.println(String.format("%s Q0 %s %d %f %s", topic.getId(), hit.getField(StatusField.ID.name).numericValue(), i, scoreDoc.score, runtag)); if (verbose) { out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " ")); } i++; } } reader.close(); out.close(); }
From source file:cc.twittertools.search.local.SearchStatuses.java
License:Apache License
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(//from w ww. j av a 2 s .c o m OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption( OptionBuilder.withArgName("string").hasArg().withDescription("query id").create(QID_OPTION)); options.addOption( OptionBuilder.withArgName("string").hasArg().withDescription("query text").create(QUERY_OPTION)); options.addOption( OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("maxid").create(MAX_ID_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return") .create(NUM_RESULTS_OPTION)); options.addOption(OptionBuilder.withArgName("similarity").hasArg() .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION)); options.addOption(new Option(VERBOSE_OPTION, "print out complete document")); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(QUERY_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(SearchStatuses.class.getName(), options); System.exit(-1); } File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION)); if (!indexLocation.exists()) { System.err.println("Error: " + indexLocation + " does not exist!"); System.exit(-1); } String qid = cmdline.hasOption(QID_OPTION) ? cmdline.getOptionValue(QID_OPTION) : DEFAULT_QID; String queryText = cmdline.hasOption(QUERY_OPTION) ? cmdline.getOptionValue(QUERY_OPTION) : DEFAULT_Q; String runtag = cmdline.hasOption(RUNTAG_OPTION) ? 
cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG; long maxId = cmdline.hasOption(MAX_ID_OPTION) ? Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION)) : DEFAULT_MAX_ID; int numResults = cmdline.hasOption(NUM_RESULTS_OPTION) ? Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION)) : DEFAULT_NUM_RESULTS; boolean verbose = cmdline.hasOption(VERBOSE_OPTION); String similarity = "LM"; if (cmdline.hasOption(SIMILARITY_OPTION)) { similarity = cmdline.getOptionValue(SIMILARITY_OPTION); } PrintStream out = new PrintStream(System.out, true, "UTF-8"); IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation)); IndexSearcher searcher = new IndexSearcher(reader); if (similarity.equalsIgnoreCase("BM25")) { searcher.setSimilarity(new BM25Similarity()); } else if (similarity.equalsIgnoreCase("LM")) { searcher.setSimilarity(new LMDirichletSimilarity(2500.0f)); } QueryParser p = new QueryParser(Version.LUCENE_43, IndexStatuses.StatusField.TEXT.name, IndexStatuses.ANALYZER); Query query = p.parse(queryText); Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, maxId, true, true); TopDocs rs = searcher.search(query, filter, numResults); int i = 1; for (ScoreDoc scoreDoc : rs.scoreDocs) { Document hit = searcher.doc(scoreDoc.doc); out.println(String.format("%s Q0 %s %d %f %s", qid, hit.getField(StatusField.ID.name).numericValue(), i, scoreDoc.score, runtag)); if (verbose) { out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " ")); } i++; } reader.close(); out.close(); }
From source file:ccc.plugins.search.lucene.CapturingHandler.java
License:Open Source License
/**
 * Resolve a search hit to the CCC resource it represents.
 *
 * <p>The identifier is read from the hit's stored "id" field and parsed as a UUID.
 *
 * @param searcher The lucene index searcher used to load the document.
 * @param docId The lucene document id of the hit.
 *
 * @return The CCC resource id.
 *
 * @throws IOException If an error occurs while reading the index.
 */
protected UUID lookupResourceId(final IndexSearcher searcher, final int docId) throws IOException {
    final Document hit = searcher.doc(docId);
    final String resourceId = hit.getField("id").stringValue();
    return UUID.fromString(resourceId);
}
From source file:ch.admin.isb.hermes5.business.search.SearchEngine.java
License:Apache License
/**
 * Assembles a {@code SearchResult} for one hit from the document's "presentationName", "type"
 * and "name" fields plus the highlighted text of its "content" field.
 *
 * @param isearcher   searcher used to load the hit's document
 * @param highlighter wrapper that supplies the highlighted text for the hit
 * @param id          lucene document id of the hit
 * @return the search result describing the hit
 * @throws IOException                  if the document cannot be read from the index
 * @throws InvalidTokenOffsetsException if highlighting the content fails
 */
private SearchResult buildSearchResult(IndexSearcher isearcher, HighlighterWrapper highlighter, int id)
        throws IOException, InvalidTokenOffsetsException {
    final Document document = isearcher.doc(id);

    final String displayName = document.getField("presentationName").stringValue();
    final String typeLabel = document.getField("type").stringValue();
    final SearchResultType resultType = SearchResultType.valueOf(typeLabel);
    final String elementName = document.getField("name").stringValue();
    final String highlighted = highlighter.getHighlightedText(id, "content");

    return new SearchResult(displayName, elementName, highlighted, resultType);
}