List of usage examples for org.apache.lucene.search.IndexSearcher#search
public <C extends Collector, T> T search(Query query, CollectorManager<C, T> collectorManager) throws IOException
From source file:buscador.SearchFiles.java
License: Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);// www . j av a 2s.co m } String index = "Zaguan1"; String[] fields = { "title", "description", "identifier", "date", "creator" }; BooleanClause.Occur[] flags = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD }; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new SpanishAnalyzer(Version.LATEST); BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); } else { in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? 
queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = MultiFieldQueryParser.parse(line, fields, flags, analyzer); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } reader.close(); }
From source file:buscador.SearchFiles.java
License:Apache License
/** * This demonstrates a typical paging search scenario, where the search engine presents * pages of size n to the user. The user can then go to the next page if interested in * the next hits.//from ww w .j a v a 2 s. com * * When the query is executed for the first time, then only enough results are collected * to fill 5 result pages. If the user wants to page beyond this limit, then the query * is executed another time and all hits are collected. * */ public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw, boolean interactive) throws IOException { // Collect enough docs to show 5 pages TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; System.out.println(numTotalHits + " total matching documents"); int start = 0; int end = Math.min(numTotalHits, hitsPerPage); while (true) { if (end > hits.length) { System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected."); System.out.println("Collect more (y/n) ?"); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') { break; } hits = searcher.search(query, numTotalHits).scoreDocs; } end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { if (raw) { // output raw format System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); continue; } Document doc = searcher.doc(hits[i].doc); String path = doc.get("path"); if (path != null) { System.out.println((i + 1) + ". " + path); } else { System.out.println((i + 1) + ". " + "No path for this document"); } String modified = doc.get("modified"); if (modified != null) { System.out.println(modified + ": " + new Date(new Long(modified))); } else { System.out.println((i + 1) + ". 
" + "No last modified for this document"); } } if (!interactive || end == 0) { break; } if (numTotalHits >= end) { boolean quit = false; while (true) { System.out.print("Press "); if (start - hitsPerPage >= 0) { System.out.print("(p)revious page, "); } if (start + hitsPerPage < numTotalHits) { System.out.print("(n)ext page, "); } System.out.println("(q)uit or enter number to jump to a page."); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'q') { quit = true; break; } if (line.charAt(0) == 'p') { start = Math.max(0, start - hitsPerPage); break; } else if (line.charAt(0) == 'n') { if (start + hitsPerPage < numTotalHits) { start += hitsPerPage; } break; } else { int page = Integer.parseInt(line); if ((page - 1) * hitsPerPage < numTotalHits) { start = (page - 1) * hitsPerPage; break; } else { System.out.println("No such page"); } } } if (quit) break; end = Math.min(numTotalHits, start + hitsPerPage); } } }
From source file:ca.mcgill.cs.creco.logic.search.CategorySearch.java
License:Apache License
@Override public List<Category> queryCategories(String pQueryString) { List<Category> searchResult = new ArrayList<Category>(); try {//from w w w .j ava2 s . co m DirectoryReader reader = DirectoryReader.open(aDirectory); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector results = TopScoreDocCollector.create(MAX_NUM_RESULTS, true); // Search category names Query categoryNameQuery = new QueryParser(VERSION, CATEGORY_NAME, aAnalyzer).parse(pQueryString); searcher.search(categoryNameQuery, results); // Search flattened text (only product names for now) Query flattenedTextQuery = new QueryParser(VERSION, FLATTENED_TEXT, aAnalyzer).parse(pQueryString); searcher.search(flattenedTextQuery, results); for (ScoreDoc scoredResult : results.topDocs().scoreDocs) { Document doc = searcher.doc(scoredResult.doc); Category resultCategory = aDataStore.getCategory(doc.get(CATEGORY_ID)); if (!searchResult.contains(resultCategory) && resultCategory.getNumberOfProducts() > 0) { searchResult.add(resultCategory); } } } catch (IOException e) { LOG.error(e.getMessage()); } catch (ParseException e) { LOG.error(e.getMessage()); } return searchResult; }
From source file:ca.pgon.freenetknowledge.search.impl.LuceneSearchEngine.java
License:Apache License
@Override public List<SearchResultEntry> searchTerm(String term) { IndexSearcher indexSearcher = null; try {//from w ww . j a v a2 s . c om // Init the needed components IndexReader indexReader = IndexReader.open(directory); indexSearcher = new IndexSearcher(indexReader); QueryParser queryParser = new QueryParser(LUCENE_VERSION, INDEX_CONTENT, analyzer); // Create the query Query query; query = queryParser.parse(term); // Get the search result TopDocs topDocs = indexSearcher.search(query, LUCENE_MAX_HITS); ScoreDoc[] scoreDocs = topDocs.scoreDocs; // Change them to urls Map<String, SearchResultEntry> alreadyIn = new HashMap<>(); List<SearchResultEntry> results = new ArrayList<>(); for (ScoreDoc sd : scoreDocs) { // Get the url Document document = indexSearcher.doc(sd.doc); String urlId = document.get(INDEX_FOR_URL); SearchResultEntry sre; if (alreadyIn.containsKey(urlId)) { sre = alreadyIn.get(urlId); } else { sre = new SearchResultEntry(); UrlEntity ue = urlDAO.get(Integer.valueOf(urlId)); sre.urlEntity = ue; if (ue == null) { continue; } if (ue.isError()) { continue; } alreadyIn.put(urlId, sre); results.add(sre); } // Add the description String fullDescription = document.get(INDEX_CONTENT); String partialDescription = SearchTools.getPartAround(fullDescription, SearchTools.findWordPosition(fullDescription, term), LUCENE_MAX_DESCRIPTION_CARACTERS); partialDescription = partialDescription.replace('\n', ' '); partialDescription = partialDescription.replace('\r', ' '); if (!sre.description.contains(partialDescription)) { sre.description.add(partialDescription); } } return results; } catch (ParseException e) { logger.log(Level.SEVERE, "Error while parsing the search term", e); } catch (IOException e) { logger.log(Level.SEVERE, "Error while searching", e); } finally { if (indexSearcher != null) { try { indexSearcher.close(); } catch (IOException e) { } } } return null; }
From source file:calliope.search.AeseSearch.java
License:Open Source License
/** * Search the index for the given expression * @param expr the expression to be parsed * @param langCode the language of the expression and index * @param profile the hit profile (where to start from etc) * @return the result docs/*from ww w.jav a 2 s .c o m*/ */ public static String searchIndex(String expr, String langCode, HitProfile profile) { StringBuilder sb = new StringBuilder(); try { Analyzer analyzer = AeseSearch.createAnalyzer(langCode); DirectoryReader reader = DirectoryReader.open(AeseSearch.index); if (reader != null) { IndexSearcher searcher = new IndexSearcher(reader); QueryParser qp = new QueryParser(Version.LUCENE_45, "text", analyzer); Query q = qp.parse(expr); TopDocs hits = searcher.search(q, AeseSearch.maxHits); ScoreDoc[] docs = hits.scoreDocs; for (int j = profile.from; j < profile.to && j < docs.length; j++) { Document doc = searcher.doc(docs[j].doc); String vid = doc.get(LuceneFields.VID); String docID = doc.get(LuceneFields.DOCID); Highlighter h = new Highlighter(new QueryScorer(q)); String text = getCorTexVersion(docID, vid); sb.append(formatDocID(docID)); sb.append(" "); sb.append(formatVersionID(vid)); sb.append(" "); String frag = h.getBestFragment(analyzer, "text", text); sb.append("<span class=\"found\">"); sb.append(frag); sb.append("</span>\n"); } profile.numHits = docs.length; } reader.close(); } catch (Exception e) { sb.append(e.getMessage()); } return sb.toString(); }
From source file:cc.pp.analyzer.ik.demo.IKAnalyzerDemo.java
License:Apache License
public static void main(String[] args) { //Lucene Document?? String fieldName = "text"; ///*w w w . j a va 2s.c o m*/ String text = "IK Analyzer???????"; //IKAnalyzer? Analyzer analyzer = new IKAnalyzer(Version.LUCENE_48, true); Directory directory = null; IndexWriter iwriter = null; DirectoryReader ireader = null; IndexSearcher isearcher = null; try { // directory = new RAMDirectory(); //?IndexWriterConfig IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_48, analyzer); iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); iwriter = new IndexWriter(directory, iwConfig); // Document doc = new Document(); doc.add(new LongField("ID", 1000, Field.Store.YES)); doc.add(new TextField(fieldName, text, Field.Store.YES)); iwriter.addDocument(doc); iwriter.close(); //?********************************** //? ireader = DirectoryReader.open(directory); isearcher = new IndexSearcher(ireader); String keyword = "?"; // String keyword = ""; //QueryParser?Query QueryParser qp = new QueryParser(Version.LUCENE_48, fieldName, analyzer); qp.setDefaultOperator(QueryParser.AND_OPERATOR); Query query = qp.parse(keyword); System.out.println("Query = " + query); //?5? TopDocs topDocs = isearcher.search(query, 5); System.out.println("" + topDocs.totalHits); // ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (int i = 0; i < topDocs.totalHits; i++) { Document targetDoc = isearcher.doc(scoreDocs[i].doc); System.out.println("" + targetDoc.toString()); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { e.printStackTrace(); } } if (directory != null) { try { directory.close(); } catch (IOException e) { e.printStackTrace(); } } } }
From source file:ccc.plugins.search.lucene.SimpleLuceneFS.java
License:Open Source License
private void similar(final String uuid, final int maxHits, final CapturingHandler ch) { if (uuid == null) { return;// www .j a v a 2 s .c o m } IndexReader ir = null; IndexSearcher searcher = null; try { ir = IndexReader.open(createDirectory()); searcher = new IndexSearcher(ir); final int docNum = docNumber(uuid, searcher); if (docNum == -1) { return; } final MoreLikeThis mlt = new MoreLikeThis(ir); mlt.setFieldNames(new String[] { DEFAULT_FIELD }); mlt.setMinDocFreq(2); final Query query = mlt.like(docNum); ch.handle(searcher, searcher.search(query, maxHits)); } catch (final IOException e) { LOG.warn("Error performing query.", e); } finally { if (searcher != null) { try { searcher.close(); } catch (final IOException e) { Exceptions.swallow(e); } } if (ir != null) { try { ir.close(); } catch (final IOException e) { Exceptions.swallow(e); } } } }
From source file:ccc.plugins.search.lucene.SimpleLuceneFS.java
License:Open Source License
/** * Retrieves lucene document for given page. * * @param uuid UUID to search./*from w w w. j a v a 2s . co m*/ * @param searcher IndexSearcher object. * @return Document number. * @throws IOException If search fails */ private int docNumber(final String uuid, final IndexSearcher searcher) throws IOException { final Query q = new TermQuery(new Term("id", uuid)); final TopDocs hits = searcher.search(q, 1); if (hits.scoreDocs.length < 1) { return -1; } return hits.scoreDocs[0].doc; }
From source file:ch.algotrader.rest.index.SecurityIndexer.java
License:Open Source License
public List<SecurityVO> search(String queryStr) throws ParseException { try (IndexReader reader = DirectoryReader.open(index)) { IndexSearcher searcher = new IndexSearcher(reader); QueryParser queryParser = new MultiFieldQueryParser(FIELDS, new StandardAnalyzer()); queryParser.setAllowLeadingWildcard(true); Query query = queryParser.parse(queryStr); TopDocs results = searcher.search(query, 10); return Arrays.asList(results.scoreDocs).stream().map(sd -> searchDocument(searcher, sd)) .mapToLong(d -> d.getField("id").numericValue().longValue()).mapToObj(securityCache::get) .collect(Collectors.toList()); } catch (IOException ioe) { throw new UnrecoverableCoreException("Unexpected I/O error accessing security index", ioe); }// ww w .j a v a 2 s . co m }
From source file:ch.sentric.hbase.prospective.Percolator.java
License:Apache License
/** * Tries to find a set of queries that match the given document. * * @param doc// w ww . j ava2 s. co m * the Lucene document * @return the matching queries * @throws IOException * if an I/O error occurs */ public Response<T> percolate(final Document doc, final Map<T, Query> queries) throws IOException { // first, parse the source doc into a MemoryIndex final MemoryIndex memoryIndex = new MemoryIndex(); for (final Fieldable field : doc.getFields()) { if (!field.isIndexed()) { continue; } final TokenStream tokenStream = field.tokenStreamValue(); if (tokenStream != null) { memoryIndex.addField(field.name(), tokenStream, field.getBoost()); } else { final Reader reader = field.readerValue(); if (reader != null) { memoryIndex.addField(field.name(), analyzer.reusableTokenStream(field.name(), reader), field.getBoost()); } else { final String value = field.stringValue(); if (value != null) { memoryIndex.addField(field.name(), analyzer.reusableTokenStream(field.name(), new CharSequenceReader(value)), field.getBoost()); } } } } // do the search final IndexSearcher searcher = memoryIndex.createSearcher(); final Map<T, Query> matches = new HashMap<T, Query>(0); if (queries != null && !queries.isEmpty()) { final ExistsCollector collector = new ExistsCollector(); for (final Map.Entry<T, Query> entry : queries.entrySet()) { collector.reset(); searcher.search(entry.getValue(), collector); if (collector.exists()) { matches.put(entry.getKey(), entry.getValue()); } } } return new Response<T>(matches); }