List of usage examples for org.apache.lucene.search IndexSearcher search
public <C extends Collector, T> T search(Query query, CollectorManager<C, T> collectorManager) throws IOException
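Note that while the listing is titled after the CollectorManager overload above, none of the snippets below exercises it; they all use search(Query, int) or search(Query, Collector). A minimal sketch of the overload itself follows. HitCountManager is a hypothetical name, and the sketch assumes a Lucene version recent enough to have this overload and TotalHitCountCollector:

// Sketch: counting hits through the CollectorManager overload.
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.TotalHitCountCollector;

public final class HitCountManager
        implements CollectorManager<TotalHitCountCollector, Integer> {

    @Override
    public TotalHitCountCollector newCollector() {
        // One collector per index slice when the searcher runs with an executor.
        return new TotalHitCountCollector();
    }

    @Override
    public Integer reduce(Collection<TotalHitCountCollector> collectors) throws IOException {
        // Merge the per-slice counts into the single result T.
        int total = 0;
        for (TotalHitCountCollector c : collectors) {
            total += c.getTotalHits();
        }
        return total;
    }
}

A searcher built with an executor calls newCollector() once per slice and reduce(...) once at the end; on a single-threaded searcher the manager simply wraps one collector. Usage: int hits = searcher.search(query, new HitCountManager());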
From source file:com.nearinfinity.blur.search.SuperQueryTest.java
License:Apache License
@Test
public void testConstantScoreTypes() throws Exception {
    IndexSearcher searcher = createSearcher();
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.add(wrapSuper(PERSON_NAME, NAME1, ScoreType.CONSTANT), Occur.SHOULD);
    booleanQuery.add(wrapSuper(ADDRESS_STREET, STREET1, ScoreType.CONSTANT), Occur.MUST);
    TopDocs topDocs = searcher.search(booleanQuery, 10);
    assertEquals(3, topDocs.totalHits);
    printTopDocs(topDocs);
    assertEquals(2.0, topDocs.scoreDocs[0].score, 0.01);
    assertEquals(2.0, topDocs.scoreDocs[1].score, 0.01);
    assertEquals(0.5, topDocs.scoreDocs[2].score, 0.01);
}
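The mutable BooleanQuery/add(...) API used here (and in the next two tests) predates Lucene 5; on 5.x and later, BooleanQuery is immutable and the same query would be assembled through BooleanQuery.Builder. A minimal sketch, reusing the test's own wrapSuper helper and searcher:

// Sketch: the BooleanQuery.Builder equivalent on Lucene 5.x+.
BooleanQuery booleanQuery = new BooleanQuery.Builder()
        .add(wrapSuper(PERSON_NAME, NAME1, ScoreType.CONSTANT), Occur.SHOULD)
        .add(wrapSuper(ADDRESS_STREET, STREET1, ScoreType.CONSTANT), Occur.MUST)
        .build();
TopDocs topDocs = searcher.search(booleanQuery, 10);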
From source file:com.nearinfinity.blur.search.SuperQueryTest.java
License:Apache License
@Test
public void testSuperScoreTypes() throws Exception {
    IndexSearcher searcher = createSearcher();
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.add(wrapSuper(PERSON_NAME, NAME1, ScoreType.SUPER), Occur.SHOULD);
    booleanQuery.add(wrapSuper(ADDRESS_STREET, STREET1, ScoreType.SUPER), Occur.MUST);
    TopDocs topDocs = searcher.search(booleanQuery, 10);
    assertEquals(3, topDocs.totalHits);
    printTopDocs(topDocs);
    assertEquals(3.10, topDocs.scoreDocs[0].score, 0.01);
    assertEquals(3.00, topDocs.scoreDocs[1].score, 0.01);
    assertEquals(0.75, topDocs.scoreDocs[2].score, 0.01);
}
From source file:com.nearinfinity.blur.search.SuperQueryTest.java
License:Apache License
@Test
public void testSuperScoreTypesWithFacet() throws Exception {
    IndexSearcher searcher = createSearcher();
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.add(wrapSuper(PERSON_NAME, NAME1, ScoreType.SUPER), Occur.SHOULD);
    booleanQuery.add(wrapSuper(ADDRESS_STREET, STREET1, ScoreType.SUPER), Occur.MUST);

    BooleanQuery f1 = new BooleanQuery();
    f1.add(new TermQuery(new Term(PERSON_NAME, NAME1)), Occur.MUST);
    f1.add(new TermQuery(new Term(PERSON_NAME, NAME2)), Occur.MUST);

    Query[] facets = new Query[] { new SuperQuery(f1, ScoreType.CONSTANT) };
    AtomicLongArray counts = new AtomicLongArray(facets.length);
    FacetQuery query = new FacetQuery(booleanQuery, facets, counts);

    TopDocs topDocs = searcher.search(query, 10);
    assertEquals(3, topDocs.totalHits);
    printTopDocs(topDocs);
    assertEquals(3.10, topDocs.scoreDocs[0].score, 0.01);
    assertEquals(3.00, topDocs.scoreDocs[1].score, 0.01);
    assertEquals(0.75, topDocs.scoreDocs[2].score, 0.01);
}
From source file:com.nero.model.SearchFiles.java
License:Apache License
public static void testSearch() {
    String queryStr = "1";
    // "index" is the directory that hosts the Lucene index
    IndexReader reader;
    try {
        reader = IndexReader.open(FSDirectory.open(new File(index)));
        IndexSearcher searcher = new IndexSearcher(reader);
        Term term = new Term("contents", queryStr);
        TermQuery luceneQuery = new TermQuery(term);
        TopDocs results = searcher.search(luceneQuery, 100);
        ScoreDoc[] hits = results.scoreDocs;
        System.out.println(hits.length);
        for (int i = 0; i < hits.length; i++) {
            Document doc = searcher.doc(hits[i].doc);
            System.out.println("File: " + doc.get("contents"));
        }
        searcher.close();
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
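IndexReader.open and searcher.close() date this example to Lucene 3.x. On later versions the reader is opened via DirectoryReader and IndexSearcher itself has no close(). A minimal sketch of the same lookup with try-with-resources, assuming Lucene 5.x+ (where FSDirectory.open takes a java.nio.file.Path) and the same index path and queryStr as above:

// Sketch: the same term lookup on post-3.x Lucene APIs.
try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)))) {
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs results = searcher.search(new TermQuery(new Term("contents", queryStr)), 100);
    for (ScoreDoc hit : results.scoreDocs) {
        Document doc = searcher.doc(hit.doc); // load the hit's stored fields
        System.out.println("File: " + doc.get("contents"));
    }
} // the reader (and its underlying files) close automatically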
From source file:com.nero.model.SearchFiles.java
License:Apache License
/**
 * This demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 *
 * When the query is executed for the first time, only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, the query
 * is executed another time and all hits are collected.
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }
            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }
            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("zh_name");
            if (path != null) {
                System.out.println((i + 1) + "!!!!!!!!!!!!!!! " + path + "!!!!!!!!!!!!!!! " + doc.get("summary"));
                String title = doc.get("title");
                if (title != null) {
                    System.out.println("   Title: " + doc.get("title"));
                }
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }
        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}
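An alternative to re-running the query with a larger n, as the method above does, is IndexSearcher.searchAfter (available since Lucene 3.5), which resumes collection from the last hit of the previous page instead of re-collecting everything before it. A minimal sketch against the same searcher, query, and hitsPerPage:

// Sketch: cursor-style paging with searchAfter.
ScoreDoc after = null;
while (true) {
    TopDocs page = (after == null)
            ? searcher.search(query, hitsPerPage)
            : searcher.searchAfter(after, query, hitsPerPage);
    if (page.scoreDocs.length == 0) {
        break; // no more results
    }
    for (ScoreDoc sd : page.scoreDocs) {
        System.out.println("doc=" + sd.doc + " score=" + sd.score);
    }
    after = page.scoreDocs[page.scoreDocs.length - 1]; // resume after the last hit
}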
From source file:com.nuvolect.deepdive.lucene.Search.java
/**
 * Return results for a search along a specific path. If the path has changed or is
 * new, create an index.
 *
 * @param searchQuery
 * @param searchPath
 * @return
 */
public static JSONObject search(String searchQuery, String volumeId, String searchPath) {

    JSONObject result = new JSONObject();
    JSONArray jsonArray = new JSONArray();
    Context ctx = App.getContext();
    DirectoryReader ireader = null;
    ScoreDoc[] scoreDocs = null;
    String error = "";

    preSearch(volumeId, searchPath);

    try {
        ireader = DirectoryReader.open(m_directory);
    } catch (IOException e) {
        LogUtil.logException(LogUtil.LogType.SEARCH, e);
        error += e.toString();
    }
    IndexSearcher isearcher = new IndexSearcher(ireader);
    Query query = null;

    try {
        LogUtil.log(LogUtil.LogType.SEARCH,
                "query: " + searchQuery + ", vid: " + volumeId + ", path: " + searchPath);

        // Parse a simple query that searches for "text":
        QueryParser parser = new QueryParser(CConst.FIELD_CONTENT, m_analyzer);
        query = parser.parse(searchQuery);

        TopScoreDocCollector collector = TopScoreDocCollector.create(MAX_HITS);
        isearcher.search(query, collector);
        scoreDocs = collector.topDocs().scoreDocs;
    } catch (ParseException | IOException e) {
        LogUtil.logException(LogUtil.LogType.SEARCH, e);
        error += e.toString();
    }

    // Iterate through the results creating an object for each file
    HashMap<String, Integer> hitCounts = new HashMap<>();
    HashMap<String, Integer> hitIndexes = new HashMap<>();

    // First iterate the hit list and count duplicates based on file path.
    for (int ii = 0; scoreDocs != null && ii < scoreDocs.length; ++ii) {
        Document hitDoc = null;
        int fileHits = 1;
        try {
            hitDoc = isearcher.doc(scoreDocs[ii].doc);
            Explanation explanation = isearcher.explain(query, scoreDocs[ii].doc);
            Explanation[] details = explanation.getDetails();
            String description = details[0].getDescription();
            // FIXME, find a better way to count hits in each file
            if (description.contains("=")) {
                String[] lineParts = description.split("=");
                String[] elementParts = lineParts[2].split(Pattern.quote(")"));
                if (elementParts.length > 0) {
                    fileHits = ((int) Double.parseDouble(elementParts[0]));
                }
            }
        } catch (IOException e) {
            LogUtil.logException(LogUtil.LogType.SEARCH, e);
            error += e.toString();
        }

        String filePath = hitDoc.get(CConst.FIELD_PATH);

        if (hitCounts.containsKey(filePath)) {
            hitCounts.put(filePath, hitCounts.get(filePath) + fileHits);
        } else {
            hitCounts.put(filePath, fileHits);
            hitIndexes.put(filePath, ii);
        }
    }

    // Iterate over each unique hit and save the results
    for (Map.Entry<String, Integer> uniqueHit : hitIndexes.entrySet()) {
        Document hitDoc = null;
        try {
            hitDoc = isearcher.doc(scoreDocs[uniqueHit.getValue()].doc);
        } catch (IOException e) {
            LogUtil.logException(LogUtil.LogType.SEARCH, e);
            error += e.toString();
        }
        String file_name = hitDoc.get(CConst.FIELD_FILENAME);
        String file_path = hitDoc.get(CConst.FIELD_PATH);

        try {
            String folder_url = OmniHash.getStartPathUrl(ctx, volumeId, file_path);

            JSONObject hitObj = new JSONObject();
            hitObj.put("volume_id", volumeId);
            hitObj.put("file_path", file_path);
            hitObj.put("file_name", file_name);
            hitObj.put("folder_url", folder_url);
            hitObj.put("num_hits", hitCounts.get(file_path));
            hitObj.put("error", error);
            jsonArray.put(hitObj);
        } catch (Exception e) {
            LogUtil.logException(LogUtil.LogType.SEARCH, e);
        }
    }

    int num_hits = scoreDocs != null ? scoreDocs.length : 0;

    try {
        result.put("hits", jsonArray);
        result.put("num_hits", num_hits);
        result.put("error", error);
        ireader.close();
        m_directory.close();
    } catch (JSONException | IOException e) {
        LogUtil.logException(LogUtil.LogType.SEARCH, e);
    }
    return result;
}
From source file:com.o19s.solr.swan.highlight.TermVectorFun.java
License:Apache License
@Test
public void testBlah() throws IOException {
    RAMDirectory ramDir = new RAMDirectory();
    // Index some made up content
    IndexWriterConfig iwf = new IndexWriterConfig(Version.LUCENE_47, new StandardAnalyzer(Version.LUCENE_47));
    IndexWriter writer = new IndexWriter(ramDir, iwf);
    FieldType ft = new FieldType();
    ft.setIndexed(true);
    ft.setTokenized(true);
    ft.setStored(true);
    ft.setStoreTermVectorOffsets(true);
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(true);
    ft.freeze();
    for (int i = 0; i < DOCS.length; i++) {
        Document doc = new Document();
        StringField id = new StringField("id", "doc_" + i, StringField.Store.YES);
        doc.add(id);
        // Store both position and offset information
        Field text = new Field("content", DOCS[i], ft);
        doc.add(text);
        writer.addDocument(doc);
    }
    // writer.close();

    // Get a searcher
    AtomicReader dr = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(writer, true));
    IndexSearcher searcher = new IndexSearcher(dr);

    // Do a search using SpanQuery
    SpanTermQuery fleeceQ = new SpanTermQuery(new Term("content", "fleece"));
    TopDocs results = searcher.search(fleeceQ, 10);
    for (int i = 0; i < results.scoreDocs.length; i++) {
        ScoreDoc scoreDoc = results.scoreDocs[i];
        System.out.println("Score Doc: " + scoreDoc);
    }

    IndexReader reader = searcher.getIndexReader();
    Bits acceptDocs = null;
    Map<Term, TermContext> termContexts = new HashMap<Term, TermContext>();
    Spans spans = fleeceQ.getSpans(dr.getContext(), acceptDocs, termContexts);
    while (spans.next()) {
        System.out.println("Doc: " + spans.doc() + " Start: " + spans.start() + " End: " + spans.end());
        DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("content");
        reader.document(spans.doc(), visitor);
        Terms terms = reader.getTermVector(spans.doc(), "content");
        TermsEnum tenum = terms.iterator(null);
        // AttributeSource as = tenum.attributes();
        while (tenum.next() != null) {
            System.out.println(tenum.term().utf8ToString());
        }
        for (long pos = 0L; pos < spans.end(); pos++) {
            // tenum.next();
            // if (tenum.ord() < pos) continue;
            // System.out.println(tenum.term());
            reader.document(spans.doc(), visitor);
            // String[] values = visitor.getDocument().getValues("content");
            // List<String> a = new ArrayList<String>();
            // build up the window
            // tvm.start = spans.start() - window;
            // tvm.end = spans.end() + window;
            // reader.getTermFreqVector(spans.doc(), "content", tvm);
            // for (WindowEntry entry : tvm.entries.values()) {
            //     System.out.println("Entry: " + entry);
            // }
            // clear out the entries for the next round
            // tvm.entries.clear();
        }
    }
}
From source file:com.openkm.dao.SearchDAO.java
License:Open Source License
/**
 * Get Lucene document terms.
 */
@SuppressWarnings("unchecked")
public List<String> getTerms(Class<?> entityType, String nodeUuid) throws CorruptIndexException, IOException {
    List<String> terms = new ArrayList<String>();
    FullTextSession ftSession = null;
    IndexSearcher searcher = null;
    ReaderProvider provider = null;
    Session session = null;
    IndexReader reader = null;

    try {
        session = HibernateUtil.getSessionFactory().openSession();
        ftSession = Search.getFullTextSession(session);
        SearchFactory sFactory = ftSession.getSearchFactory();
        provider = sFactory.getReaderProvider();
        QueryBuilder builder = sFactory.buildQueryBuilder().forEntity(entityType).get();
        Query query = builder.keyword().onField("uuid").matching(nodeUuid).createQuery();
        DirectoryProvider<Directory>[] dirProv = sFactory.getDirectoryProviders(NodeDocument.class);
        reader = provider.openReader(dirProv[0]);
        searcher = new IndexSearcher(reader);
        TopDocs topDocs = searcher.search(query, 1);

        for (ScoreDoc sDoc : topDocs.scoreDocs) {
            if (!reader.isDeleted(sDoc.doc)) {
                for (TermEnum te = reader.terms(); te.next();) {
                    Term t = te.term();

                    if ("text".equals(t.field())) {
                        for (TermDocs tds = reader.termDocs(t); tds.next();) {
                            if (sDoc.doc == tds.doc()) {
                                terms.add(t.text());
                                // log.info("Field: {} - {}", t.field(), t.text());
                            }
                        }
                    }
                }
            }
        }
    } finally {
        if (provider != null && reader != null) {
            provider.closeReader(reader);
        }
        if (searcher != null) {
            searcher.close();
        }
        HibernateUtil.close(ftSession);
        HibernateUtil.close(session);
    }
    return terms;
}
From source file:com.orientechnologies.lucene.test.LuceneVsLuceneTest.java
License:Apache License
@Test
public void testLuceneVsLucene() throws IOException, ParseException {
    InputStream stream = ClassLoader.getSystemResourceAsStream("testLuceneIndex.sql");

    databaseDocumentTx.command(new OCommandScript("sql", getScriptFromStream(stream))).execute();

    for (ODocument oDocument : databaseDocumentTx.browseClass("Song")) {
        String title = oDocument.field("title");
        if (title != null) {
            Document d = new Document();
            d.add(new Field("title", title, Field.Store.NO, Field.Index.ANALYZED));
            indexWriter.addDocument(d);
        }
    }
    indexWriter.close();

    IndexReader reader = DirectoryReader.open(getDirectory());
    IndexSearcher searcher = new IndexSearcher(reader);
    Query query = new MultiFieldQueryParser(OLuceneIndexManagerAbstract.LUCENE_VERSION,
            new String[] { "title" },
            new StandardAnalyzer(OLuceneIndexManagerAbstract.LUCENE_VERSION)).parse("down the");
    final TopDocs docs = searcher.search(query, Integer.MAX_VALUE);
    ScoreDoc[] hits = docs.scoreDocs;

    List<ODocument> oDocs = databaseDocumentTx
            .query(new OSQLSynchQuery<ODocument>("select *,$score from Song where title LUCENE \"down the\""));

    Assert.assertEquals(oDocs.size(), hits.length);
    int i = 0;
    for (ScoreDoc hit : hits) {
        Assert.assertEquals(oDocs.get(i).field("$score"), hit.score);
        i++;
    }
    reader.close();
}
From source file:com.paladin.action.SearchAction.java
License:Apache License
/**
 * Search using Lucene.
 *
 * @param jsonObject
 * @param request
 * @param _query
 * @throws IOException
 * @throws ParseException
 */
private void _search(JSONObject jsonObject, HttpServletRequest request, String _query, String _table)
        throws IOException, ParseException, InvalidTokenOffsetsException {
    // Index directory for this table
    final String index_dir = Constants.LUCENE_INDEX_ROOT + _table;
    IndexSearcher searcher = new IndexSearcher(FSDirectory.open(new File(index_dir)));
    QueryParser parser = new QueryParser(Version.LUCENE_33, INDEX_FIELDS, new IKAnalyzer(false));
    TopScoreDocCollector collector = TopScoreDocCollector.create(10000, true);

    for (String key : _query.split(" ")) {
        Query query = parser.parse(key);
        searcher.search(query, collector);
        // Use IK similarity for scoring
        searcher.setSimilarity(new IKSimilarity());

        int size = collector.getTotalHits();
        total_pages = (size + Constants.NUM_PER_PAGE_SEARCH - 1) / Constants.NUM_PER_PAGE_SEARCH;
        curr_page_number = getCurrentPage(request, 1, total_pages);

        // Paging window: up to 10 page links around the current page
        first_page = curr_page_number - 5 > 0 ? curr_page_number - 5 : 1;
        last_page = first_page + 10 >= total_pages ? total_pages : first_page + 10;

        // Offset of the first result on the current page
        int begin = (curr_page_number - 1) * Constants.NUM_PER_PAGE_SEARCH;
        ScoreDoc[] score_docs = collector.topDocs(begin, Constants.NUM_PER_PAGE_SEARCH).scoreDocs;

        List<Document> doc_list = new ArrayList<Document>();
        for (ScoreDoc score_doc : score_docs)
            doc_list.add(searcher.doc(score_doc.doc));

        List<Map<String, String>> blog_list = getBlogListFromDocList(query, doc_list);
        jsonObject.put(_table + "_list", blog_list);
        jsonObject.put("p_start_" + _table, first_page);
        jsonObject.put("p_end_" + _table, last_page);
        jsonObject.put("curr_page_" + _table, curr_page_number);
        jsonObject.put("total_page_" + _table, total_pages);
        jsonObject.put("total_count_" + _table, size);
    }
}