List of usage examples for org.apache.lucene.search.IndexSearcher#doc

public Document doc(int docID) throws IOException

Sugar for .getIndexReader().document(docID): loads the stored fields of the document with the given internal docID.
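Before the collected examples, a minimal self-contained sketch of the common pattern (the index path "/tmp/index" and the "title" field are placeholders for illustration): run a query, then pass each hit's internal ID to doc() to load its stored fields.

import java.nio.file.Paths;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.FSDirectory;

public class DocExample {
    public static void main(String[] args) throws Exception {
        // Open an existing index (the path is a placeholder).
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/index")))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // Any query works; MatchAllDocsQuery keeps the sketch minimal.
            for (ScoreDoc hit : searcher.search(new MatchAllDocsQuery(), 10).scoreDocs) {
                // doc() resolves the hit's internal docID to its stored fields.
                Document doc = searcher.doc(hit.doc);
                System.out.println(doc.get("title") + " (score: " + hit.score + ")");
            }
        }
    }
}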
From source file: com.github.alvanson.xltsearch.SearchTask.java
License: Apache License

@Override
protected List<SearchResult> call() {
    DirectoryReader ireader = null;
    List<SearchResult> results = null;
    updateMessage("Searching...");
    try {
        ireader = DirectoryReader.open(config.getDirectory());
        IndexSearcher isearcher = new IndexSearcher(ireader);
        isearcher.setSimilarity(config.getSimilarity());
        QueryParser parser = new QueryParser(config.getVersion(), config.contentField,
                config.getAnalyzer());
        Query query = parser.parse(qstr);
        logger.debug("Query: {}", query);
        ScoreDoc[] hits = isearcher.search(query, limit).scoreDocs;
        // collect results
        results = new ArrayList<>(hits.length);
        for (ScoreDoc hit : hits) {
            Document document = isearcher.doc(hit.doc);
            File file = new File(root.getPath() + File.separator + document.get(config.pathField));
            String title = document.get(config.titleField);
            if (title == null) {
                title = "";
            }
            // report metadata in `details`
            StringBuilder sb = new StringBuilder();
            for (IndexableField field : document.getFields()) {
                if (field.stringValue() != null) {
                    sb.append(field.name() + ": " + field.stringValue() + '\n');
                }
            }
            results.add(new SearchResult(file, title, hit.score, sb.toString()));
        }
        updateMessage(hits.length + " results");
    } catch (IOException ex) {
        updateMessage("I/O exception");
        logger.error("I/O exception while reading index", ex);
    } catch (ParseException ex) {
        updateMessage("Parse error");
        logger.warn("Parse exception while parsing '{}'", qstr, ex);
    }
    // close ireader
    if (ireader != null) {
        try {
            ireader.close();
        } catch (IOException ex) {
            logger.warn("I/O exception while closing index reader", ex);
        }
    }
    return results;
}
From source file: com.github.alvanson.xltsearch.SelectTask.java
License: Apache License

private Map<String, String> getHashSums() {
    Map<String, String> hashSums = new HashMap<>();
    DirectoryReader ireader = null;
    try {
        if (DirectoryReader.indexExists(config.getDirectory())) {
            // read hashsums from `directory`
            ireader = DirectoryReader.open(config.getDirectory());
            IndexSearcher isearcher = new IndexSearcher(ireader);
            Query query = new MatchAllDocsQuery();
            ScoreDoc[] hits = isearcher.search(query, ireader.numDocs() + 1).scoreDocs;
            // collect results
            for (ScoreDoc hit : hits) {
                Document document = isearcher.doc(hit.doc);
                String relPath = document.get(config.pathField);
                String hashSum = document.get(config.hashSumField);
                if (relPath != null && hashSum != null) {
                    hashSums.put(relPath, hashSum);
                }
            }
        }
        // else: return empty map
    } catch (IOException ex) {
        logger.error("I/O exception while reading index", ex);
    }
    if (ireader != null) {
        try {
            ireader.close();
        } catch (IOException ex) {
            logger.warn("I/O exception while closing index reader", ex);
        }
    }
    return hashSums;
}
From source file: com.github.le11.nls.lucene.UIMABaseAnalyzerTest.java
License: Apache License

@Test
public void baseUIMAAnalyzerIntegrationTest() {
    try {
        // add the first doc
        Document doc = new Document();
        doc.add(new Field("title", "this is a dummy title ", Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("contents", "there is some content written here", Field.Store.YES,
                Field.Index.ANALYZED));
        writer.addDocument(doc, analyzer);
        writer.commit();

        // try the search over the first doc
        IndexSearcher indexSearcher = new IndexSearcher(writer.getReader());
        TopDocs result = indexSearcher.search(new MatchAllDocsQuery("contents"), 10);
        assertTrue(result.totalHits > 0);
        Document d = indexSearcher.doc(result.scoreDocs[0].doc);
        assertNotNull(d);
        assertNotNull(d.getFieldable("title"));
        assertNotNull(d.getFieldable("contents"));

        // add a second doc
        doc = new Document();
        doc.add(new Field("title", "il mio titolo", Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("contents", "che cosa e' scritto qui", Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc, analyzer);
        writer.commit();

        // do a matchalldocs query to retrieve both docs
        indexSearcher = new IndexSearcher(writer.getReader());
        result = indexSearcher.search(new MatchAllDocsQuery("contents"), 10);
        assertTrue(result.totalHits > 0);
        for (ScoreDoc di : result.scoreDocs) {
            d = indexSearcher.doc(di.doc);
            assertNotNull(d);
            assertNotNull(d.getFieldable("title"));
            assertNotNull(d.getFieldable("contents"));
        }
    } catch (Exception e) {
        fail(e.getLocalizedMessage());
    }
}
From source file: com.github.le11.nls.lucene.UIMATypeBasedSimilarityTest.java
License: Apache License

@Before
public void setUp() throws Exception {
    dir = new RAMDirectory();
    analyzer = new UIMAPayloadsAnalyzer("/HmmTaggerAggregate.xml");
    writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_33, analyzer));

    Document doc = new Document();
    doc.add(new Field("title", "this is a dummy title containing an entity for London", Field.Store.YES,
            Field.Index.ANALYZED));
    doc.add(new Field("contents", "there is some content written here about the british city",
            Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc, analyzer);
    writer.commit();

    // try the search over the first doc
    IndexSearcher indexSearcher = new IndexSearcher(writer.getReader());
    TopDocs result = indexSearcher.search(new MatchAllDocsQuery("contents"), 10);
    assertTrue(result.totalHits > 0);
    Document d = indexSearcher.doc(result.scoreDocs[0].doc);
    assertNotNull(d);
    assertNotNull(d.getFieldable("title"));
    assertNotNull(d.getFieldable("contents"));

    // add a second doc
    doc = new Document();
    doc.add(new Field("title", "some title regarding some article written in English", Field.Store.YES,
            Field.Index.ANALYZED));
    doc.add(new Field("contents", "this is the content of the article about", Field.Store.YES,
            Field.Index.ANALYZED));
    writer.addDocument(doc, analyzer);
    writer.commit();
}
From source file: com.github.mosuka.apache.lucene.example.cmd.SearchCommand.java
License: Apache License

@Override
public void execute(Map<String, Object> attrs) {
    Map<String, Object> responseMap = new LinkedHashMap<String, Object>();
    String responseJSON = null;
    Directory indexDir = null;
    IndexReader reader = null;
    try {
        String index = (String) attrs.get("index");
        String queryStr = (String) attrs.get("query");

        indexDir = FSDirectory.open(new File(index).toPath());

        QueryParser queryParser = new QueryParser("text", new JapaneseAnalyzer());
        Query query = queryParser.parse(queryStr);

        reader = DirectoryReader.open(indexDir);
        IndexSearcher searcher = new IndexSearcher(reader);

        TopDocs topDocs = searcher.search(query, 10);

        List<Map<String, Object>> documentList = new LinkedList<Map<String, Object>>();
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document document = searcher.doc(scoreDoc.doc);
            Map<String, Object> documentMap = new LinkedHashMap<String, Object>();
            for (IndexableField f : document.getFields()) {
                documentMap.put(f.name(), f.stringValue());
            }
            documentMap.put("score", scoreDoc.score);
            documentList.add(documentMap);
        }

        responseMap.put("status", 0);
        responseMap.put("message", "OK");
        responseMap.put("totalHits", topDocs.totalHits);
        responseMap.put("maxScore", topDocs.getMaxScore());
        responseMap.put("result", documentList);
    } catch (IOException e) {
        responseMap.put("status", 1);
        responseMap.put("message", e.getMessage());
    } catch (ParseException e) {
        responseMap.put("status", 1);
        responseMap.put("message", e.getMessage());
    } finally {
        try {
            if (reader != null) {
                reader.close();
            }
        } catch (IOException e) {
            responseMap.put("status", 1);
            responseMap.put("message", e.getMessage());
        }
        try {
            if (indexDir != null) {
                indexDir.close();
            }
        } catch (IOException e) {
            responseMap.put("status", 1);
            responseMap.put("message", e.getMessage());
        }
    }

    try {
        ObjectMapper mapper = new ObjectMapper();
        responseJSON = mapper.writeValueAsString(responseMap);
    } catch (IOException e) {
        responseJSON = String.format("{\"status\":1, \"message\":\"%s\"}", e.getMessage());
    }

    System.out.println(responseJSON);
}
From source file: com.github.msarhan.lucene.ArabicRootExtractorAnalyzerTests.java
License: Open Source License

private void print(IndexSearcher searcher, TopDocs docs) throws IOException {
    ScoreDoc[] hits = docs.scoreDocs;
    System.out.println("Found " + hits.length + " hits.");
    for (ScoreDoc hit : hits) {
        int docId = hit.doc;
        Document d = searcher.doc(docId);
        System.out.println(d.get("number") + "\t" + d.get("title"));
    }
}
From source file: com.github.rnewson.couchdb.lucene.DatabaseIndexer.java
License: Apache License

public void search(final HttpServletRequest req, final HttpServletResponse resp)
        throws IOException, JSONException {
    final IndexState state = getState(req, resp);
    if (state == null)
        return;
    final IndexSearcher searcher = state.borrowSearcher(isStaleOk(req));
    final String etag = state.getEtag();
    final FastVectorHighlighter fvh = new FastVectorHighlighter(true, true);
    final JSONArray result = new JSONArray();
    try {
        if (state.notModified(req)) {
            resp.setStatus(304);
            return;
        }
        for (final String queryString : getQueryStrings(req)) {
            final Analyzer analyzer = state.analyzer(req.getParameter("analyzer"));
            final Operator operator = "and".equalsIgnoreCase(req.getParameter("default_operator"))
                    ? Operator.AND
                    : Operator.OR;
            final Query q = state.parse(queryString, operator, analyzer);

            final JSONObject queryRow = new JSONObject();
            queryRow.put("q", q.toString());
            if (getBooleanParameter(req, "debug")) {
                queryRow.put("plan", QueryPlan.toPlan(q));
                queryRow.put("analyzer", analyzer.getClass());
            }
            queryRow.put("etag", etag);
            if (getBooleanParameter(req, "rewrite")) {
                final Query rewritten_q = q.rewrite(searcher.getIndexReader());
                queryRow.put("rewritten_q", rewritten_q.toString());

                final JSONObject freqs = new JSONObject();
                final Set<Term> terms = new HashSet<Term>();
                rewritten_q.extractTerms(terms);
                for (final Object term : terms) {
                    final int freq = searcher.getIndexReader().docFreq((Term) term);
                    freqs.put(term.toString(), freq);
                }
                queryRow.put("freqs", freqs);
            } else {
                // Perform the search.
                final TopDocs td;
                final StopWatch stopWatch = new StopWatch();

                final boolean include_docs = getBooleanParameter(req, "include_docs");
                final int highlights = getIntParameter(req, "highlights", 0);
                final int highlight_length = max(getIntParameter(req, "highlight_length", 18), 18); // min for fast term vector highlighter is 18
                final boolean include_termvectors = getBooleanParameter(req, "include_termvectors");
                final int limit = getIntParameter(req, "limit", ini.getInt("lucene.limit", 25));
                final Sort sort = CustomQueryParser.toSort(req.getParameter("sort"));
                final int skip = getIntParameter(req, "skip", 0);

                final Set<String> fieldsToLoad;
                if (req.getParameter("include_fields") == null) {
                    fieldsToLoad = null;
                } else {
                    final String[] fields = Utils.splitOnCommas(req.getParameter("include_fields"));
                    final List<String> list = Arrays.asList(fields);
                    fieldsToLoad = new HashSet<String>(list);
                }

                if (sort == null) {
                    td = searcher.search(q, null, skip + limit);
                } else {
                    td = searcher.search(q, null, skip + limit, sort);
                }
                stopWatch.lap("search");

                // Fetch matches (if any).
                final int max = Math.max(0, Math.min(td.totalHits - skip, limit));
                final JSONArray rows = new JSONArray();
                final String[] fetch_ids = new String[max];
                for (int i = skip; i < skip + max; i++) {
                    final Document doc;
                    if (fieldsToLoad == null) {
                        doc = searcher.doc(td.scoreDocs[i].doc);
                    } else {
                        doc = searcher.doc(td.scoreDocs[i].doc, fieldsToLoad);
                    }

                    final JSONObject row = new JSONObject();
                    final JSONObject fields = new JSONObject();
                    final JSONObject highlight_rows = new JSONObject();

                    // Include stored fields.
                    for (final IndexableField f : doc.getFields()) {
                        if (!f.fieldType().stored()) {
                            continue;
                        }
                        final String name = f.name();
                        final Object value;
                        if (f.numericValue() != null) {
                            value = f.numericValue();
                        } else {
                            value = f.stringValue();
                        }
                        if (value != null) {
                            if ("_id".equals(name)) {
                                row.put("id", value);
                            } else {
                                if (!fields.has(name)) {
                                    fields.put(name, value);
                                } else {
                                    final Object obj = fields.get(name);
                                    if (obj instanceof String || obj instanceof Number) {
                                        final JSONArray arr = new JSONArray();
                                        arr.put(obj);
                                        arr.put(value);
                                        fields.put(name, arr);
                                    } else {
                                        assert obj instanceof JSONArray;
                                        ((JSONArray) obj).put(value);
                                    }
                                }
                                if (highlights > 0) {
                                    String[] frags = fvh.getBestFragments(fvh.getFieldQuery(q),
                                            searcher.getIndexReader(), td.scoreDocs[i].doc, name,
                                            highlight_length, highlights);
                                    highlight_rows.put(name, frags);
                                }
                            }
                        }
                    }

                    if (!Float.isNaN(td.scoreDocs[i].score)) {
                        row.put("score", td.scoreDocs[i].score);
                    }
                    // Include sort order (if any).
                    if (td instanceof TopFieldDocs) {
                        final FieldDoc fd = (FieldDoc) ((TopFieldDocs) td).scoreDocs[i];
                        row.put("sort_order", fd.fields);
                    }
                    // Fetch document (if requested).
                    if (include_docs) {
                        fetch_ids[i - skip] = doc.get("_id");
                    }
                    if (fields.length() > 0) {
                        row.put("fields", fields);
                    }
                    if (highlight_rows.length() > 0) {
                        row.put("highlights", highlight_rows);
                    }
                    rows.put(row);
                }
                // Fetch documents (if requested).
                if (include_docs && fetch_ids.length > 0) {
                    final List<CouchDocument> fetched_docs = database.getDocuments(fetch_ids);
                    for (int j = 0; j < max; j++) {
                        final CouchDocument doc = fetched_docs.get(j);
                        final JSONObject row = doc == null
                                ? new JSONObject("{\"error\":\"not_found\"}")
                                : doc.asJson();
                        rows.getJSONObject(j).put("doc", row);
                    }
                }
                stopWatch.lap("fetch");

                queryRow.put("skip", skip);
                queryRow.put("limit", limit);
                queryRow.put("total_rows", td.totalHits);
                queryRow.put("search_duration", stopWatch.getElapsed("search"));
                queryRow.put("fetch_duration", stopWatch.getElapsed("fetch"));
                // Include sort info (if requested).
                if (td instanceof TopFieldDocs) {
                    queryRow.put("sort_order", CustomQueryParser.toJSON(((TopFieldDocs) td).fields));
                }
                queryRow.put("rows", rows);
            }
            result.put(queryRow);
        }
    } catch (final ParseException e) {
        ServletUtils.sendJsonError(req, resp, 400, "Bad query syntax: " + e.getMessage());
        return;
    } finally {
        state.returnSearcher(searcher);
    }

    resp.setHeader("ETag", etag);
    resp.setHeader("Cache-Control", "must-revalidate");
    ServletUtils.setResponseContentTypeAndEncoding(req, resp);

    final Object json = result.length() > 1 ? result : result.getJSONObject(0);
    final String callback = req.getParameter("callback");
    final String body;
    if (callback != null) {
        body = String.format("%s(%s)", callback, json);
    } else {
        if (json instanceof JSONObject) {
            final JSONObject obj = (JSONObject) json;
            body = getBooleanParameter(req, "debug") ? obj.toString(2) : obj.toString();
        } else {
            final JSONArray arr = (JSONArray) json;
            body = getBooleanParameter(req, "debug") ? arr.toString(2) : arr.toString();
        }
    }
    final Writer writer = resp.getWriter();
    try {
        writer.write(body);
    } finally {
        writer.close();
    }
}
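Note the two-argument overload used above: searcher.doc(td.scoreDocs[i].doc, fieldsToLoad) loads only the named stored fields, which saves work when the response needs a known subset; the one-argument form loads every stored field of the document.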
From source file: com.github.rnewson.couchdb.lucene.SearchRequest.java
License: Apache License

public String execute(final IndexSearcher searcher) throws IOException {
    // Decline requests over MAX_LIMIT.
    if (limit > Config.MAX_LIMIT) {
        return "{\"code\":400,\"body\":\"max limit was exceeded.\"}";
    }
    // Return "304 - Not Modified" if etag matches.
    final String etag = getETag(searcher);
    if (!debug && etag.equals(this.ifNoneMatch)) {
        return "{\"code\":304}";
    }

    final JSONObject json = new JSONObject();
    json.put("q", q.toString());
    json.put("etag", etag);

    if (rewrite_query) {
        final Query rewritten_q = q.rewrite(searcher.getIndexReader());
        json.put("rewritten_q", rewritten_q.toString());

        final JSONObject freqs = new JSONObject();
        final Set terms = new HashSet();
        rewritten_q.extractTerms(terms);
        for (final Object term : terms) {
            final int freq = searcher.docFreq((Term) term);
            freqs.put(term, freq);
        }
        json.put("freqs", freqs);
    } else {
        // Perform search.
        final TopDocs td;
        final StopWatch stopWatch = new StopWatch();
        if (sort == null) {
            td = searcher.search(q, filter, skip + limit);
        } else {
            td = searcher.search(q, filter, skip + limit, sort);
        }
        stopWatch.lap("search");

        // Fetch matches (if any).
        final int max = max(0, min(td.totalHits - skip, limit));
        final JSONArray rows = new JSONArray();
        final String[] fetch_ids = new String[max];
        for (int i = skip; i < skip + max; i++) {
            final Document doc = searcher.doc(td.scoreDocs[i].doc);
            final JSONObject row = new JSONObject();
            final JSONObject fields = new JSONObject();

            // Include stored fields.
            for (Object f : doc.getFields()) {
                Field fld = (Field) f;
                if (!fld.isStored())
                    continue;
                String name = fld.name();
                String value = fld.stringValue();
                if (value != null) {
                    if (Config.ID.equals(name)) {
                        row.put("id", value);
                    } else {
                        if (!fields.has(name)) {
                            fields.put(name, value);
                        } else {
                            final Object obj = fields.get(name);
                            if (obj instanceof String) {
                                final JSONArray arr = new JSONArray();
                                arr.add((String) obj);
                                arr.add(value);
                                fields.put(name, arr);
                            } else {
                                assert obj instanceof JSONArray;
                                ((JSONArray) obj).add(value);
                            }
                        }
                    }
                }
            }

            row.put("score", td.scoreDocs[i].score);
            // Include sort order (if any).
            if (td instanceof TopFieldDocs) {
                final FieldDoc fd = (FieldDoc) ((TopFieldDocs) td).scoreDocs[i];
                row.put("sort_order", fd.fields);
            }
            // Fetch document (if requested).
            if (include_docs) {
                fetch_ids[i - skip] = doc.get(Config.ID);
            }
            if (fields.size() > 0) {
                row.put("fields", fields);
            }
            rows.add(row);
        }
        // Fetch documents (if requested).
        if (include_docs) {
            final JSONArray fetched_docs = DB.getDocs(dbname, fetch_ids).getJSONArray("rows");
            for (int i = 0; i < max; i++) {
                rows.getJSONObject(i).put("doc", fetched_docs.getJSONObject(i).getJSONObject("doc"));
            }
        }
        stopWatch.lap("fetch");

        json.put("skip", skip);
        json.put("limit", limit);
        json.put("total_rows", td.totalHits);
        json.put("search_duration", stopWatch.getElapsed("search"));
        json.put("fetch_duration", stopWatch.getElapsed("fetch"));
        // Include sort info (if requested).
        if (td instanceof TopFieldDocs) {
            json.put("sort_order", toString(((TopFieldDocs) td).fields));
        }
        json.put("rows", rows);
    }

    final JSONObject result = new JSONObject();
    result.put("code", 200);

    final JSONObject headers = new JSONObject();
    // Results can't change unless the IndexReader does.
    headers.put("Cache-Control", "max-age=" + Config.COMMIT_MAX / 1000);
    headers.put("ETag", etag);
    result.put("headers", headers);

    if (debug) {
        result.put("body", String.format("<pre>%s</pre>", StringEscapeUtils.escapeHtml(json.toString(2))));
    } else {
        result.put("json", json);
    }

    return result.toString();
}
From source file: com.github.tenorviol.gitsearch.SearchFiles.java
License: Apache License

/**
 * This demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 *
 * When the query is executed for the first time, only enough results to fill 5 result
 * pages are collected. If the user wants to page beyond this limit, the query is
 * executed another time and all hits are collected.
 */
public static void doPagingSearch(IndexSearcher searcher, Query query, int hitsPerPage) throws IOException {
    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(hits.length, start + hitsPerPage);

    for (int i = start; i < end; i++) {
        System.out.print((i + 1) + ". ");
        Document doc = searcher.doc(hits[i].doc);
        String path = doc.get("path");
        if (path != null) {
            System.out.print(path);
            String title = doc.get("title");
            if (title != null) {
                System.out.print(" Title: " + doc.get("title"));
            }
        } else {
            System.out.print("No path for this document");
        }
        System.out.println(" (" + hits[i].score + ")");
    }
}
From source file: com.github.tteofili.apacheconeu14.oak.search.nls.NLSQueryIndex.java
License: Apache License

@Override
public Cursor query(Filter filter, NodeState nodeState) {
    Thread thread = Thread.currentThread();
    ClassLoader loader = thread.getContextClassLoader();
    thread.setContextClassLoader(Client.class.getClassLoader());
    try {
        final IndexSearcher searcher = IndexUtils.getSearcher();
        if (searcher != null) {
            Filter.PropertyRestriction nativeQueryRestriction = filter
                    .getPropertyRestriction(NATIVE_NLS_QUERY);
            String nativeQueryString = String
                    .valueOf(nativeQueryRestriction.first.getValue(nativeQueryRestriction.first.getType()));

            // build the parse tree of the query and filter the uninteresting part
            // (e.g. "who is the admin" -> "admin")
            String purgedQuery = pcfg.filterQuestion(nativeQueryString);

            BooleanQuery booleanClauses = new BooleanQuery();
            // add clauses for the purged natural language query (if existing)
            if (purgedQuery != null) {
                booleanClauses.add(new BooleanClause(new TermQuery(new Term("jcr:title", purgedQuery)),
                        BooleanClause.Occur.SHOULD));
                booleanClauses.add(new BooleanClause(new TermQuery(new Term("jcr:description", purgedQuery)),
                        BooleanClause.Occur.SHOULD));
                booleanClauses.add(new BooleanClause(new TermQuery(new Term("text", purgedQuery)),
                        BooleanClause.Occur.SHOULD));
            }

            // infer "class" of the query and boost based on that
            try {
                initializeClassifier(searcher);
                ClassificationResult<BytesRef> result = null;
                try {
                    result = classifier.assignClass(nativeQueryString);
                } catch (Exception e) {
                    // do nothing
                }
                if (result != null) {
                    booleanClauses.add(new BooleanClause(
                            new BoostedQuery(
                                    new TermQuery(new Term("jcr:primaryType", result.getAssignedClass())),
                                    new ConstValueSource(2.0f)),
                            BooleanClause.Occur.SHOULD));
                }

                final TopDocs topDocs = searcher.search(booleanClauses, 100);
                final ScoreDoc[] scoreDocs = topDocs.scoreDocs;
                return new Cursor() {
                    private int index = 0;

                    @Override
                    public IndexRow next() {
                        final ScoreDoc scoreDoc = scoreDocs[index];
                        index++;
                        return new IndexRow() {
                            @Override
                            public String getPath() {
                                try {
                                    return searcher.doc(scoreDoc.doc).get("path");
                                } catch (IOException e) {
                                    return null;
                                }
                            }

                            @Override
                            public PropertyValue getValue(String s) {
                                try {
                                    if ("jcr:score".equals(s)) {
                                        // return was missing in the original; without it the
                                        // score branch fell through to the stored-field lookup
                                        return PropertyValues.newString(String.valueOf(scoreDoc.score));
                                    }
                                    return PropertyValues.newString(searcher.doc(scoreDoc.doc).get(s));
                                } catch (IOException e) {
                                    return null;
                                }
                            }
                        };
                    }

                    @Override
                    public boolean hasNext() {
                        return index < scoreDocs.length;
                    }

                    @Override
                    public void remove() {
                    }
                };
            } catch (IOException e) {
                // do nothing
            }
        }
    } finally {
        thread.setContextClassLoader(loader);
    }
    return null;
}