List of usage examples for org.apache.lucene.search.vectorhighlight FastVectorHighlighter FastVectorHighlighter
public FastVectorHighlighter(boolean phraseHighlight, boolean fieldMatch)
From source file:com.github.rnewson.couchdb.lucene.DatabaseIndexer.java
License:Apache License
public void search(final HttpServletRequest req, final HttpServletResponse resp) throws IOException, JSONException { final IndexState state = getState(req, resp); if (state == null) return;//w w w . j av a 2s . c o m final IndexSearcher searcher = state.borrowSearcher(isStaleOk(req)); final String etag = state.getEtag(); final FastVectorHighlighter fvh = new FastVectorHighlighter(true, true); final JSONArray result = new JSONArray(); try { if (state.notModified(req)) { resp.setStatus(304); return; } for (final String queryString : getQueryStrings(req)) { final Analyzer analyzer = state.analyzer(req.getParameter("analyzer")); final Operator operator = "and".equalsIgnoreCase(req.getParameter("default_operator")) ? Operator.AND : Operator.OR; final Query q = state.parse(queryString, operator, analyzer); final JSONObject queryRow = new JSONObject(); queryRow.put("q", q.toString()); if (getBooleanParameter(req, "debug")) { queryRow.put("plan", QueryPlan.toPlan(q)); queryRow.put("analyzer", analyzer.getClass()); } queryRow.put("etag", etag); if (getBooleanParameter(req, "rewrite")) { final Query rewritten_q = q.rewrite(searcher.getIndexReader()); queryRow.put("rewritten_q", rewritten_q.toString()); final JSONObject freqs = new JSONObject(); final Set<Term> terms = new HashSet<Term>(); rewritten_q.extractTerms(terms); for (final Object term : terms) { final int freq = searcher.getIndexReader().docFreq((Term) term); freqs.put(term.toString(), freq); } queryRow.put("freqs", freqs); } else { // Perform the search. final TopDocs td; final StopWatch stopWatch = new StopWatch(); final boolean include_docs = getBooleanParameter(req, "include_docs"); final int highlights = getIntParameter(req, "highlights", 0); final int highlight_length = max(getIntParameter(req, "highlight_length", 18), 18); // min for fast term vector highlighter is 18 final boolean include_termvectors = getBooleanParameter(req, "include_termvectors"); final int limit = getIntParameter(req, "limit", ini.getInt("lucene.limit", 25)); final Sort sort = CustomQueryParser.toSort(req.getParameter("sort")); final int skip = getIntParameter(req, "skip", 0); final Set<String> fieldsToLoad; if (req.getParameter("include_fields") == null) { fieldsToLoad = null; } else { final String[] fields = Utils.splitOnCommas(req.getParameter("include_fields")); final List<String> list = Arrays.asList(fields); fieldsToLoad = new HashSet<String>(list); } if (sort == null) { td = searcher.search(q, null, skip + limit); } else { td = searcher.search(q, null, skip + limit, sort); } stopWatch.lap("search"); // Fetch matches (if any). final int max = Math.max(0, Math.min(td.totalHits - skip, limit)); final JSONArray rows = new JSONArray(); final String[] fetch_ids = new String[max]; for (int i = skip; i < skip + max; i++) { final Document doc; if (fieldsToLoad == null) { doc = searcher.doc(td.scoreDocs[i].doc); } else { doc = searcher.doc(td.scoreDocs[i].doc, fieldsToLoad); } final JSONObject row = new JSONObject(); final JSONObject fields = new JSONObject(); final JSONObject highlight_rows = new JSONObject(); // Include stored fields. for (final IndexableField f : doc.getFields()) { if (!f.fieldType().stored()) { continue; } final String name = f.name(); final Object value; if (f.numericValue() != null) { value = f.numericValue(); } else { value = f.stringValue(); } if (value != null) { if ("_id".equals(name)) { row.put("id", value); } else { if (!fields.has(name)) { fields.put(name, value); } else { final Object obj = fields.get(name); if (obj instanceof String || obj instanceof Number) { final JSONArray arr = new JSONArray(); arr.put(obj); arr.put(value); fields.put(name, arr); } else { assert obj instanceof JSONArray; ((JSONArray) obj).put(value); } } if (highlights > 0) { String[] frags = fvh.getBestFragments(fvh.getFieldQuery(q), searcher.getIndexReader(), td.scoreDocs[i].doc, name, highlight_length, highlights); highlight_rows.put(name, frags); } } } } if (!Float.isNaN(td.scoreDocs[i].score)) { row.put("score", td.scoreDocs[i].score); } // Include sort order (if any). if (td instanceof TopFieldDocs) { final FieldDoc fd = (FieldDoc) ((TopFieldDocs) td).scoreDocs[i]; row.put("sort_order", fd.fields); } // Fetch document (if requested). if (include_docs) { fetch_ids[i - skip] = doc.get("_id"); } if (fields.length() > 0) { row.put("fields", fields); } if (highlight_rows.length() > 0) { row.put("highlights", highlight_rows); } rows.put(row); } // Fetch documents (if requested). if (include_docs && fetch_ids.length > 0) { final List<CouchDocument> fetched_docs = database.getDocuments(fetch_ids); for (int j = 0; j < max; j++) { final CouchDocument doc = fetched_docs.get(j); final JSONObject row = doc == null ? new JSONObject("{\"error\":\"not_found\"}") : doc.asJson(); rows.getJSONObject(j).put("doc", row); } } stopWatch.lap("fetch"); queryRow.put("skip", skip); queryRow.put("limit", limit); queryRow.put("total_rows", td.totalHits); queryRow.put("search_duration", stopWatch.getElapsed("search")); queryRow.put("fetch_duration", stopWatch.getElapsed("fetch")); // Include sort info (if requested). if (td instanceof TopFieldDocs) { queryRow.put("sort_order", CustomQueryParser.toJSON(((TopFieldDocs) td).fields)); } queryRow.put("rows", rows); } result.put(queryRow); } } catch (final ParseException e) { ServletUtils.sendJsonError(req, resp, 400, "Bad query syntax: " + e.getMessage()); return; } finally { state.returnSearcher(searcher); } resp.setHeader("ETag", etag); resp.setHeader("Cache-Control", "must-revalidate"); ServletUtils.setResponseContentTypeAndEncoding(req, resp); final Object json = result.length() > 1 ? result : result.getJSONObject(0); final String callback = req.getParameter("callback"); final String body; if (callback != null) { body = String.format("%s(%s)", callback, json); } else { if (json instanceof JSONObject) { final JSONObject obj = (JSONObject) json; body = getBooleanParameter(req, "debug") ? obj.toString(2) : obj.toString(); } else { final JSONArray arr = (JSONArray) json; body = getBooleanParameter(req, "debug") ? arr.toString(2) : arr.toString(); } } final Writer writer = resp.getWriter(); try { writer.write(body); } finally { writer.close(); } }
From source file:org.apache.solr.handler.component.AlfrescoSolrHighlighter.java
License:Open Source License
/** * Generates a list of Highlighted query fragments for each item in a list * of documents, or returns null if highlighting is disabled. * * @param docs/*from ww w . java 2 s . c o m*/ * query results * @param query * the query * @param req * the current request * @param defaultFields * default list of fields to summarize * * @return NamedList containing a NamedList for each document, which in * turns contains sets (field, summary) pairs. */ @Override @SuppressWarnings("unchecked") public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException { SolrParams params = req.getParams(); if (!isHighlightingEnabled(params)) // also returns early if no unique // key field return null; boolean rewrite = query != null && !(Boolean.valueOf(params.get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true")) && Boolean.valueOf(params.get(HighlightParams.HIGHLIGHT_MULTI_TERM, "true"))); if (rewrite) { query = query.rewrite(req.getSearcher().getIndexReader()); } SolrIndexSearcher searcher = req.getSearcher(); IndexSchema schema = searcher.getSchema(); // fetch unique key if one exists. SchemaField keyField = schema.getUniqueKeyField(); if (keyField == null) { return null;// exit early; we need a unique key field to populate // the response } String[] fieldNames = getHighlightFields(query, req, defaultFields); Set<String> preFetchFieldNames = getDocPrefetchFieldNames(fieldNames, req); if (preFetchFieldNames != null) { preFetchFieldNames.add(keyField.getName()); } FastVectorHighlighter fvh = null; // lazy FieldQuery fvhFieldQuery = null; // lazy IndexReader reader = new TermVectorReusingLeafReader(req.getSearcher().getSlowAtomicReader()); // SOLR-5855 // Highlight each document NamedList fragments = new SimpleOrderedMap(); DocIterator iterator = docs.iterator(); for (int i = 0; i < docs.size(); i++) { int docId = iterator.nextDoc(); Document doc = getDocument(searcher.doc(docId, preFetchFieldNames), req); @SuppressWarnings("rawtypes") NamedList docHighlights = new SimpleOrderedMap(); // Highlight per-field for (String fieldName : fieldNames) { String schemaFieldName = AlfrescoSolrDataModel.getInstance().mapProperty(fieldName, FieldUse.HIGHLIGHT, req); // rewrite field specific parameters ..... SchemaField schemaField = schema.getFieldOrNull(schemaFieldName); rewriteRequestParameters(params, fieldName, schemaFieldName, req); Object fieldHighlights; // object type allows flexibility for // subclassers if (schemaField == null) { fieldHighlights = null; } else if (schemaField.getType() instanceof org.apache.solr.schema.TrieField) { // TODO: highlighting numeric fields is broken (Lucene) - so // we disable them until fixed (see LUCENE-3080)! fieldHighlights = null; } else if (useFastVectorHighlighter(req.getParams(), schemaField)) { if (fvhFieldQuery == null) { fvh = new FastVectorHighlighter( // FVH cannot process hl.usePhraseHighlighter parameter // per-field basis req.getParams().getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true), // FVH cannot process hl.requireFieldMatch // parameter per-field basis req.getParams().getBool(HighlightParams.FIELD_MATCH, false)); fvh.setPhraseLimit(req.getParams().getInt(HighlightParams.PHRASE_LIMIT, SolrHighlighter.DEFAULT_PHRASE_LIMIT)); fvhFieldQuery = fvh.getFieldQuery(query, reader); } fieldHighlights = null; FvhContainer fvhContainer = new FvhContainer(fvh, fvhFieldQuery); fieldHighlights = doHighlightingByFastVectorHighlighter(doc, docId, schemaField, fvhContainer, reader, req); } else { // standard/default highlighter fieldHighlights = doHighlightingByHighlighter(doc, docId, schemaField, query, reader, req); // Fall back to the best FTS field if highlight fails if (fieldHighlights == null) { schemaFieldName = AlfrescoSolrDataModel.getInstance().mapProperty(fieldName, FieldUse.HIGHLIGHT, req, 1); if (schemaField != null) { schemaField = schema.getFieldOrNull(schemaFieldName); rewriteRequestParameters(params, fieldName, schemaFieldName, req); fieldHighlights = doHighlightingByHighlighter(doc, docId, schemaField, query, reader, req); } } } if (fieldHighlights == null) { // no summaries made; copy text from alternate field fieldHighlights = alternateField(doc, fieldName, req); } if (fieldHighlights != null) { docHighlights.add(fieldName, fieldHighlights); } } // for each field if (doc.get("DBID") != null) { docHighlights.add("DBID", doc.get("DBID")); } fragments.add(schema.printableUniqueKey(doc), docHighlights); } // for each doc return fragments; }
From source file:org.apache.solr.highlight.DefaultSolrHighlighter.java
License:Apache License
/** * Generates a list of Highlighted query fragments for each item in a list * of documents, or returns null if highlighting is disabled. * * @param docs query results// w w w .j a v a 2s. c o m * @param query the query * @param req the current request * @param defaultFields default list of fields to summarize * * @return NamedList containing a NamedList for each document, which in * turns contains sets (field, summary) pairs. */ @Override @SuppressWarnings("unchecked") public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException { SolrParams params = req.getParams(); if (!isHighlightingEnabled(params)) return null; SolrIndexSearcher searcher = req.getSearcher(); IndexSchema schema = searcher.getSchema(); NamedList fragments = new SimpleOrderedMap(); String[] fieldNames = getHighlightFields(query, req, defaultFields); Set<String> fset = new HashSet<String>(); { // pre-fetch documents using the Searcher's doc cache for (String f : fieldNames) { fset.add(f); } // fetch unique key if one exists. SchemaField keyField = schema.getUniqueKeyField(); if (null != keyField) fset.add(keyField.getName()); } // get FastVectorHighlighter instance out of the processing loop FastVectorHighlighter fvh = new FastVectorHighlighter( // FVH cannot process hl.usePhraseHighlighter parameter per-field basis params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true), // FVH cannot process hl.requireFieldMatch parameter per-field basis params.getBool(HighlightParams.FIELD_MATCH, false)); fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE)); FieldQuery fieldQuery = fvh.getFieldQuery(query, searcher.getIndexReader()); // Highlight each document DocIterator iterator = docs.iterator(); for (int i = 0; i < docs.size(); i++) { int docId = iterator.nextDoc(); Document doc = searcher.doc(docId, fset); NamedList docSummaries = new SimpleOrderedMap(); for (String fieldName : fieldNames) { fieldName = fieldName.trim(); if (useFastVectorHighlighter(params, schema, fieldName)) doHighlightingByFastVectorHighlighter(fvh, fieldQuery, req, docSummaries, docId, doc, fieldName); else doHighlightingByHighlighter(query, req, docSummaries, docId, doc, fieldName); } String printId = schema.printableUniqueKey(doc); fragments.add(printId == null ? null : printId, docSummaries); } return fragments; }
From source file:perf.IndexState.java
License:Apache License
public IndexState(ReferenceManager<IndexSearcher> mgr, TaxonomyReader taxoReader, String textFieldName, DirectSpellChecker spellChecker, String hiliteImpl, FacetsConfig facetsConfig) throws IOException { this.mgr = mgr; this.spellChecker = spellChecker; this.textFieldName = textFieldName; this.taxoReader = taxoReader; this.facetsConfig = facetsConfig; groupEndQuery = new TermQuery(new Term("groupend", "x")); if (hiliteImpl.equals("FastVectorHighlighter")) { fastHighlighter = new FastVectorHighlighter(true, true); useHighlighter = false;//from ww w .ja v a2 s. c o m postingsHighlighter = null; } else if (hiliteImpl.equals("PostingsHighlighter")) { fastHighlighter = null; useHighlighter = false; postingsHighlighter = new PostingsHighlighter(); } else if (hiliteImpl.equals("Highlighter")) { fastHighlighter = null; useHighlighter = true; postingsHighlighter = null; } else { throw new IllegalArgumentException("unrecognized -hiliteImpl \"" + hiliteImpl + "\""); } IndexSearcher searcher = mgr.acquire(); try { hasDeletions = searcher.getIndexReader().hasDeletions(); for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) { pkLookupStates.put(ctx.reader().getCoreCacheKey(), new ThreadLocal<PKLookupState>()); pointsPKLookupStates.put(ctx.reader().getCoreCacheKey(), new ThreadLocal<PointsPKLookupState>()); } } finally { mgr.release(searcher); } }