Example usage for org.apache.lucene.search.vectorhighlight FastVectorHighlighter FastVectorHighlighter

List of usage examples for org.apache.lucene.search.vectorhighlight FastVectorHighlighter FastVectorHighlighter

Introduction

In this page you can find the example usage for org.apache.lucene.search.vectorhighlight FastVectorHighlighter FastVectorHighlighter.

Prototype

public FastVectorHighlighter(boolean phraseHighlight, boolean fieldMatch) 

Source Link

Document

a constructor.

Usage

From source file:com.github.rnewson.couchdb.lucene.DatabaseIndexer.java

License:Apache License

public void search(final HttpServletRequest req, final HttpServletResponse resp)
        throws IOException, JSONException {
    final IndexState state = getState(req, resp);
    if (state == null)
        return;//w w w .  j  av  a 2s  . c  o m
    final IndexSearcher searcher = state.borrowSearcher(isStaleOk(req));
    final String etag = state.getEtag();
    final FastVectorHighlighter fvh = new FastVectorHighlighter(true, true);
    final JSONArray result = new JSONArray();
    try {
        if (state.notModified(req)) {
            resp.setStatus(304);
            return;
        }
        for (final String queryString : getQueryStrings(req)) {
            final Analyzer analyzer = state.analyzer(req.getParameter("analyzer"));
            final Operator operator = "and".equalsIgnoreCase(req.getParameter("default_operator"))
                    ? Operator.AND
                    : Operator.OR;
            final Query q = state.parse(queryString, operator, analyzer);

            final JSONObject queryRow = new JSONObject();
            queryRow.put("q", q.toString());
            if (getBooleanParameter(req, "debug")) {
                queryRow.put("plan", QueryPlan.toPlan(q));
                queryRow.put("analyzer", analyzer.getClass());
            }
            queryRow.put("etag", etag);
            if (getBooleanParameter(req, "rewrite")) {
                final Query rewritten_q = q.rewrite(searcher.getIndexReader());
                queryRow.put("rewritten_q", rewritten_q.toString());

                final JSONObject freqs = new JSONObject();

                final Set<Term> terms = new HashSet<Term>();
                rewritten_q.extractTerms(terms);
                for (final Object term : terms) {
                    final int freq = searcher.getIndexReader().docFreq((Term) term);
                    freqs.put(term.toString(), freq);
                }
                queryRow.put("freqs", freqs);
            } else {
                // Perform the search.
                final TopDocs td;
                final StopWatch stopWatch = new StopWatch();

                final boolean include_docs = getBooleanParameter(req, "include_docs");
                final int highlights = getIntParameter(req, "highlights", 0);
                final int highlight_length = max(getIntParameter(req, "highlight_length", 18), 18); // min for fast term vector highlighter is 18
                final boolean include_termvectors = getBooleanParameter(req, "include_termvectors");
                final int limit = getIntParameter(req, "limit", ini.getInt("lucene.limit", 25));
                final Sort sort = CustomQueryParser.toSort(req.getParameter("sort"));
                final int skip = getIntParameter(req, "skip", 0);

                final Set<String> fieldsToLoad;
                if (req.getParameter("include_fields") == null) {
                    fieldsToLoad = null;
                } else {
                    final String[] fields = Utils.splitOnCommas(req.getParameter("include_fields"));
                    final List<String> list = Arrays.asList(fields);
                    fieldsToLoad = new HashSet<String>(list);
                }

                if (sort == null) {
                    td = searcher.search(q, null, skip + limit);
                } else {
                    td = searcher.search(q, null, skip + limit, sort);
                }
                stopWatch.lap("search");

                // Fetch matches (if any).
                final int max = Math.max(0, Math.min(td.totalHits - skip, limit));
                final JSONArray rows = new JSONArray();
                final String[] fetch_ids = new String[max];
                for (int i = skip; i < skip + max; i++) {
                    final Document doc;
                    if (fieldsToLoad == null) {
                        doc = searcher.doc(td.scoreDocs[i].doc);
                    } else {
                        doc = searcher.doc(td.scoreDocs[i].doc, fieldsToLoad);
                    }

                    final JSONObject row = new JSONObject();
                    final JSONObject fields = new JSONObject();
                    final JSONObject highlight_rows = new JSONObject();

                    // Include stored fields.
                    for (final IndexableField f : doc.getFields()) {
                        if (!f.fieldType().stored()) {
                            continue;
                        }
                        final String name = f.name();
                        final Object value;
                        if (f.numericValue() != null) {
                            value = f.numericValue();
                        } else {
                            value = f.stringValue();
                        }
                        if (value != null) {
                            if ("_id".equals(name)) {
                                row.put("id", value);
                            } else {
                                if (!fields.has(name)) {
                                    fields.put(name, value);
                                } else {
                                    final Object obj = fields.get(name);
                                    if (obj instanceof String || obj instanceof Number) {
                                        final JSONArray arr = new JSONArray();
                                        arr.put(obj);
                                        arr.put(value);
                                        fields.put(name, arr);
                                    } else {
                                        assert obj instanceof JSONArray;
                                        ((JSONArray) obj).put(value);
                                    }
                                }

                                if (highlights > 0) {
                                    String[] frags = fvh.getBestFragments(fvh.getFieldQuery(q),
                                            searcher.getIndexReader(), td.scoreDocs[i].doc, name,
                                            highlight_length, highlights);
                                    highlight_rows.put(name, frags);
                                }
                            }
                        }
                    }

                    if (!Float.isNaN(td.scoreDocs[i].score)) {
                        row.put("score", td.scoreDocs[i].score);
                    } // Include sort order (if any).
                    if (td instanceof TopFieldDocs) {
                        final FieldDoc fd = (FieldDoc) ((TopFieldDocs) td).scoreDocs[i];
                        row.put("sort_order", fd.fields);
                    }
                    // Fetch document (if requested).
                    if (include_docs) {
                        fetch_ids[i - skip] = doc.get("_id");
                    }
                    if (fields.length() > 0) {
                        row.put("fields", fields);
                    }
                    if (highlight_rows.length() > 0) {
                        row.put("highlights", highlight_rows);
                    }

                    rows.put(row);
                }
                // Fetch documents (if requested).
                if (include_docs && fetch_ids.length > 0) {
                    final List<CouchDocument> fetched_docs = database.getDocuments(fetch_ids);
                    for (int j = 0; j < max; j++) {
                        final CouchDocument doc = fetched_docs.get(j);
                        final JSONObject row = doc == null ? new JSONObject("{\"error\":\"not_found\"}")
                                : doc.asJson();
                        rows.getJSONObject(j).put("doc", row);
                    }
                }
                stopWatch.lap("fetch");

                queryRow.put("skip", skip);
                queryRow.put("limit", limit);
                queryRow.put("total_rows", td.totalHits);
                queryRow.put("search_duration", stopWatch.getElapsed("search"));
                queryRow.put("fetch_duration", stopWatch.getElapsed("fetch"));
                // Include sort info (if requested).
                if (td instanceof TopFieldDocs) {
                    queryRow.put("sort_order", CustomQueryParser.toJSON(((TopFieldDocs) td).fields));
                }
                queryRow.put("rows", rows);
            }
            result.put(queryRow);
        }
    } catch (final ParseException e) {
        ServletUtils.sendJsonError(req, resp, 400, "Bad query syntax: " + e.getMessage());
        return;
    } finally {
        state.returnSearcher(searcher);
    }

    resp.setHeader("ETag", etag);
    resp.setHeader("Cache-Control", "must-revalidate");
    ServletUtils.setResponseContentTypeAndEncoding(req, resp);

    final Object json = result.length() > 1 ? result : result.getJSONObject(0);
    final String callback = req.getParameter("callback");
    final String body;
    if (callback != null) {
        body = String.format("%s(%s)", callback, json);
    } else {
        if (json instanceof JSONObject) {
            final JSONObject obj = (JSONObject) json;
            body = getBooleanParameter(req, "debug") ? obj.toString(2) : obj.toString();
        } else {
            final JSONArray arr = (JSONArray) json;
            body = getBooleanParameter(req, "debug") ? arr.toString(2) : arr.toString();
        }
    }

    final Writer writer = resp.getWriter();
    try {
        writer.write(body);
    } finally {
        writer.close();
    }
}

From source file:org.apache.solr.handler.component.AlfrescoSolrHighlighter.java

License:Open Source License

/**
 * Generates a list of Highlighted query fragments for each item in a list
 * of documents, or returns null if highlighting is disabled.
 *
 * @param docs/*from   ww w  .  java 2 s  . c  o  m*/
 *            query results
 * @param query
 *            the query
 * @param req
 *            the current request
 * @param defaultFields
 *            default list of fields to summarize
 *
 * @return NamedList containing a NamedList for each document, which in
 *         turns contains sets (field, summary) pairs.
 */
@Override
@SuppressWarnings("unchecked")
public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields)
        throws IOException {
    SolrParams params = req.getParams();
    if (!isHighlightingEnabled(params)) // also returns early if no unique
        // key field
        return null;

    boolean rewrite = query != null
            && !(Boolean.valueOf(params.get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))
                    && Boolean.valueOf(params.get(HighlightParams.HIGHLIGHT_MULTI_TERM, "true")));

    if (rewrite) {
        query = query.rewrite(req.getSearcher().getIndexReader());
    }

    SolrIndexSearcher searcher = req.getSearcher();
    IndexSchema schema = searcher.getSchema();

    // fetch unique key if one exists.
    SchemaField keyField = schema.getUniqueKeyField();
    if (keyField == null) {
        return null;// exit early; we need a unique key field to populate
                    // the response
    }

    String[] fieldNames = getHighlightFields(query, req, defaultFields);

    Set<String> preFetchFieldNames = getDocPrefetchFieldNames(fieldNames, req);
    if (preFetchFieldNames != null) {
        preFetchFieldNames.add(keyField.getName());
    }

    FastVectorHighlighter fvh = null; // lazy
    FieldQuery fvhFieldQuery = null; // lazy

    IndexReader reader = new TermVectorReusingLeafReader(req.getSearcher().getSlowAtomicReader()); // SOLR-5855

    // Highlight each document
    NamedList fragments = new SimpleOrderedMap();
    DocIterator iterator = docs.iterator();
    for (int i = 0; i < docs.size(); i++) {
        int docId = iterator.nextDoc();
        Document doc = getDocument(searcher.doc(docId, preFetchFieldNames), req);

        @SuppressWarnings("rawtypes")
        NamedList docHighlights = new SimpleOrderedMap();
        // Highlight per-field
        for (String fieldName : fieldNames) {
            String schemaFieldName = AlfrescoSolrDataModel.getInstance().mapProperty(fieldName,
                    FieldUse.HIGHLIGHT, req);

            // rewrite field specific parameters .....
            SchemaField schemaField = schema.getFieldOrNull(schemaFieldName);
            rewriteRequestParameters(params, fieldName, schemaFieldName, req);

            Object fieldHighlights; // object type allows flexibility for
            // subclassers
            if (schemaField == null) {
                fieldHighlights = null;
            } else if (schemaField.getType() instanceof org.apache.solr.schema.TrieField) {
                // TODO: highlighting numeric fields is broken (Lucene) - so
                // we disable them until fixed (see LUCENE-3080)!
                fieldHighlights = null;
            } else if (useFastVectorHighlighter(req.getParams(), schemaField)) {
                if (fvhFieldQuery == null) {
                    fvh = new FastVectorHighlighter(
                            // FVH cannot process hl.usePhraseHighlighter parameter
                            // per-field basis
                            req.getParams().getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
                            // FVH cannot process hl.requireFieldMatch
                            // parameter per-field basis
                            req.getParams().getBool(HighlightParams.FIELD_MATCH, false));
                    fvh.setPhraseLimit(req.getParams().getInt(HighlightParams.PHRASE_LIMIT,
                            SolrHighlighter.DEFAULT_PHRASE_LIMIT));
                    fvhFieldQuery = fvh.getFieldQuery(query, reader);
                }
                fieldHighlights = null;

                FvhContainer fvhContainer = new FvhContainer(fvh, fvhFieldQuery);

                fieldHighlights = doHighlightingByFastVectorHighlighter(doc, docId, schemaField, fvhContainer,
                        reader, req);
            } else { // standard/default highlighter
                fieldHighlights = doHighlightingByHighlighter(doc, docId, schemaField, query, reader, req);
                // Fall back to the best FTS field if highlight fails
                if (fieldHighlights == null) {
                    schemaFieldName = AlfrescoSolrDataModel.getInstance().mapProperty(fieldName,
                            FieldUse.HIGHLIGHT, req, 1);
                    if (schemaField != null) {
                        schemaField = schema.getFieldOrNull(schemaFieldName);
                        rewriteRequestParameters(params, fieldName, schemaFieldName, req);
                        fieldHighlights = doHighlightingByHighlighter(doc, docId, schemaField, query, reader,
                                req);
                    }
                }
            }

            if (fieldHighlights == null) {
                // no summaries made; copy text from alternate field
                fieldHighlights = alternateField(doc, fieldName, req);
            }

            if (fieldHighlights != null) {
                docHighlights.add(fieldName, fieldHighlights);
            }
        } // for each field
        if (doc.get("DBID") != null) {
            docHighlights.add("DBID", doc.get("DBID"));
        }
        fragments.add(schema.printableUniqueKey(doc), docHighlights);
    } // for each doc
    return fragments;
}

From source file:org.apache.solr.highlight.DefaultSolrHighlighter.java

License:Apache License

/**
 * Generates a list of Highlighted query fragments for each item in a list
 * of documents, or returns null if highlighting is disabled.
 *
 * @param docs query results//  w w  w .j a v a 2s.  c o m
 * @param query the query
 * @param req the current request
 * @param defaultFields default list of fields to summarize
 *
 * @return NamedList containing a NamedList for each document, which in 
 * turns contains sets (field, summary) pairs.
 */
@Override
@SuppressWarnings("unchecked")
public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields)
        throws IOException {
    SolrParams params = req.getParams();
    if (!isHighlightingEnabled(params))
        return null;

    SolrIndexSearcher searcher = req.getSearcher();
    IndexSchema schema = searcher.getSchema();
    NamedList fragments = new SimpleOrderedMap();
    String[] fieldNames = getHighlightFields(query, req, defaultFields);
    Set<String> fset = new HashSet<String>();

    {
        // pre-fetch documents using the Searcher's doc cache
        for (String f : fieldNames) {
            fset.add(f);
        }
        // fetch unique key if one exists.
        SchemaField keyField = schema.getUniqueKeyField();
        if (null != keyField)
            fset.add(keyField.getName());
    }

    // get FastVectorHighlighter instance out of the processing loop
    FastVectorHighlighter fvh = new FastVectorHighlighter(
            // FVH cannot process hl.usePhraseHighlighter parameter per-field basis
            params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
            // FVH cannot process hl.requireFieldMatch parameter per-field basis
            params.getBool(HighlightParams.FIELD_MATCH, false));
    fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE));
    FieldQuery fieldQuery = fvh.getFieldQuery(query, searcher.getIndexReader());

    // Highlight each document
    DocIterator iterator = docs.iterator();
    for (int i = 0; i < docs.size(); i++) {
        int docId = iterator.nextDoc();
        Document doc = searcher.doc(docId, fset);
        NamedList docSummaries = new SimpleOrderedMap();
        for (String fieldName : fieldNames) {
            fieldName = fieldName.trim();
            if (useFastVectorHighlighter(params, schema, fieldName))
                doHighlightingByFastVectorHighlighter(fvh, fieldQuery, req, docSummaries, docId, doc,
                        fieldName);
            else
                doHighlightingByHighlighter(query, req, docSummaries, docId, doc, fieldName);
        }
        String printId = schema.printableUniqueKey(doc);
        fragments.add(printId == null ? null : printId, docSummaries);
    }
    return fragments;
}

From source file:perf.IndexState.java

License:Apache License

public IndexState(ReferenceManager<IndexSearcher> mgr, TaxonomyReader taxoReader, String textFieldName,
        DirectSpellChecker spellChecker, String hiliteImpl, FacetsConfig facetsConfig) throws IOException {
    this.mgr = mgr;
    this.spellChecker = spellChecker;
    this.textFieldName = textFieldName;
    this.taxoReader = taxoReader;
    this.facetsConfig = facetsConfig;

    groupEndQuery = new TermQuery(new Term("groupend", "x"));
    if (hiliteImpl.equals("FastVectorHighlighter")) {
        fastHighlighter = new FastVectorHighlighter(true, true);
        useHighlighter = false;//from  ww w  .ja v  a2 s. c  o m
        postingsHighlighter = null;
    } else if (hiliteImpl.equals("PostingsHighlighter")) {
        fastHighlighter = null;
        useHighlighter = false;
        postingsHighlighter = new PostingsHighlighter();
    } else if (hiliteImpl.equals("Highlighter")) {
        fastHighlighter = null;
        useHighlighter = true;
        postingsHighlighter = null;
    } else {
        throw new IllegalArgumentException("unrecognized -hiliteImpl \"" + hiliteImpl + "\"");
    }
    IndexSearcher searcher = mgr.acquire();
    try {
        hasDeletions = searcher.getIndexReader().hasDeletions();

        for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
            pkLookupStates.put(ctx.reader().getCoreCacheKey(), new ThreadLocal<PKLookupState>());
            pointsPKLookupStates.put(ctx.reader().getCoreCacheKey(), new ThreadLocal<PointsPKLookupState>());
        }
    } finally {
        mgr.release(searcher);
    }
}