Example usage for org.apache.solr.common.params HighlightParams USE_PHRASE_HIGHLIGHTER

List of usage examples for org.apache.solr.common.params HighlightParams USE_PHRASE_HIGHLIGHTER

Introduction

On this page you can find example usages of org.apache.solr.common.params HighlightParams USE_PHRASE_HIGHLIGHTER.

Prototype

String USE_PHRASE_HIGHLIGHTER

To view the source code for org.apache.solr.common.params HighlightParams USE_PHRASE_HIGHLIGHTER, click the Source Link.

Usage

From source file:com.o19s.solr.swan.highlight.SwanHighlighter.java

License:Apache License

/**
 * Generates a list of highlighted query fragments for each item in a list
 * of documents, or returns null if highlighting is disabled.
 *
 * <p>Documents are processed in ascending internal doc id order (the ids are
 * collected into a sorted set), not in the iteration order of {@code docs}.
 *
 * @param docs query results
 * @param query the query
 * @param req the current request
 * @param defaultFields default list of fields to summarize
 *
 * @return NamedList containing a NamedList for each document, which in
 * turn contains (field, summary) pairs; {@code null} when highlighting is
 * not enabled for the request.
 * @throws IOException propagated from the searcher or the per-field highlighting helpers
 */
@Override
@SuppressWarnings("unchecked")
public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields)
        throws IOException {

    SolrParams params = req.getParams();
    if (!isHighlightingEnabled(params))
        return null;

    NamedList<Object> fragments = new SimpleOrderedMap<Object>();

    SolrIndexSearcher searcher = req.getSearcher();
    IndexSchema schema = searcher.getSchema();
    String[] fieldNames = getHighlightFields(query, req, defaultFields);

    // Fields to load for each document: every highlighted field plus the
    // unique key (if the schema defines one) so the result can be labelled.
    Set<String> fset = new HashSet<String>();
    Collections.addAll(fset, fieldNames);
    SchemaField keyField = schema.getUniqueKeyField();
    if (null != keyField)
        fset.add(keyField.getName());

    // Collect the doc ids in a sorted set; the same set is handed to the
    // span-aware highlighter below so it can filter spans by document.
    TreeSet<Integer> docIds = new TreeSet<Integer>();
    DocIterator iterator = docs.iterator();
    for (int i = 0; i < docs.size(); i++) {
        docIds.add(iterator.nextDoc());
    }

    // Pick the frag list builder. The parameter is optional: a missing value
    // must not fail the request, and the comparison must not depend on the
    // JVM default locale, so compare null-safely and case-insensitively.
    String fragListBuilderName = params.get(HighlightParams.FRAG_LIST_BUILDER);
    FragListBuilder fragListBuilder;
    if ("single".equalsIgnoreCase(fragListBuilderName)) {
        fragListBuilder = new SingleFragListBuilder();
    } else {
        fragListBuilder = new com.o19s.solr.swan.highlight.SimpleFragListBuilder();
    }

    // get FastVectorHighlighter instance out of the processing loop
    SpanAwareFastVectorHighlighter safvh = new SpanAwareFastVectorHighlighter(
            // FVH cannot process hl.usePhraseHighlighter parameter per-field basis
            params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
            // FVH cannot process hl.requireFieldMatch parameter per-field basis
            params.getBool(HighlightParams.FIELD_MATCH, false), fragListBuilder,
            new WordHashFragmentsBuilder(),
            // List of docIds to filter spans
            docIds);
    safvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE));
    SpanAwareFieldQuery fieldQuery = safvh.getFieldQuery(query, searcher.getIndexReader(), docIds);

    // Highlight each document
    for (int docId : docIds) {
        Document doc = searcher.doc(docId, fset);
        NamedList docSummaries = new SimpleOrderedMap();
        for (String fieldName : fieldNames) {
            fieldName = fieldName.trim();
            if (useFastVectorHighlighter(params, schema, fieldName))
                doHighlightingByFastVectorHighlighter(safvh, fieldQuery, req, docSummaries, docId, doc,
                        fieldName);
            else
                doHighlightingByHighlighter(query, req, docSummaries, docId, doc, fieldName);
        }
        // printId may be null; it is passed through as the entry name unchanged
        String printId = schema.printableUniqueKey(doc);
        fragments.add(printId, docSummaries);
    }
    return fragments;
}

From source file:com.o19s.solr.swan.highlight.SwanHighlighter.java

License:Apache License

/**
 * Highlights a single field of a single document with the standard
 * {@code Highlighter} and adds the resulting snippets to {@code docSummaries}
 * under {@code fieldName}. When no snippet is produced, delegates to
 * {@code alternateField}.
 *
 * <p>Fields whose schema type is {@code TrieField} or {@code TrieDateField}
 * are skipped, and so are fields with no values in {@code doc}.
 *
 * @param query the query to highlight
 * @param req the current request (source of searcher, schema and parameters)
 * @param docSummaries per-document result list that receives (field, snippets)
 * @param docId internal id of the document (not used by the current implementation)
 * @param doc the loaded document
 * @param fieldName name of the field to highlight
 * @throws IOException propagated from token stream creation or highlighting
 * @throws SolrException (SERVER_ERROR) when the highlighter reports invalid token offsets
 */
private void doHighlightingByHighlighter(Query query, SolrQueryRequest req, NamedList docSummaries, int docId,
        Document doc, String fieldName) throws IOException {
    final SolrIndexSearcher searcher = req.getSearcher();
    final IndexSchema schema = searcher.getSchema();

    // TODO: Currently in trunk highlighting numeric fields is broken (Lucene) -
    // so we disable them until fixed (see LUCENE-3080)!
    // BEGIN: Hack
    final SchemaField schemaField = schema.getFieldOrNull(fieldName);
    if (schemaField != null && ((schemaField.getType() instanceof org.apache.solr.schema.TrieField)
            || (schemaField.getType() instanceof org.apache.solr.schema.TrieDateField)))
        return;
    // END: Hack

    final SolrParams params = req.getParams();

    // Collect the string value of every instance of the field in the document.
    final List<String> docTexts = new ArrayList<String>();
    for (IndexableField field : doc.getFields(fieldName)) {
        docTexts.add(field.stringValue());
    }
    if (docTexts.isEmpty())
        return;

    // These settings do not change between field values, so read them once.
    final int numFragments = getMaxSnippets(fieldName, params);
    final boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);
    final int maxCharsToAnalyze = params.getFieldInt(fieldName, HighlightParams.MAX_CHARS,
            Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);
    final boolean usePhraseHighlighter = Boolean
            .parseBoolean(params.get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"));

    final List<TextFragment> frags = new ArrayList<TextFragment>();

    // NOTE: the term-vector based TermOffsets optimization is not used here;
    // every value is re-analyzed with the field's analyzer.
    for (String docText : docTexts) {
        TokenStream tokenStream = createAnalyzerTStream(schema, fieldName, docText);

        Highlighter highlighter;
        if (usePhraseHighlighter) {
            // The phrase highlighter consumes the stream while it is being
            // built, so the stream is cached (optionally limited to
            // maxCharsToAnalyze) and rewound afterwards.
            CachingTokenFilter cachedStream = new CachingTokenFilter(maxCharsToAnalyze < 0 ? tokenStream
                    : new OffsetLimitTokenFilter(tokenStream, maxCharsToAnalyze));

            highlighter = getPhraseHighlighter(query, fieldName, req, cachedStream);

            // after highlighter initialization, reset tstream since construction of highlighter already used it
            cachedStream.reset();
            tokenStream = cachedStream;
        } else {
            // use "the old way"
            highlighter = getHighlighter(query, fieldName, req);
        }

        // A negative limit means "analyze the whole value".
        highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze < 0 ? docText.length() : maxCharsToAnalyze);

        try {
            TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tokenStream, docText,
                    mergeContiguousFragments, numFragments);
            for (TextFragment candidate : bestTextFragments) {
                if ((candidate != null) && (candidate.getScore() > 0)) {
                    frags.add(candidate);
                }
            }
        } catch (InvalidTokenOffsetsException e) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
        }
    }

    // Sort such that the fragments with the highest score come first.
    // Float.compare is used instead of rounding the difference, which would
    // treat scores less than 0.5 apart as equal.
    Collections.sort(frags, new Comparator<TextFragment>() {
        @Override
        public int compare(TextFragment arg0, TextFragment arg1) {
            return Float.compare(arg1.getScore(), arg0.getScore());
        }
    });

    // convert fragments back into text
    // TODO: we can include score and position information in output as snippet attributes
    // (frags only ever holds non-null fragments with a positive score, see above)
    final List<String> fragTexts = new ArrayList<String>();
    for (TextFragment fragment : frags) {
        fragTexts.add(fragment.toString());
        if (fragTexts.size() >= numFragments)
            break;
    }

    // toArray(new String[0]) yields a real String[]; the no-arg toArray()
    // returns Object[], which cannot be cast to String[].
    final String[] summaries = fragTexts.toArray(new String[0]);
    if (summaries.length > 0) {
        docSummaries.add(fieldName, summaries);
    } else {
        // no summaries made, copy text from alternate field
        alternateField(docSummaries, params, doc, fieldName);
    }
}

From source file:org.alfresco.repo.search.impl.solr.SolrQueryHTTPClient.java

License:Open Source License

/**
 * Appends the highlighting request parameters described by
 * {@code searchParameters.getHighlight()} to {@code url}. Does nothing when
 * no highlight settings are present.
 *
 * <p>Request-wide settings are written first, followed by the per-field
 * overrides ({@code f.<field>.<param>=<value>}) and finally the
 * comma-separated list of highlighted fields. Only settings that are
 * non-null are written.
 *
 * @param searchParameters the search request being translated
 * @param encoder codec used to URL-encode free-text values and field names
 * @param url the query string under construction; parameters are appended to it
 * @throws UnsupportedEncodingException if the encoder rejects the "UTF-8" charset
 */
protected void buildHightlightParameters(SearchParameters searchParameters, URLCodec encoder, StringBuilder url)
        throws UnsupportedEncodingException {
    if (searchParameters.getHighlight() == null) {
        return;
    }

    url.append("&").append(HighlightParams.HIGHLIGHT).append("=true");
    url.append("&").append(HighlightParams.HIGHLIGHT).append(".q=")
            .append(encoder.encode(searchParameters.getSearchTerm(), "UTF-8"));

    // Request-wide settings
    if (searchParameters.getHighlight().getSnippetCount() != null) {
        url.append("&").append(HighlightParams.SNIPPETS).append("=")
                .append(searchParameters.getHighlight().getSnippetCount());
    }
    if (searchParameters.getHighlight().getFragmentSize() != null) {
        url.append("&").append(HighlightParams.FRAGSIZE).append("=")
                .append(searchParameters.getHighlight().getFragmentSize());
    }
    if (searchParameters.getHighlight().getMaxAnalyzedChars() != null) {
        url.append("&").append(HighlightParams.MAX_CHARS).append("=")
                .append(searchParameters.getHighlight().getMaxAnalyzedChars());
    }
    if (searchParameters.getHighlight().getMergeContiguous() != null) {
        url.append("&").append(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS).append("=")
                .append(searchParameters.getHighlight().getMergeContiguous());
    }
    if (searchParameters.getHighlight().getUsePhraseHighlighter() != null) {
        url.append("&").append(HighlightParams.USE_PHRASE_HIGHLIGHTER).append("=")
                .append(searchParameters.getHighlight().getUsePhraseHighlighter());
    }
    if (searchParameters.getHighlight().getPrefix() != null) {
        url.append("&").append(HighlightParams.SIMPLE_PRE).append("=")
                .append(encoder.encode(searchParameters.getHighlight().getPrefix(), "UTF-8"));
    }
    if (searchParameters.getHighlight().getPostfix() != null) {
        url.append("&").append(HighlightParams.SIMPLE_POST).append("=")
                .append(encoder.encode(searchParameters.getHighlight().getPostfix(), "UTF-8"));
    }

    if (searchParameters.getHighlight().getFields() == null
            || searchParameters.getHighlight().getFields().isEmpty()) {
        return;
    }

    // Per-field overrides
    List<String> fieldNames = new ArrayList<>(searchParameters.getHighlight().getFields().size());
    for (FieldHighlightParameters aField : searchParameters.getHighlight().getFields()) {
        ParameterCheck.mandatoryString("highlight field", aField.getField());
        fieldNames.add(aField.getField());

        // Encode the field name once; every override for this field shares the prefix.
        String fieldParamPrefix = "&f." + encoder.encode(aField.getField(), "UTF-8") + ".";

        if (aField.getSnippetCount() != null) {
            url.append(fieldParamPrefix).append(HighlightParams.SNIPPETS).append("=")
                    .append(aField.getSnippetCount());
        }

        // Emitted exactly once per field (a duplicated copy of this block
        // previously wrote the same parameter twice).
        if (aField.getFragmentSize() != null) {
            url.append(fieldParamPrefix).append(HighlightParams.FRAGSIZE).append("=")
                    .append(aField.getFragmentSize());
        }

        if (aField.getMergeContiguous() != null) {
            url.append(fieldParamPrefix).append(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS).append("=")
                    .append(aField.getMergeContiguous());
        }

        if (aField.getPrefix() != null) {
            url.append(fieldParamPrefix).append(HighlightParams.SIMPLE_PRE).append("=")
                    .append(encoder.encode(aField.getPrefix(), "UTF-8"));
        }

        if (aField.getPostfix() != null) {
            url.append(fieldParamPrefix).append(HighlightParams.SIMPLE_POST).append("=")
                    .append(encoder.encode(aField.getPostfix(), "UTF-8"));
        }
    }

    url.append("&").append(HighlightParams.FIELDS).append("=")
            .append(encoder.encode(String.join(",", fieldNames), "UTF-8"));
}

From source file:org.alfresco.solr.highlight.AlfrescoHighlighterTest.java

License:Open Source License

/**
 * Runs the same phrase query ("some long") twice against the "name" field:
 * first with the default highlighter settings, then with
 * hl.usePhraseHighlighter explicitly set to false, and checks the
 * highlighted snippets returned in each case.
 */
@Test
public void highlightingPhraseQueriesTest() {
    logger.info("######### Testing PHRASE QUERIES ###########");

    final String json = "{\"locales\":[\"en\"], \"tenants\": [ \"\" ]}";
    final String snippetPath = "//lst[@name='highlighting']/lst/arr/str";

    // Phrase highlighting is on by default, so the parameter is not sent here.
    SolrServletRequest phraseRequest = areq(
            params("q", "name:long", "qt", "/afts", "start", "0", "rows", "5", HighlightParams.HIGHLIGHT,
                    "true", HighlightParams.Q, "\"some long\"", HighlightParams.FIELDS, "name",
                    HighlightParams.SIMPLE_PRE, "(", HighlightParams.SIMPLE_POST, ")", HighlightParams.SNIPPETS,
                    String.valueOf(1), HighlightParams.FRAGSIZE, String.valueOf(100)),
            json);

    assertQ(phraseRequest, snippetPath
            + "[.='this is (some) (long) text.  It has the word long in many places.  In fact, it has long on some']");

    // Same query with the phrase highlighter switched off.
    SolrServletRequest termRequest = areq(
            params("q", "name:long", "qt", "/afts", "start", "0", "rows", "5", HighlightParams.HIGHLIGHT,
                    "true", HighlightParams.Q, "\"some long\"", HighlightParams.FIELDS, "name",
                    HighlightParams.USE_PHRASE_HIGHLIGHTER, "false", HighlightParams.SIMPLE_PRE, "(",
                    HighlightParams.SIMPLE_POST, ")", HighlightParams.SNIPPETS, String.valueOf(1),
                    HighlightParams.FRAGSIZE, String.valueOf(100)),
            json);

    assertQ(termRequest, snippetPath + "[.='(some) very (long) name']", snippetPath
            + "[.='this is (some) (long) text.  It has the word (long) in many places.  In fact, it has (long) on (some)']");
}

From source file:org.dspace.discovery.SolrServiceImpl.java

License:BSD License

/**
 * Translates a {@code DiscoverQuery} into the {@code SolrQuery} that is sent
 * to Solr: main query, spellcheck, filter queries, paging, sorting, extra
 * properties, faceting and hit highlighting. Registered
 * {@code SolrServiceSearchPlugin}s get a chance to add further parameters
 * before the query is returned.
 *
 * @param context the current context, handed to the search plugins
 * @param discoveryQuery the discovery query to translate
 * @param includeUnDiscoverable when false, withdrawn and non-discoverable
 *        items are filtered out
 * @return the assembled Solr query
 */
protected SolrQuery resolveToSolrQuery(Context context, DiscoverQuery discoveryQuery,
        boolean includeUnDiscoverable) {
    final SolrQuery result = new SolrQuery();

    // Main query: match everything unless the caller supplied one.
    final String queryText = discoveryQuery.getQuery() != null ? discoveryQuery.getQuery() : "*:*";
    result.setQuery(queryText);

    if (discoveryQuery.isSpellCheck()) {
        result.setParam(SpellingParams.SPELLCHECK_Q, queryText);
        result.setParam(SpellingParams.SPELLCHECK_COLLATE, Boolean.TRUE);
        result.setParam("spellcheck", Boolean.TRUE);
    }

    // Filter queries
    if (!includeUnDiscoverable) {
        result.addFilterQuery("NOT(withdrawn:true)");
        result.addFilterQuery("NOT(discoverable:false)");
    }
    for (String filter : discoveryQuery.getFilterQueries()) {
        result.addFilterQuery(filter);
    }
    if (discoveryQuery.getDSpaceObjectFilter() != -1) {
        result.addFilterQuery("search.resourcetype:" + discoveryQuery.getDSpaceObjectFilter());
    }
    for (String requiredField : discoveryQuery.getFieldPresentQueries()) {
        result.addFilterQuery(requiredField + ":[* TO *]");
    }

    // Paging (-1 means "not set")
    if (discoveryQuery.getStart() != -1) {
        result.setStart(discoveryQuery.getStart());
    }
    if (discoveryQuery.getMaxResults() != -1) {
        result.setRows(discoveryQuery.getMaxResults());
    }

    // Sorting: ascending unless descending was requested
    if (discoveryQuery.getSortField() != null) {
        final SolrQuery.ORDER direction = discoveryQuery.getSortOrder().equals(DiscoverQuery.SORT_ORDER.desc)
                ? SolrQuery.ORDER.desc
                : SolrQuery.ORDER.asc;
        result.addSortField(discoveryQuery.getSortField(), direction);
    }

    // Arbitrary extra request properties
    for (String propertyName : discoveryQuery.getProperties().keySet()) {
        final List<String> propertyValues = discoveryQuery.getProperties().get(propertyName);
        result.add(propertyName, propertyValues.toArray(new String[propertyValues.size()]));
    }

    // Faceting: only add facet information if there are any facets
    final List<DiscoverFacetField> facets = discoveryQuery.getFacetFields();
    if (!facets.isEmpty()) {
        for (DiscoverFacetField facet : facets) {
            final String solrField = transformFacetField(facet, facet.getField(), false);
            result.addFacetField(solrField);

            // Per-field parameters so that each facet can have its own limit/sort/offset
            final String perFieldPrefix = "f." + solrField + ".";
            result.add(perFieldPrefix + FacetParams.FACET_LIMIT, String.valueOf(facet.getLimit()));

            final String sortMode = DiscoveryConfigurationParameters.SORT.COUNT.equals(facet.getSortOrder())
                    ? FacetParams.FACET_SORT_COUNT
                    : FacetParams.FACET_SORT_INDEX;
            result.add(perFieldPrefix + FacetParams.FACET_SORT, sortMode);

            if (facet.getOffset() != -1) {
                result.setParam(perFieldPrefix + FacetParams.FACET_OFFSET, String.valueOf(facet.getOffset()));
            }
            if (facet.getPrefix() != null) {
                result.setFacetPrefix(solrField, facet.getPrefix());
            }
        }

        for (String facetQuery : discoveryQuery.getFacetQueries()) {
            result.addFacetQuery(facetQuery);
        }

        if (discoveryQuery.getFacetMinCount() != -1) {
            result.setFacetMinCount(discoveryQuery.getFacetMinCount());
        }

        result.setParam(FacetParams.FACET_OFFSET, String.valueOf(discoveryQuery.getFacetOffset()));
    }

    // Hit highlighting: each configured field is highlighted via its "_hl" variant
    if (!discoveryQuery.getHitHighlightingFields().isEmpty()) {
        result.setHighlight(true);
        result.add(HighlightParams.USE_PHRASE_HIGHLIGHTER, Boolean.TRUE.toString());
        for (DiscoverHitHighlightingField hlField : discoveryQuery.getHitHighlightingFields()) {
            final String hlFieldName = hlField.getField() + "_hl";
            result.addHighlightField(hlFieldName);
            result.add("f." + hlFieldName + "." + HighlightParams.FRAGSIZE,
                    String.valueOf(hlField.getMaxChars()));
            result.add("f." + hlFieldName + "." + HighlightParams.SNIPPETS,
                    String.valueOf(hlField.getMaxSnippets()));
        }
    }

    // Let any configured search plugins add their own parameters
    final List<SolrServiceSearchPlugin> plugins = new DSpace().getServiceManager()
            .getServicesByType(SolrServiceSearchPlugin.class);
    for (SolrServiceSearchPlugin plugin : plugins) {
        plugin.additionalSearchParameters(context, discoveryQuery, result);
    }
    return result;
}