List of usage examples for org.apache.solr.common.params HighlightParams USE_PHRASE_HIGHLIGHTER
String USE_PHRASE_HIGHLIGHTER
To view the source code for org.apache.solr.common.params HighlightParams USE_PHRASE_HIGHLIGHTER.
Click Source Link
From source file:com.o19s.solr.swan.highlight.SwanHighlighter.java
License:Apache License
/** * Generates a list of Highlighted query fragments for each item in a list * of documents, or returns null if highlighting is disabled. * * @param docs query results//from ww w . j a v a 2 s . c o m * @param query the query * @param req the current request * @param defaultFields default list of fields to summarize * * @return NamedList containing a NamedList for each document, which in * turns contains sets (field, summary) pairs. */ @Override @SuppressWarnings("unchecked") public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException { NamedList fragments = new SimpleOrderedMap(); SolrParams params = req.getParams(); if (!isHighlightingEnabled(params)) return null; SolrIndexSearcher searcher = req.getSearcher(); IndexSchema schema = searcher.getSchema(); String[] fieldNames = getHighlightFields(query, req, defaultFields); Set<String> fset = new HashSet<String>(); { // pre-fetch documents using the Searcher's doc cache Collections.addAll(fset, fieldNames); // fetch unique key if one exists. SchemaField keyField = schema.getUniqueKeyField(); if (null != keyField) fset.add(keyField.getName()); } //CHANGE start // int[] docIds = new int[docs.swordize()]; TreeSet<Integer> docIds = new TreeSet<Integer>(); DocIterator iterator = docs.iterator(); for (int i = 0; i < docs.size(); i++) { docIds.add(iterator.nextDoc()); } // Get Frag list builder String fragListBuilderString = params.get(HighlightParams.FRAG_LIST_BUILDER).toLowerCase(); FragListBuilder fragListBuilder; if (fragListBuilderString.equals("single")) { fragListBuilder = new SingleFragListBuilder(); } else { fragListBuilder = new com.o19s.solr.swan.highlight.SimpleFragListBuilder(); } // get FastVectorHighlighter instance out of the processing loop SpanAwareFastVectorHighlighter safvh = new SpanAwareFastVectorHighlighter( // FVH cannot process hl.usePhraseHighlighter parameter per-field basis params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true), // FVH cannot process hl.requireFieldMatch parameter per-field basis params.getBool(HighlightParams.FIELD_MATCH, false), fragListBuilder, //new com.o19s.solr.swan.highlight.ScoreOrderFragmentsBuilder(), new WordHashFragmentsBuilder(), // List of docIds to filter spans docIds); safvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE)); SpanAwareFieldQuery fieldQuery = safvh.getFieldQuery(query, searcher.getIndexReader(), docIds); // Highlight each document for (int docId : docIds) { Document doc = searcher.doc(docId, fset); NamedList docSummaries = new SimpleOrderedMap(); for (String fieldName : fieldNames) { fieldName = fieldName.trim(); if (useFastVectorHighlighter(params, schema, fieldName)) doHighlightingByFastVectorHighlighter(safvh, fieldQuery, req, docSummaries, docId, doc, fieldName); else doHighlightingByHighlighter(query, req, docSummaries, docId, doc, fieldName); } String printId = schema.printableUniqueKey(doc); fragments.add(printId == null ? null : printId, docSummaries); } //CHANGE end return fragments; }
From source file:com.o19s.solr.swan.highlight.SwanHighlighter.java
License:Apache License
private void doHighlightingByHighlighter(Query query, SolrQueryRequest req, NamedList docSummaries, int docId, Document doc, String fieldName) throws IOException { final SolrIndexSearcher searcher = req.getSearcher(); final IndexSchema schema = searcher.getSchema(); // TODO: Currently in trunk highlighting numeric fields is broken (Lucene) - // so we disable them until fixed (see LUCENE-3080)! // BEGIN: Hack final SchemaField schemaField = schema.getFieldOrNull(fieldName); if (schemaField != null && ((schemaField.getType() instanceof org.apache.solr.schema.TrieField) || (schemaField.getType() instanceof org.apache.solr.schema.TrieDateField))) return;// www . ja va2s . c o m // END: Hack SolrParams params = req.getParams(); IndexableField[] docFields = doc.getFields(fieldName); List<String> listFields = new ArrayList<String>(); for (IndexableField field : docFields) { listFields.add(field.stringValue()); } String[] docTexts = listFields.toArray(new String[listFields.size()]); // according to Document javadoc, doc.getValues() never returns null. check empty instead of null if (docTexts.length == 0) return; TokenStream tokenStream; int numFragments = getMaxSnippets(fieldName, params); boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params); List<TextFragment> frags = new ArrayList<TextFragment>(); TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization try { // TokenStream tvStream = TokenSources.getTokenStream(searcher.getIndexReader(), docId, fieldName); // if (tvStream != null) { // tots = new TermOffsetsTokenStream(tvStream); // } } catch (IllegalArgumentException e) { // No problem. But we can't use TermOffsets optimization. } for (int j = 0; j < docTexts.length; j++) { if (tots != null) { // if we're using TermOffsets optimization, then get the next // field value's TokenStream (i.e. get field j's TokenStream) from tots: tokenStream = tots.getMultiValuedTokenStream(docTexts[j].length()); } else { // fall back to analyzer tokenStream = createAnalyzerTStream(schema, fieldName, docTexts[j]); } int maxCharsToAnalyze = params.getFieldInt(fieldName, HighlightParams.MAX_CHARS, Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE); Highlighter highlighter; if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) { if (maxCharsToAnalyze < 0) { tokenStream = new CachingTokenFilter(tokenStream); } else { tokenStream = new CachingTokenFilter( new OffsetLimitTokenFilter(tokenStream, maxCharsToAnalyze)); } // get highlighter highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tokenStream); // after highlighter initialization, reset tstream since construction of highlighter already used it tokenStream.reset(); } else { // use "the old way" highlighter = getHighlighter(query, fieldName, req); } if (maxCharsToAnalyze < 0) { highlighter.setMaxDocCharsToAnalyze(docTexts[j].length()); } else { highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze); } try { TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tokenStream, docTexts[j], mergeContiguousFragments, numFragments); for (int k = 0; k < bestTextFragments.length; k++) { if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) { frags.add(bestTextFragments[k]); } } } catch (InvalidTokenOffsetsException e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); } } // sort such that the fragments with the highest score come first Collections.sort(frags, new Comparator<TextFragment>() { public int compare(TextFragment arg0, TextFragment arg1) { return Math.round(arg1.getScore() - arg0.getScore()); } }); // convert fragments back into text // TODO: we can include score and position information in output as snippet attributes String[] summaries = null; if (frags.size() > 0) { ArrayList<String> fragTexts = new ArrayList<String>(); for (TextFragment fragment : frags) { if ((fragment != null) && (fragment.getScore() > 0)) { fragTexts.add(fragment.toString()); } if (fragTexts.size() >= numFragments) break; } summaries = (String[]) fragTexts.toArray(); if (summaries.length > 0) docSummaries.add(fieldName, summaries); } // no summeries made, copy text from alternate field if (summaries == null || summaries.length == 0) { alternateField(docSummaries, params, doc, fieldName); } }
From source file:org.alfresco.repo.search.impl.solr.SolrQueryHTTPClient.java
License:Open Source License
protected void buildHightlightParameters(SearchParameters searchParameters, URLCodec encoder, StringBuilder url) throws UnsupportedEncodingException { if (searchParameters.getHighlight() != null) { url.append("&").append(HighlightParams.HIGHLIGHT + "=true"); url.append("&" + HighlightParams.HIGHLIGHT + ".q=") .append(encoder.encode(searchParameters.getSearchTerm(), "UTF-8")); if (searchParameters.getHighlight().getSnippetCount() != null) { url.append("&").append(HighlightParams.SNIPPETS + "=") .append(searchParameters.getHighlight().getSnippetCount()); }//from ww w . ja v a 2 s. c o m if (searchParameters.getHighlight().getFragmentSize() != null) { url.append("&").append(HighlightParams.FRAGSIZE + "=") .append(searchParameters.getHighlight().getFragmentSize()); } if (searchParameters.getHighlight().getMaxAnalyzedChars() != null) { url.append("&").append(HighlightParams.MAX_CHARS + "=") .append(searchParameters.getHighlight().getMaxAnalyzedChars()); } if (searchParameters.getHighlight().getMergeContiguous() != null) { url.append("&").append(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS + "=") .append(searchParameters.getHighlight().getMergeContiguous()); } if (searchParameters.getHighlight().getUsePhraseHighlighter() != null) { url.append("&").append(HighlightParams.USE_PHRASE_HIGHLIGHTER + "=") .append(searchParameters.getHighlight().getUsePhraseHighlighter()); } if (searchParameters.getHighlight().getPrefix() != null) { url.append("&").append(HighlightParams.SIMPLE_PRE + "=") .append(encoder.encode(searchParameters.getHighlight().getPrefix(), "UTF-8")); } if (searchParameters.getHighlight().getPostfix() != null) { url.append("&").append(HighlightParams.SIMPLE_POST + "=") .append(encoder.encode(searchParameters.getHighlight().getPostfix(), "UTF-8")); } if (searchParameters.getHighlight().getFields() != null && !searchParameters.getHighlight().getFields().isEmpty()) { List<String> fieldNames = new ArrayList<>(searchParameters.getHighlight().getFields().size()); for (FieldHighlightParameters aField : searchParameters.getHighlight().getFields()) { ParameterCheck.mandatoryString("highlight field", aField.getField()); fieldNames.add(aField.getField()); if (aField.getSnippetCount() != null) { url.append("&f.").append(encoder.encode(aField.getField(), "UTF-8")) .append("." + HighlightParams.SNIPPETS + "=").append(aField.getSnippetCount()); } if (aField.getFragmentSize() != null) { url.append("&f.").append(encoder.encode(aField.getField(), "UTF-8")) .append("." + HighlightParams.FRAGSIZE + "=").append(aField.getFragmentSize()); } if (aField.getFragmentSize() != null) { url.append("&f.").append(encoder.encode(aField.getField(), "UTF-8")) .append("." + HighlightParams.FRAGSIZE + "=").append(aField.getFragmentSize()); } if (aField.getMergeContiguous() != null) { url.append("&f.").append(encoder.encode(aField.getField(), "UTF-8")) .append("." + HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS + "=") .append(aField.getMergeContiguous()); } if (aField.getPrefix() != null) { url.append("&f.").append(encoder.encode(aField.getField(), "UTF-8")) .append("." + HighlightParams.SIMPLE_PRE + "=") .append(encoder.encode(aField.getPrefix(), "UTF-8")); } if (aField.getPostfix() != null) { url.append("&f.").append(encoder.encode(aField.getField(), "UTF-8")) .append("." + HighlightParams.SIMPLE_POST + "=") .append(encoder.encode(aField.getPostfix(), "UTF-8")); } } url.append("&").append(HighlightParams.FIELDS + "=") .append(encoder.encode(String.join(",", fieldNames), "UTF-8")); } } }
From source file:org.alfresco.solr.highlight.AlfrescoHighlighterTest.java
License:Open Source License
@Test public void highlightingPhraseQueriesTest() { logger.info("######### Testing PHRASE QUERIES ###########"); //Phrase hightling is on by default SolrServletRequest req = areq(/*from w ww.java 2 s.com*/ params("q", "name:long", "qt", "/afts", "start", "0", "rows", "5", HighlightParams.HIGHLIGHT, "true", HighlightParams.Q, "\"some long\"", HighlightParams.FIELDS, "name", HighlightParams.SIMPLE_PRE, "(", HighlightParams.SIMPLE_POST, ")", HighlightParams.SNIPPETS, String.valueOf(1), HighlightParams.FRAGSIZE, String.valueOf(100)), "{\"locales\":[\"en\"], \"tenants\": [ \"\" ]}"); assertQ(req, "//lst[@name='highlighting']/lst/arr/str[.='this is (some) (long) text. It has the word long in many places. In fact, it has long on some']"); req = areq( params("q", "name:long", "qt", "/afts", "start", "0", "rows", "5", HighlightParams.HIGHLIGHT, "true", HighlightParams.Q, "\"some long\"", HighlightParams.FIELDS, "name", HighlightParams.USE_PHRASE_HIGHLIGHTER, "false", HighlightParams.SIMPLE_PRE, "(", HighlightParams.SIMPLE_POST, ")", HighlightParams.SNIPPETS, String.valueOf(1), HighlightParams.FRAGSIZE, String.valueOf(100)), "{\"locales\":[\"en\"], \"tenants\": [ \"\" ]}"); assertQ(req, "//lst[@name='highlighting']/lst/arr/str[.='(some) very (long) name']", "//lst[@name='highlighting']/lst/arr/str[.='this is (some) (long) text. It has the word (long) in many places. In fact, it has (long) on (some)']"); }
From source file:org.dspace.discovery.SolrServiceImpl.java
License:BSD License
protected SolrQuery resolveToSolrQuery(Context context, DiscoverQuery discoveryQuery, boolean includeUnDiscoverable) { SolrQuery solrQuery = new SolrQuery(); String query = "*:*"; if (discoveryQuery.getQuery() != null) { query = discoveryQuery.getQuery(); }// w w w .ja v a 2 s. c om solrQuery.setQuery(query); if (discoveryQuery.isSpellCheck()) { solrQuery.setParam(SpellingParams.SPELLCHECK_Q, query); solrQuery.setParam(SpellingParams.SPELLCHECK_COLLATE, Boolean.TRUE); solrQuery.setParam("spellcheck", Boolean.TRUE); } if (!includeUnDiscoverable) { solrQuery.addFilterQuery("NOT(withdrawn:true)"); solrQuery.addFilterQuery("NOT(discoverable:false)"); } for (int i = 0; i < discoveryQuery.getFilterQueries().size(); i++) { String filterQuery = discoveryQuery.getFilterQueries().get(i); solrQuery.addFilterQuery(filterQuery); } if (discoveryQuery.getDSpaceObjectFilter() != -1) { solrQuery.addFilterQuery("search.resourcetype:" + discoveryQuery.getDSpaceObjectFilter()); } for (int i = 0; i < discoveryQuery.getFieldPresentQueries().size(); i++) { String filterQuery = discoveryQuery.getFieldPresentQueries().get(i); solrQuery.addFilterQuery(filterQuery + ":[* TO *]"); } if (discoveryQuery.getStart() != -1) { solrQuery.setStart(discoveryQuery.getStart()); } if (discoveryQuery.getMaxResults() != -1) { solrQuery.setRows(discoveryQuery.getMaxResults()); } if (discoveryQuery.getSortField() != null) { SolrQuery.ORDER order = SolrQuery.ORDER.asc; if (discoveryQuery.getSortOrder().equals(DiscoverQuery.SORT_ORDER.desc)) order = SolrQuery.ORDER.desc; solrQuery.addSortField(discoveryQuery.getSortField(), order); } for (String property : discoveryQuery.getProperties().keySet()) { List<String> values = discoveryQuery.getProperties().get(property); solrQuery.add(property, values.toArray(new String[values.size()])); } List<DiscoverFacetField> facetFields = discoveryQuery.getFacetFields(); if (0 < facetFields.size()) { //Only add facet information if there are any facets for (DiscoverFacetField facetFieldConfig : facetFields) { String field = transformFacetField(facetFieldConfig, facetFieldConfig.getField(), false); solrQuery.addFacetField(field); // Setting the facet limit in this fashion ensures that each facet can have its own max solrQuery.add("f." + field + "." + FacetParams.FACET_LIMIT, String.valueOf(facetFieldConfig.getLimit())); String facetSort; if (DiscoveryConfigurationParameters.SORT.COUNT.equals(facetFieldConfig.getSortOrder())) { facetSort = FacetParams.FACET_SORT_COUNT; } else { facetSort = FacetParams.FACET_SORT_INDEX; } solrQuery.add("f." + field + "." + FacetParams.FACET_SORT, facetSort); if (facetFieldConfig.getOffset() != -1) { solrQuery.setParam("f." + field + "." + FacetParams.FACET_OFFSET, String.valueOf(facetFieldConfig.getOffset())); } if (facetFieldConfig.getPrefix() != null) { solrQuery.setFacetPrefix(field, facetFieldConfig.getPrefix()); } } List<String> facetQueries = discoveryQuery.getFacetQueries(); for (String facetQuery : facetQueries) { solrQuery.addFacetQuery(facetQuery); } if (discoveryQuery.getFacetMinCount() != -1) { solrQuery.setFacetMinCount(discoveryQuery.getFacetMinCount()); } solrQuery.setParam(FacetParams.FACET_OFFSET, String.valueOf(discoveryQuery.getFacetOffset())); } if (0 < discoveryQuery.getHitHighlightingFields().size()) { solrQuery.setHighlight(true); solrQuery.add(HighlightParams.USE_PHRASE_HIGHLIGHTER, Boolean.TRUE.toString()); for (DiscoverHitHighlightingField highlightingField : discoveryQuery.getHitHighlightingFields()) { solrQuery.addHighlightField(highlightingField.getField() + "_hl"); solrQuery.add("f." + highlightingField.getField() + "_hl." + HighlightParams.FRAGSIZE, String.valueOf(highlightingField.getMaxChars())); solrQuery.add("f." + highlightingField.getField() + "_hl." + HighlightParams.SNIPPETS, String.valueOf(highlightingField.getMaxSnippets())); } } //Add any configured search plugins ! List<SolrServiceSearchPlugin> solrServiceSearchPlugins = new DSpace().getServiceManager() .getServicesByType(SolrServiceSearchPlugin.class); for (SolrServiceSearchPlugin searchPlugin : solrServiceSearchPlugins) { searchPlugin.additionalSearchParameters(context, discoveryQuery, solrQuery); } return solrQuery; }