List of usage examples for org.apache.lucene.search.highlight.TextFragment#toString()
@Override
public String toString()
From source file:com.o19s.solr.swan.highlight.SwanHighlighter.java
License:Apache License
private void doHighlightingByHighlighter(Query query, SolrQueryRequest req, NamedList docSummaries, int docId, Document doc, String fieldName) throws IOException { final SolrIndexSearcher searcher = req.getSearcher(); final IndexSchema schema = searcher.getSchema(); // TODO: Currently in trunk highlighting numeric fields is broken (Lucene) - // so we disable them until fixed (see LUCENE-3080)! // BEGIN: Hack final SchemaField schemaField = schema.getFieldOrNull(fieldName); if (schemaField != null && ((schemaField.getType() instanceof org.apache.solr.schema.TrieField) || (schemaField.getType() instanceof org.apache.solr.schema.TrieDateField))) return;// w ww . jav a2s . c om // END: Hack SolrParams params = req.getParams(); IndexableField[] docFields = doc.getFields(fieldName); List<String> listFields = new ArrayList<String>(); for (IndexableField field : docFields) { listFields.add(field.stringValue()); } String[] docTexts = listFields.toArray(new String[listFields.size()]); // according to Document javadoc, doc.getValues() never returns null. check empty instead of null if (docTexts.length == 0) return; TokenStream tokenStream; int numFragments = getMaxSnippets(fieldName, params); boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params); List<TextFragment> frags = new ArrayList<TextFragment>(); TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization try { // TokenStream tvStream = TokenSources.getTokenStream(searcher.getIndexReader(), docId, fieldName); // if (tvStream != null) { // tots = new TermOffsetsTokenStream(tvStream); // } } catch (IllegalArgumentException e) { // No problem. But we can't use TermOffsets optimization. } for (int j = 0; j < docTexts.length; j++) { if (tots != null) { // if we're using TermOffsets optimization, then get the next // field value's TokenStream (i.e. 
get field j's TokenStream) from tots: tokenStream = tots.getMultiValuedTokenStream(docTexts[j].length()); } else { // fall back to analyzer tokenStream = createAnalyzerTStream(schema, fieldName, docTexts[j]); } int maxCharsToAnalyze = params.getFieldInt(fieldName, HighlightParams.MAX_CHARS, Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE); Highlighter highlighter; if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) { if (maxCharsToAnalyze < 0) { tokenStream = new CachingTokenFilter(tokenStream); } else { tokenStream = new CachingTokenFilter( new OffsetLimitTokenFilter(tokenStream, maxCharsToAnalyze)); } // get highlighter highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tokenStream); // after highlighter initialization, reset tstream since construction of highlighter already used it tokenStream.reset(); } else { // use "the old way" highlighter = getHighlighter(query, fieldName, req); } if (maxCharsToAnalyze < 0) { highlighter.setMaxDocCharsToAnalyze(docTexts[j].length()); } else { highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze); } try { TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tokenStream, docTexts[j], mergeContiguousFragments, numFragments); for (int k = 0; k < bestTextFragments.length; k++) { if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) { frags.add(bestTextFragments[k]); } } } catch (InvalidTokenOffsetsException e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); } } // sort such that the fragments with the highest score come first Collections.sort(frags, new Comparator<TextFragment>() { public int compare(TextFragment arg0, TextFragment arg1) { return Math.round(arg1.getScore() - arg0.getScore()); } }); // convert fragments back into text // TODO: we can include score and position information in output as snippet attributes String[] summaries = null; if (frags.size() > 0) { ArrayList<String> fragTexts = new 
ArrayList<String>(); for (TextFragment fragment : frags) { if ((fragment != null) && (fragment.getScore() > 0)) { fragTexts.add(fragment.toString()); } if (fragTexts.size() >= numFragments) break; } summaries = (String[]) fragTexts.toArray(); if (summaries.length > 0) docSummaries.add(fieldName, summaries); } // no summeries made, copy text from alternate field if (summaries == null || summaries.length == 0) { alternateField(docSummaries, params, doc, fieldName); } }
From source file:com.tripod.lucene.service.AbstractLuceneService.java
License:Apache License
/** * Performs highlighting for a given query and a given document. * * @param indexSearcher the IndexSearcher performing the query * @param query the Tripod LuceneQuery// w ww. j ava 2 s . com * @param scoreDoc the Lucene ScoreDoc * @param doc the Lucene Document * @param highlighter the Highlighter to use * @param result the QueryResult to add the highlights to * @throws IOException if an error occurs performing the highlighting * @throws InvalidTokenOffsetsException if an error occurs performing the highlighting */ protected void performHighlighting(final IndexSearcher indexSearcher, final Q query, final ScoreDoc scoreDoc, final Document doc, final Highlighter highlighter, final QR result) throws IOException, InvalidTokenOffsetsException { if (query.getHighlightFields() == null || query.getHighlightFields().isEmpty()) { return; } final List<Highlight> highlights = new ArrayList<>(); final List<String> hlFieldNames = getHighlightFieldNames(query, doc); // process each field to highlight on for (String hlField : hlFieldNames) { final String text = doc.get(hlField); if (StringUtils.isEmpty(text)) { continue; } final List<String> snippets = new ArrayList<>(); final Fields tvFields = indexSearcher.getIndexReader().getTermVectors(scoreDoc.doc); final int maxStartOffset = highlighter.getMaxDocCharsToAnalyze() - 1; // get the snippets for the given field final TokenStream tokenStream = TokenSources.getTokenStream(hlField, tvFields, text, analyzer, maxStartOffset); final TextFragment[] textFragments = highlighter.getBestTextFragments(tokenStream, text, false, 10); for (TextFragment textFragment : textFragments) { if (textFragment != null && textFragment.getScore() > 0) { snippets.add(textFragment.toString()); } } // if we have snippets then add a highlight result to the QueryResult if (snippets.size() > 0) { highlights.add(new Highlight(hlField, snippets)); } } result.setHighlights(highlights); }
From source file:Example.lucene.SearchNHilight.java
public static void main(String[] args) throws IOException, ParseException, InvalidTokenOffsetsException { //... Above, create documents with two fields, one with term vectors (tv) and one without (notv) Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45); Directory index = FSDirectory.open(new File("data/indexing")); String querystr = args.length > 0 ? args[0] : "golf user"; // the "title" arg specifies the default field to use // when no field is explicitly specified in the query. Query query = new MultiFieldQueryParser(Version.LUCENE_45, new String[] { "content" }, analyzer) .parse(querystr);/*from w ww . ja va2 s. com*/ // 3. search int hitsPerPage = 10; IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopDocs hits = searcher.search(query, 10); SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); String Preview; for (int i = 0; i < 10; i++) { int id = hits.scoreDocs[i].doc; Document doc = searcher.doc(id); String text; Preview = ""; System.out.println(doc.get("url")); System.out.println(doc.get("title")); text = doc.get("content"); TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content", analyzer); TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "..."); int k = 0; for (TextFragment frag1 : frag) { if ((frag1 != null) && (frag1.getScore() > 0)) { Preview += (frag1.toString()) + "...<br>"; k++; // Get 2 Line Preview if (k >= 2) break; } } //Term vector System.out.println("-------------"); } }
From source file:Main.WebAPI.Search.java
/** * /* w w w . ja va2s . co m*/ * @param args args[0] is a query * * @throws IOException * @throws ParseException * @throws InvalidTokenOffsetsException */ public static void main(String[] args) throws IOException, ParseException, InvalidTokenOffsetsException { //... Above, create documents with two fields, one with term vectors (tv) and one without (notv) Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45); Directory index = FSDirectory.open(new File("data/indexing")); String querystr = args.length > 0 ? args[0] : "mike lab"; // the "title" arg specifies the default field to use // when no field is explicitly specified in the query. Query query = new MultiFieldQueryParser(Version.LUCENE_45, new String[] { "content" }, analyzer) .parse(querystr); // 3. search int hitsPerPage = 10; IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopDocs hits = searcher.search(query, 10); SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); String Preview; for (int i = 0; i < 10; i++) { int id = hits.scoreDocs[i].doc; Document doc = searcher.doc(id); String text; Preview = ""; System.out.println(doc.get("url")); System.out.println(doc.get("title")); text = doc.get("content"); TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content", analyzer); TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "..."); int k = 0; for (TextFragment frag1 : frag) { if ((frag1 != null) && (frag1.getScore() > 0)) { Preview += (frag1.toString()) + "...<br>"; k++; // Get 2 Line Preview if (k >= 2) break; } } //Term vector System.out.println("-------------"); } }
From source file:net.riezebos.thoth.content.search.Searcher.java
License:Apache License
public PagedList<SearchResult> search(Identity identity, String queryExpression, int pageNumber, int pageSize) throws SearchException { try {// w w w.j av a 2s . co m IndexReader reader = getIndexReader(contentManager); IndexSearcher searcher = getIndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); // We might need to restrict the results to books of the user does not have access to fragments: AccessManager accessManager = contentManager.getAccessManager(); boolean booksOnly = !accessManager.hasPermission(identity, "", Permission.READ_FRAGMENTS); if (booksOnly) { queryExpression = Indexer.INDEX_TYPE + ":" + Indexer.TYPE_DOCUMENT + " AND (" + queryExpression + ")"; } QueryParser parser = new QueryParser(Indexer.INDEX_CONTENTS, analyzer); Query query = parser.parse(queryExpression); // We add 1 to determine if there is more to be found after the current page int maxResults = pageSize * pageNumber + 1; TopDocs results = searcher.search(query, maxResults, Sort.RELEVANCE); ScoreDoc[] hits = results.scoreDocs; boolean hadMore = (hits.length == maxResults); List<SearchResult> searchResults = new ArrayList<>(); int idx = 0; for (ScoreDoc scoreDoc : hits) { if (searchResults.size() == pageSize) break; idx++; if (idx >= (pageNumber - 1) * pageSize) { Document document = searcher.doc(scoreDoc.doc); IndexableField field = document.getField(Indexer.INDEX_PATH); String documentPath = field.stringValue(); SearchResult searchResult = new SearchResult(); searchResult.setIndexNumber((pageNumber - 1) * pageSize + idx); searchResult.setDocument(documentPath); String type = document.get(Indexer.INDEX_TYPE); if (Indexer.TYPE_DOCUMENT.equals(type) || Indexer.TYPE_FRAGMENT.equals(type)) { searchResult.setResource(false); try { MarkDownDocument markDownDocument = contentManager.getMarkDownDocument(documentPath, true, CriticProcessingMode.DO_NOTHING); String contents = markDownDocument.getMarkdown(); SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); Highlighter 
highlighter = new Highlighter(htmlFormatter, new QueryScorer(query, Indexer.INDEX_CONTENTS)); highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE); TokenStream tokenStream = analyzer.tokenStream(Indexer.INDEX_CONTENTS, contents); TextFragment[] frags = highlighter.getBestTextFragments(tokenStream, contents, false, 99999); for (TextFragment frag : frags) { if ((frag != null) && (frag.getScore() > 0)) { String fragmentText = frag.toString(); searchResult.addFragment( new Fragment(ThothCoreUtil.escapeHtmlExcept("B", fragmentText))); } } } catch (FileNotFoundException e) { LOG.warn( "Index contains an invalid file reference); probably need to reindex to get rid of this. File: " + e.getMessage()); } } else { searchResult.setResource(true); String extension = ThothUtil.getExtension(documentPath); searchResult.setImage(getConfiguration().isImageExtension(extension)); searchResult.addFragment(new Fragment(document.get(Indexer.INDEX_TITLE))); } searchResults.add(searchResult); } } reader.close(); linkBooks(searchResults); PagedList<SearchResult> pagedList = new PagedList<>(searchResults, hadMore); return pagedList; } catch (Exception e) { throw new SearchException(e); } }
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndex.java
License:Apache License
private String getExcerpt(Analyzer analyzer, IndexSearcher searcher, ScoreDoc doc) throws IOException { StringBuilder excerpt = new StringBuilder(); for (IndexableField field : searcher.getIndexReader().document(doc.doc).getFields()) { String name = field.name(); // only full text or analyzed fields if (name.startsWith(FieldNames.FULLTEXT) || name.startsWith(FieldNames.ANALYZED_FIELD_PREFIX)) { String text = field.stringValue(); TokenStream tokenStream = analyzer.tokenStream(name, text); try { TextFragment[] textFragments = highlighter.getBestTextFragments(tokenStream, text, true, 2); if (textFragments != null && textFragments.length > 0) { for (TextFragment fragment : textFragments) { if (excerpt.length() > 0) { excerpt.append("..."); }//from w w w. ja va 2 s . c o m excerpt.append(fragment.toString()); } break; } } catch (InvalidTokenOffsetsException e) { LOG.error("higlighting failed", e); } } } return excerpt.toString(); }
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LucenePropertyIndex.java
License:Apache License
private String getExcerpt(Query query, Analyzer analyzer, IndexSearcher searcher, ScoreDoc doc, FieldInfos fieldInfos) throws IOException { StringBuilder excerpt = new StringBuilder(); int docID = doc.doc; List<String> names = new LinkedList<String>(); for (IndexableField field : searcher.getIndexReader().document(docID).getFields()) { String name = field.name(); // postings highlighter can be used on analyzed fields with docs, freqs, positions and offsets stored. if (name.startsWith(ANALYZED_FIELD_PREFIX) && fieldInfos.hasProx() && fieldInfos.hasOffsets()) { names.add(name);/*from ww w. j a v a 2s .c o m*/ } } if (names.size() > 0) { int[] maxPassages = new int[names.size()]; for (int i = 0; i < maxPassages.length; i++) { maxPassages[i] = 1; } try { Map<String, String[]> stringMap = postingsHighlighter.highlightFields( names.toArray(new String[names.size()]), query, searcher, new int[] { docID }, maxPassages); for (Map.Entry<String, String[]> entry : stringMap.entrySet()) { String value = Arrays.toString(entry.getValue()); if (value.contains("<b>")) { if (excerpt.length() > 0) { excerpt.append("..."); } excerpt.append(value); } } } catch (Exception e) { LOG.error("postings highlighting failed", e); } } // fallback if no excerpt could be retrieved using postings highlighter if (excerpt.length() == 0) { for (IndexableField field : searcher.getIndexReader().document(doc.doc).getFields()) { String name = field.name(); // only full text or analyzed fields if (name.startsWith(FieldNames.FULLTEXT) || name.startsWith(FieldNames.ANALYZED_FIELD_PREFIX)) { String text = field.stringValue(); TokenStream tokenStream = analyzer.tokenStream(name, text); try { TextFragment[] textFragments = highlighter.getBestTextFragments(tokenStream, text, true, 1); if (textFragments != null && textFragments.length > 0) { for (TextFragment fragment : textFragments) { if (excerpt.length() > 0) { excerpt.append("..."); } excerpt.append(fragment.toString()); } break; } } catch 
(InvalidTokenOffsetsException e) { LOG.error("higlighting failed", e); } } } } return excerpt.toString(); }
From source file:org.apache.jena.query.text.TextIndexLucene.java
License:Apache License
/**
 * Joins the text of the given fragments into one string, separated by {@code opts.fragSep}.
 *
 * <p>When {@code opts.joinHi} is set, each fragment first has every match of
 * {@code opts.end + Z_MORE_SEPS + opts.start} replaced by its first capture group.
 *
 * @param frags the fragments to render, in output order
 * @param opts  the highlight options supplying the separator, the tags and the join flag
 * @return the concatenated fragment text; empty when {@code frags} is empty
 */
private String frags2string(TextFragment[] frags, HighlightOpts opts) {
    // StringBuilder avoids re-copying the accumulated text for every fragment.
    final StringBuilder rez = new StringBuilder();
    // Compiled lazily and only once; String.replaceAll would recompile it per fragment.
    java.util.regex.Pattern joinPattern = null;
    String sep = "";

    for (TextFragment f : frags) {
        String s = f.toString();
        if (opts.joinHi) {
            if (joinPattern == null) {
                joinPattern = java.util.regex.Pattern.compile(opts.end + Z_MORE_SEPS + opts.start);
            }
            s = joinPattern.matcher(s).replaceAll("$1");
        }
        rez.append(sep).append(s);
        // every fragment after the first is preceded by the separator
        sep = opts.fragSep;
    }

    return rez.toString();
}
From source file:org.apache.solr.highlight.DefaultSolrHighlighter.java
License:Apache License
private void doHighlightingByHighlighter(Query query, SolrQueryRequest req, NamedList docSummaries, int docId, Document doc, String fieldName) throws IOException { final SolrIndexSearcher searcher = req.getSearcher(); final IndexSchema schema = searcher.getSchema(); // TODO: Currently in trunk highlighting numeric fields is broken (Lucene) - // so we disable them until fixed (see LUCENE-3080)! // BEGIN: Hack final SchemaField schemaField = schema.getFieldOrNull(fieldName); if (schemaField != null && ((schemaField.getType() instanceof org.apache.solr.schema.TrieField) || (schemaField.getType() instanceof org.apache.solr.schema.TrieDateField))) return;// w w w.j a v a2 s. com // END: Hack SolrParams params = req.getParams(); // preserve order of values in a multiValued list boolean preserveMulti = params.getFieldBool(fieldName, HighlightParams.PRESERVE_MULTI, false); List<IndexableField> allFields = doc.getFields(); if (allFields != null && allFields.size() == 0) return; // No explicit contract that getFields returns != null, // although currently it can't. 
TokenStream tstream = null; int numFragments = getMaxSnippets(fieldName, params); boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params); String[] summaries = null; List<TextFragment> frags = new ArrayList<TextFragment>(); TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization TokenStream tvStream = TokenSources.getTokenStreamWithOffsets(searcher.getIndexReader(), docId, fieldName); if (tvStream != null) { tots = new TermOffsetsTokenStream(tvStream); } int mvToExamine = Integer.parseInt(req.getParams().get(HighlightParams.MAX_MULTIVALUED_TO_EXAMINE, Integer.toString(Integer.MAX_VALUE))); int mvToMatch = Integer.parseInt( req.getParams().get(HighlightParams.MAX_MULTIVALUED_TO_MATCH, Integer.toString(Integer.MAX_VALUE))); for (IndexableField thisField : allFields) { if (mvToExamine <= 0 || mvToMatch <= 0) break; if (!thisField.name().equals(fieldName)) continue; // Is there a better way to do this? --mvToExamine; String thisText = thisField.stringValue(); if (tots != null) { // if we're using TermOffsets optimization, then get the next // field value's TokenStream (i.e. 
get field j's TokenStream) from tots: tstream = tots.getMultiValuedTokenStream(thisText.length()); } else { // fall back to analyzer tstream = createAnalyzerTStream(schema, fieldName, thisText); } int maxCharsToAnalyze = params.getFieldInt(fieldName, HighlightParams.MAX_CHARS, Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE); Highlighter highlighter; if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) { if (maxCharsToAnalyze < 0) { tstream = new CachingTokenFilter(tstream); } else { tstream = new CachingTokenFilter(new OffsetLimitTokenFilter(tstream, maxCharsToAnalyze)); } // get highlighter highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tstream); // after highlighter initialization, reset tstream since construction of highlighter already used it tstream.reset(); } else { // use "the old way" highlighter = getHighlighter(query, fieldName, req); } if (maxCharsToAnalyze < 0) { highlighter.setMaxDocCharsToAnalyze(thisText.length()); } else { highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze); } try { TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tstream, thisText, mergeContiguousFragments, numFragments); for (int k = 0; k < bestTextFragments.length; k++) { if (preserveMulti) { if (bestTextFragments[k] != null) { frags.add(bestTextFragments[k]); --mvToMatch; } } else { if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) { frags.add(bestTextFragments[k]); --mvToMatch; } } } } catch (InvalidTokenOffsetsException e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); } } // sort such that the fragments with the highest score come first if (!preserveMulti) { Collections.sort(frags, new Comparator<TextFragment>() { @Override public int compare(TextFragment arg0, TextFragment arg1) { return Math.round(arg1.getScore() - arg0.getScore()); } }); } // convert fragments back into text // TODO: we can include score and position information in output as 
snippet attributes if (frags.size() > 0) { ArrayList<String> fragTexts = new ArrayList<String>(); for (TextFragment fragment : frags) { if (preserveMulti) { if (fragment != null) { fragTexts.add(fragment.toString()); } } else { if ((fragment != null) && (fragment.getScore() > 0)) { fragTexts.add(fragment.toString()); } } if (fragTexts.size() >= numFragments && !preserveMulti) break; } summaries = fragTexts.toArray(new String[0]); if (summaries.length > 0) docSummaries.add(fieldName, summaries); } // no summeries made, copy text from alternate field if (summaries == null || summaries.length == 0) { alternateField(docSummaries, params, doc, fieldName); } }
From source file:org.apache.solr.highlight.ParsedContentSolrHighlighter.java
License:Apache License
/** * Generates a list of Highlighted query fragments for each item in a list * of documents, or returns null if highlighting is disabled. * //ww w. j a v a2 s.com * @param docs * query results * @param query * the query * @param req * the current request * @param defaultFields * default list of fields to summarize * @return NamedList containing a NamedList for each document, which in * turns contains sets (field, summary) pairs. */ @SuppressWarnings("unchecked") public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException { SolrParams params = req.getParams(); if (!isHighlightingEnabled(params)) return null; SolrIndexSearcher searcher = req.getSearcher(); IndexSchema schema = searcher.getSchema(); NamedList fragments = new SimpleOrderedMap(); String[] fieldNames = getHighlightFields(query, req, defaultFields); Document[] readDocs = new Document[docs.size()]; { // pre-fetch documents using the Searcher's doc cache Set<String> fset = new HashSet<String>(); for (String f : fieldNames) { fset.add(f); } // fetch unique key if one exists. 
SchemaField keyField = schema.getUniqueKeyField(); if (null != keyField) fset.add(keyField.getName()); searcher.readDocs(readDocs, docs, fset); } // Highlight each document DocIterator iterator = docs.iterator(); for (int i = 0; i < docs.size(); i++) { int docId = iterator.nextDoc(); Document doc = readDocs[i]; NamedList docSummaries = new SimpleOrderedMap(); for (String fieldName : fieldNames) { fieldName = fieldName.trim(); // begin String[] docTexts = doc.getValues(fieldName); //Highlight only the parsed content, instead of all fields if (IndexField.DEFAULT_SEARCH_FIELD.equals(fieldName)) { docTexts = doc.getValues(IndexField.PARSED_CONTENT_FIELD); } // IndexFieldServices indexFieldServices = ConstellioSpringUtils.getIndexFieldServices(); // String collectionName = params.get(ConstellioSolrQueryParams.COLLECTION_NAME); // RecordCollectionServices collectionServices = ConstellioSpringUtils.getRecordCollectionServices(); // RecordCollection collection = collectionServices.get(collectionName); // IndexField defaultSearchField = collection.getDefaultSearchIndexField(); // // List<String> defaultSearchFieldDocTextsList = new ArrayList<String>(); // for (CopyField copyField : defaultSearchField.getCopyFieldsDest()) { // IndexField sourceIndexField = copyField.getIndexFieldSource(); // if (sourceIndexField != null) { // String sourceIndexFieldName = sourceIndexField.getName(); // String[] copyFieldValues = doc.getValues(sourceIndexFieldName); // if (copyFieldValues != null) { // for (int k = 0; k < copyFieldValues.length; k++) { // String copyFieldValue = copyFieldValues[k]; // if (!defaultSearchFieldDocTextsList.contains(copyFieldValue)) { // defaultSearchFieldDocTextsList.add(copyFieldValue); // } // } // } // } // } // docTexts = defaultSearchFieldDocTextsList.toArray(new String[0]); // if ((docTexts == null || docTexts.length == 0)) { // RecordServices recordServices = ConstellioSpringUtils.getRecordServices(); // Long recordId = new 
Long(doc.getField(IndexField.RECORD_ID_FIELD).stringValue()); // Record record; // try { // record = recordServices.get(recordId, collection); // } catch (Exception e) { // record = null; // e.printStackTrace(); // } // if (record != null) { // List<Object> fieldValues = indexFieldServices.extractFieldValues(record, defaultSearchField); // // List<String> docTextsList = new ArrayList<String>(); // for (Object fieldValue : fieldValues) { // String strFieldValue = fieldValue != null ? fieldValue.toString() : null; // if (StringUtils.isNotBlank(strFieldValue)) { // docTextsList.add(strFieldValue); // } // } // // if (!docTextsList.isEmpty()) { // docTexts = docTextsList.toArray(new String[0]); // } // } // } // // end if (docTexts == null) continue; TokenStream tstream = null; int numFragments = getMaxSnippets(fieldName, params); boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params); String[] summaries = null; List<TextFragment> frags = new ArrayList<TextFragment>(); for (int j = 0; j < docTexts.length; j++) { // create TokenStream try { // attempt term vectors tstream = TokenSources.getTokenStreamWithOffsets(searcher.getIndexReader(), docId, fieldName); } catch (IllegalArgumentException e) { // fall back to anaylzer tstream = new TokenOrderingFilter( schema.getAnalyzer().tokenStream(fieldName, new StringReader(docTexts[j])), 10); } Highlighter highlighter; if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER))) { // wrap CachingTokenFilter around TokenStream for reuse tstream = new CachingTokenFilter(tstream); // get highlighter highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tstream); // after highlighter initialization, reset tstream since construction of highlighter // already used it tstream.reset(); } else { // use "the old way" highlighter = getHighlighter(query, fieldName, req); } int maxCharsToAnalyze = params.getFieldInt(fieldName, HighlightParams.MAX_CHARS, 
Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE); if (maxCharsToAnalyze < 0) { highlighter.setMaxDocCharsToAnalyze(docTexts[j].length()); } else { highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze); } try { TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tstream, docTexts[j], mergeContiguousFragments, numFragments); for (int k = 0; k < bestTextFragments.length; k++) { if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) { frags.add(bestTextFragments[k]); } } } catch (InvalidTokenOffsetsException e) { throw new RuntimeException(e); } } // sort such that the fragments with the highest score come first Collections.sort(frags, new Comparator<TextFragment>() { public int compare(TextFragment arg0, TextFragment arg1) { return Math.round(arg1.getScore() - arg0.getScore()); } }); // convert fragments back into text // TODO: we can include score and position information in output as snippet attributes if (frags.size() > 0) { ArrayList<String> fragTexts = new ArrayList<String>(); for (TextFragment fragment : frags) { if ((fragment != null) && (fragment.getScore() > 0)) { // fragTexts.add(fragment.toString()); fragTexts.add(StringEscapeUtils.escapeHtml(fragment.toString())); } if (fragTexts.size() >= numFragments) break; } summaries = fragTexts.toArray(new String[0]); if (summaries.length > 0) docSummaries.add(fieldName, summaries); } // no summeries made, copy text from alternate field if (summaries == null || summaries.length == 0) { String alternateField = req.getParams().getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD); if (alternateField != null && alternateField.length() > 0) { String[] altTexts = doc.getValues(alternateField); if (altTexts != null && altTexts.length > 0) { int alternateFieldLen = req.getParams().getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH, 0); if (alternateFieldLen <= 0) { docSummaries.add(fieldName, altTexts); } else { List<String> altList = new ArrayList<String>(); int len = 0; for 
(String altText : altTexts) { altList.add(len + altText.length() > alternateFieldLen ? altText.substring(0, alternateFieldLen - len) : altText); len += altText.length(); if (len >= alternateFieldLen) break; } docSummaries.add(fieldName, altList); } } } } } String printId = schema.printableUniqueKey(doc); fragments.add(printId == null ? null : printId, docSummaries); } return fragments; }