Example usage for org.apache.lucene.search.highlight TextFragment toString

List of usage examples for org.apache.lucene.search.highlight TextFragment toString

Introduction

On this page you can find example usage of org.apache.lucene.search.highlight TextFragment.toString().

Prototype

@Override
    public String toString() 

Source Link

Usage

From source file:com.o19s.solr.swan.highlight.SwanHighlighter.java

License:Apache License

/**
 * Highlights one stored field of a document with the classic Lucene {@link Highlighter}
 * and adds the resulting snippets to {@code docSummaries} keyed by {@code fieldName}.
 * Falls back to {@code alternateField} when no scoring fragment was produced.
 *
 * @param query        the query whose terms are highlighted
 * @param req          the current Solr request (supplies params and the searcher)
 * @param docSummaries output list receiving {@code fieldName -> String[]} snippet entries
 * @param docId        Lucene internal document id (unused here; kept for interface parity)
 * @param doc          the stored document whose field values are highlighted
 * @param fieldName    the field to highlight
 * @throws IOException if token stream creation or highlighting fails at the I/O level
 */
private void doHighlightingByHighlighter(Query query, SolrQueryRequest req, NamedList docSummaries, int docId,
        Document doc, String fieldName) throws IOException {
    final SolrIndexSearcher searcher = req.getSearcher();
    final IndexSchema schema = searcher.getSchema();

    // TODO: Currently in trunk highlighting numeric fields is broken (Lucene) -
    // so we disable them until fixed (see LUCENE-3080)!
    // BEGIN: Hack
    final SchemaField schemaField = schema.getFieldOrNull(fieldName);
    if (schemaField != null && ((schemaField.getType() instanceof org.apache.solr.schema.TrieField)
            || (schemaField.getType() instanceof org.apache.solr.schema.TrieDateField)))
        return;
    // END: Hack

    SolrParams params = req.getParams();

    // Collect every stored value of the field (multiValued fields have several).
    IndexableField[] docFields = doc.getFields(fieldName);
    List<String> listFields = new ArrayList<String>();
    for (IndexableField field : docFields) {
        listFields.add(field.stringValue());
    }

    String[] docTexts = listFields.toArray(new String[listFields.size()]);

    // according to Document javadoc, doc.getValues() never returns null. check empty instead of null
    if (docTexts.length == 0)
        return;

    TokenStream tokenStream;
    int numFragments = getMaxSnippets(fieldName, params);
    boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);

    List<TextFragment> frags = new ArrayList<TextFragment>();

    TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization
    try {
        // TermOffsets optimization is currently disabled (commented out); tots stays null
        // and we always fall back to re-analyzing the stored text below.
        //      TokenStream tvStream = TokenSources.getTokenStream(searcher.getIndexReader(), docId, fieldName);
        //      if (tvStream != null) {
        //        tots = new TermOffsetsTokenStream(tvStream);
        //      }
    } catch (IllegalArgumentException e) {
        // No problem. But we can't use TermOffsets optimization.
    }

    for (int j = 0; j < docTexts.length; j++) {
        if (tots != null) {
            // if we're using TermOffsets optimization, then get the next
            // field value's TokenStream (i.e. get field j's TokenStream) from tots:
            tokenStream = tots.getMultiValuedTokenStream(docTexts[j].length());
        } else {
            // fall back to analyzer
            tokenStream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
        }

        int maxCharsToAnalyze = params.getFieldInt(fieldName, HighlightParams.MAX_CHARS,
                Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);

        Highlighter highlighter;
        if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) {
            // Negative maxChars means "no limit" - cache the whole stream.
            if (maxCharsToAnalyze < 0) {
                tokenStream = new CachingTokenFilter(tokenStream);
            } else {
                tokenStream = new CachingTokenFilter(
                        new OffsetLimitTokenFilter(tokenStream, maxCharsToAnalyze));
            }

            // get highlighter
            highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tokenStream);

            // after highlighter initialization, reset tstream since construction of highlighter already used it
            tokenStream.reset();
        } else {
            // use "the old way"
            highlighter = getHighlighter(query, fieldName, req);
        }

        if (maxCharsToAnalyze < 0) {
            highlighter.setMaxDocCharsToAnalyze(docTexts[j].length());
        } else {
            highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
        }

        try {
            TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tokenStream, docTexts[j],
                    mergeContiguousFragments, numFragments);
            // Keep only fragments that actually matched something.
            for (int k = 0; k < bestTextFragments.length; k++) {
                if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
                    frags.add(bestTextFragments[k]);
                }
            }
        } catch (InvalidTokenOffsetsException e) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
        }
    }
    // sort such that the fragments with the highest score come first
    Collections.sort(frags, new Comparator<TextFragment>() {
        @Override
        public int compare(TextFragment arg0, TextFragment arg1) {
            // FIX: Math.round(arg1.getScore() - arg0.getScore()) collapsed scores
            // less than 0.5 apart to "equal"; Float.compare orders correctly.
            return Float.compare(arg1.getScore(), arg0.getScore());
        }
    });

    // convert fragments back into text
    // TODO: we can include score and position information in output as snippet attributes
    String[] summaries = null;
    if (frags.size() > 0) {
        ArrayList<String> fragTexts = new ArrayList<String>();
        for (TextFragment fragment : frags) {
            if ((fragment != null) && (fragment.getScore() > 0)) {
                fragTexts.add(fragment.toString());
            }
            if (fragTexts.size() >= numFragments)
                break;
        }
        // FIX: "(String[]) fragTexts.toArray()" threw ClassCastException at runtime
        // because the no-arg toArray() returns Object[]; pass a typed array instead.
        summaries = fragTexts.toArray(new String[0]);
        if (summaries.length > 0)
            docSummaries.add(fieldName, summaries);
    }
    // no summaries made, copy text from alternate field
    if (summaries == null || summaries.length == 0) {
        alternateField(docSummaries, params, doc, fieldName);
    }
}

From source file:com.tripod.lucene.service.AbstractLuceneService.java

License:Apache License

/**
 * Performs highlighting for a given query and a given document.
 *
 * @param indexSearcher the IndexSearcher performing the query
 * @param query the Tripod LuceneQuery// w ww.  j ava 2 s .  com
 * @param scoreDoc the Lucene ScoreDoc
 * @param doc the Lucene Document
 * @param highlighter the Highlighter to use
 * @param result the QueryResult to add the highlights to
 * @throws IOException if an error occurs performing the highlighting
 * @throws InvalidTokenOffsetsException if an error occurs performing the highlighting
 */
/**
 * Computes highlighted snippets for each requested highlight field of a matched
 * document and stores the collected {@code Highlight} entries on the query result.
 * Does nothing when the query requests no highlight fields.
 *
 * @param indexSearcher the IndexSearcher that produced the hit
 * @param query the query carrying the highlight field list
 * @param scoreDoc the Lucene hit (supplies the internal doc id for term vectors)
 * @param doc the stored Lucene document
 * @param highlighter the configured Highlighter to apply
 * @param result the query result that receives the highlights
 * @throws IOException if reading term vectors or token streams fails
 * @throws InvalidTokenOffsetsException if fragment extraction fails
 */
protected void performHighlighting(final IndexSearcher indexSearcher, final Q query, final ScoreDoc scoreDoc,
        final Document doc, final Highlighter highlighter, final QR result)
        throws IOException, InvalidTokenOffsetsException {

    // Bail out early when highlighting was not requested.
    if (query.getHighlightFields() == null || query.getHighlightFields().isEmpty()) {
        return;
    }

    final List<Highlight> collected = new ArrayList<>();

    // Resolve the concrete field names to highlight, then process each in turn.
    for (final String fieldToHighlight : getHighlightFieldNames(query, doc)) {
        final String storedText = doc.get(fieldToHighlight);
        if (StringUtils.isEmpty(storedText)) {
            continue;
        }

        final Fields termVectors = indexSearcher.getIndexReader().getTermVectors(scoreDoc.doc);
        final int maxStartOffset = highlighter.getMaxDocCharsToAnalyze() - 1;

        // Prefer stored term vectors; TokenSources re-analyzes with the analyzer otherwise.
        final TokenStream stream = TokenSources.getTokenStream(fieldToHighlight, termVectors, storedText,
                analyzer, maxStartOffset);

        // Keep only fragments that actually scored against the query.
        final List<String> fieldSnippets = new ArrayList<>();
        for (final TextFragment candidate : highlighter.getBestTextFragments(stream, storedText, false, 10)) {
            if (candidate != null && candidate.getScore() > 0) {
                fieldSnippets.add(candidate.toString());
            }
        }

        if (!fieldSnippets.isEmpty()) {
            collected.add(new Highlight(fieldToHighlight, fieldSnippets));
        }
    }

    result.setHighlights(collected);
}

From source file:Example.lucene.SearchNHilight.java

/**
 * Searches the "content" field of the index at data/indexing for the query given in
 * args[0] (default "golf user"), printing each hit's url and title followed by a
 * separator. A two-fragment highlighted preview is built per hit (not printed,
 * matching the original behavior).
 *
 * @param args optional query string in args[0]
 * @throws IOException if the index cannot be opened or read
 * @throws ParseException if the query string cannot be parsed
 * @throws InvalidTokenOffsetsException if highlighting fails
 */
public static void main(String[] args) throws IOException, ParseException, InvalidTokenOffsetsException {
    Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45);

    Directory index = FSDirectory.open(new File("data/indexing"));
    String querystr = args.length > 0 ? args[0] : "golf user";
    // the "title" arg specifies the default field to use
    // when no field is explicitly specified in the query.
    Query query = new MultiFieldQueryParser(Version.LUCENE_45, new String[] { "content" }, analyzer)
            .parse(querystr);

    // 3. search
    int hitsPerPage = 10;
    IndexReader reader = DirectoryReader.open(index);
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopDocs hits = searcher.search(query, hitsPerPage);

        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
        Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
        // FIX: iterate over the actual hits; the original hard-coded 10 iterations
        // and threw ArrayIndexOutOfBoundsException when fewer documents matched.
        for (ScoreDoc scoreDoc : hits.scoreDocs) {
            int id = scoreDoc.doc;
            Document doc = searcher.doc(id);
            System.out.println(doc.get("url"));
            System.out.println(doc.get("title"));
            String text = doc.get("content");
            TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content",
                    analyzer);
            TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
            // Build a preview from the first two scoring fragments (StringBuilder
            // instead of the original += string concatenation in a loop).
            StringBuilder preview = new StringBuilder();
            int fragmentCount = 0;
            for (TextFragment frag1 : frag) {
                if ((frag1 != null) && (frag1.getScore() > 0)) {
                    preview.append(frag1.toString()).append("...<br>");
                    fragmentCount++;
                    // Get 2 Line Preview
                    if (fragmentCount >= 2)
                        break;
                }
            }
            System.out.println("-------------");
        }
    } finally {
        // FIX: release the reader; the original never closed it.
        reader.close();
    }
}

From source file:Main.WebAPI.Search.java

/**
 * /* w  w w . ja va2s  .  co  m*/
 * @param args args[0] is a query
 * 
 * @throws IOException
 * @throws ParseException
 * @throws InvalidTokenOffsetsException 
 */

/**
 * Searches the "content" field of the index at data/indexing for the query given in
 * args[0] (default "mike lab"), printing each hit's url and title followed by a
 * separator. A two-fragment highlighted preview is built per hit (not printed,
 * matching the original behavior).
 *
 * @param args args[0] is an optional query string
 * @throws IOException if the index cannot be opened or read
 * @throws ParseException if the query string cannot be parsed
 * @throws InvalidTokenOffsetsException if highlighting fails
 */
public static void main(String[] args) throws IOException, ParseException, InvalidTokenOffsetsException {
    Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45);

    Directory index = FSDirectory.open(new File("data/indexing"));
    String querystr = args.length > 0 ? args[0] : "mike lab";
    // the "title" arg specifies the default field to use
    // when no field is explicitly specified in the query.
    Query query = new MultiFieldQueryParser(Version.LUCENE_45, new String[] { "content" }, analyzer)
            .parse(querystr);

    // 3. search
    int hitsPerPage = 10;
    IndexReader reader = DirectoryReader.open(index);
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopDocs hits = searcher.search(query, hitsPerPage);

        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
        Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
        // FIX: iterate over the actual hits; the original hard-coded 10 iterations
        // and threw ArrayIndexOutOfBoundsException when fewer documents matched.
        for (ScoreDoc scoreDoc : hits.scoreDocs) {
            int id = scoreDoc.doc;
            Document doc = searcher.doc(id);
            System.out.println(doc.get("url"));
            System.out.println(doc.get("title"));
            String text = doc.get("content");
            TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content",
                    analyzer);
            TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
            // Build a preview from the first two scoring fragments (StringBuilder
            // instead of the original += string concatenation in a loop).
            StringBuilder preview = new StringBuilder();
            int fragmentCount = 0;
            for (TextFragment frag1 : frag) {
                if ((frag1 != null) && (frag1.getScore() > 0)) {
                    preview.append(frag1.toString()).append("...<br>");
                    fragmentCount++;
                    // Get 2 Line Preview
                    if (fragmentCount >= 2)
                        break;
                }
            }
            System.out.println("-------------");
        }
    } finally {
        // FIX: release the reader; the original never closed it.
        reader.close();
    }
}

From source file:net.riezebos.thoth.content.search.Searcher.java

License:Apache License

/**
 * Runs a full-text search and returns the requested page of results, with
 * highlighted fragments attached to document/fragment hits and a title fragment
 * for resource hits.
 *
 * @param identity        the user performing the search (drives access filtering)
 * @param queryExpression the raw Lucene query expression
 * @param pageNumber      1-based page number
 * @param pageSize        number of results per page
 * @return the page of results; {@code hadMore} is set when further pages exist
 * @throws SearchException wrapping any underlying parse/IO failure
 */
public PagedList<SearchResult> search(Identity identity, String queryExpression, int pageNumber, int pageSize)
        throws SearchException {
    try {
        // FIX: try-with-resources guarantees the reader is closed even when an
        // exception is thrown mid-search; the original leaked it on failure.
        try (IndexReader reader = getIndexReader(contentManager)) {
            IndexSearcher searcher = getIndexSearcher(reader);
            Analyzer analyzer = new StandardAnalyzer();

            // We might need to restrict the results to books if the user does not have access to fragments:
            AccessManager accessManager = contentManager.getAccessManager();
            boolean booksOnly = !accessManager.hasPermission(identity, "", Permission.READ_FRAGMENTS);
            if (booksOnly) {
                queryExpression = Indexer.INDEX_TYPE + ":" + Indexer.TYPE_DOCUMENT + " AND (" + queryExpression
                        + ")";
            }

            QueryParser parser = new QueryParser(Indexer.INDEX_CONTENTS, analyzer);
            Query query = parser.parse(queryExpression);

            // We add 1 to determine if there is more to be found after the current page
            int maxResults = pageSize * pageNumber + 1;
            TopDocs results = searcher.search(query, maxResults, Sort.RELEVANCE);
            ScoreDoc[] hits = results.scoreDocs;

            boolean hadMore = (hits.length == maxResults);

            // Walk all hits but only materialize those belonging to the requested page.
            List<SearchResult> searchResults = new ArrayList<>();
            int idx = 0;
            for (ScoreDoc scoreDoc : hits) {
                if (searchResults.size() == pageSize)
                    break;
                idx++;
                if (idx >= (pageNumber - 1) * pageSize) {
                    Document document = searcher.doc(scoreDoc.doc);
                    IndexableField field = document.getField(Indexer.INDEX_PATH);
                    String documentPath = field.stringValue();
                    SearchResult searchResult = new SearchResult();
                    searchResult.setIndexNumber((pageNumber - 1) * pageSize + idx);
                    searchResult.setDocument(documentPath);

                    String type = document.get(Indexer.INDEX_TYPE);
                    if (Indexer.TYPE_DOCUMENT.equals(type) || Indexer.TYPE_FRAGMENT.equals(type)) {
                        searchResult.setResource(false);

                        try {
                            MarkDownDocument markDownDocument = contentManager.getMarkDownDocument(documentPath,
                                    true, CriticProcessingMode.DO_NOTHING);
                            String contents = markDownDocument.getMarkdown();

                            SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
                            Highlighter highlighter = new Highlighter(htmlFormatter,
                                    new QueryScorer(query, Indexer.INDEX_CONTENTS));
                            highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);

                            TokenStream tokenStream = analyzer.tokenStream(Indexer.INDEX_CONTENTS, contents);

                            TextFragment[] frags = highlighter.getBestTextFragments(tokenStream, contents, false,
                                    99999);
                            for (TextFragment frag : frags) {
                                if ((frag != null) && (frag.getScore() > 0)) {
                                    String fragmentText = frag.toString();
                                    searchResult.addFragment(
                                            new Fragment(ThothCoreUtil.escapeHtmlExcept("B", fragmentText)));
                                }
                            }
                        } catch (FileNotFoundException e) {
                            // Stale index entry pointing at a deleted file: warn and continue.
                            LOG.warn(
                                    "Index contains an invalid file reference); probably need to reindex to get rid of this. File: "
                                            + e.getMessage());
                        }
                    } else {
                        // Non-document hit (image or other resource): use the title as the fragment.
                        searchResult.setResource(true);
                        String extension = ThothUtil.getExtension(documentPath);
                        searchResult.setImage(getConfiguration().isImageExtension(extension));

                        searchResult.addFragment(new Fragment(document.get(Indexer.INDEX_TITLE)));
                    }
                    searchResults.add(searchResult);
                }
            }
            linkBooks(searchResults);
            return new PagedList<>(searchResults, hadMore);
        }
    } catch (Exception e) {
        throw new SearchException(e);
    }
}

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndex.java

License:Apache License

/**
 * Builds an excerpt for the given hit by highlighting the first full-text or
 * analyzed stored field that yields fragments; fragments are joined with "...".
 *
 * @param analyzer the analyzer used to tokenize the stored field text
 * @param searcher the searcher whose reader supplies the stored document
 * @param doc      the hit to excerpt
 * @return the joined fragment text, or an empty string if nothing was highlighted
 * @throws IOException if the stored document or token stream cannot be read
 */
private String getExcerpt(Analyzer analyzer, IndexSearcher searcher, ScoreDoc doc) throws IOException {
    StringBuilder excerpt = new StringBuilder();

    for (IndexableField field : searcher.getIndexReader().document(doc.doc).getFields()) {
        String name = field.name();
        // only full text or analyzed fields
        if (name.startsWith(FieldNames.FULLTEXT) || name.startsWith(FieldNames.ANALYZED_FIELD_PREFIX)) {
            String text = field.stringValue();
            // FIX: close the TokenStream when done; the original leaked it.
            try (TokenStream tokenStream = analyzer.tokenStream(name, text)) {
                TextFragment[] textFragments = highlighter.getBestTextFragments(tokenStream, text, true, 2);
                if (textFragments != null && textFragments.length > 0) {
                    for (TextFragment fragment : textFragments) {
                        if (excerpt.length() > 0) {
                            excerpt.append("...");
                        }
                        excerpt.append(fragment.toString());
                    }
                    // Stop after the first field that produced fragments.
                    break;
                }
            } catch (InvalidTokenOffsetsException e) {
                // FIX: corrected log-message typo ("higlighting" -> "highlighting").
                LOG.error("highlighting failed", e);
            }
        }
    }
    return excerpt.toString();
}

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LucenePropertyIndex.java

License:Apache License

/**
 * Builds an excerpt for the given hit, preferring the postings highlighter on
 * analyzed fields that store positions and offsets, and falling back to the
 * classic highlighter on full-text/analyzed stored fields when that yields
 * nothing. Fragments are joined with "...".
 *
 * @param query      the query whose terms are highlighted
 * @param analyzer   the analyzer used for the classic-highlighter fallback
 * @param searcher   the searcher whose reader supplies the stored document
 * @param doc        the hit to excerpt
 * @param fieldInfos field metadata used to decide postings-highlighter eligibility
 * @return the joined fragment text, or an empty string if nothing was highlighted
 * @throws IOException if the stored document or token stream cannot be read
 */
private String getExcerpt(Query query, Analyzer analyzer, IndexSearcher searcher, ScoreDoc doc,
        FieldInfos fieldInfos) throws IOException {
    StringBuilder excerpt = new StringBuilder();
    int docID = doc.doc;
    List<String> names = new LinkedList<String>();

    for (IndexableField field : searcher.getIndexReader().document(docID).getFields()) {
        String name = field.name();
        // postings highlighter can be used on analyzed fields with docs, freqs, positions and offsets stored.
        if (name.startsWith(ANALYZED_FIELD_PREFIX) && fieldInfos.hasProx() && fieldInfos.hasOffsets()) {
            names.add(name);
        }
    }

    if (names.size() > 0) {
        // Ask for a single passage per candidate field.
        int[] maxPassages = new int[names.size()];
        for (int i = 0; i < maxPassages.length; i++) {
            maxPassages[i] = 1;
        }
        try {
            Map<String, String[]> stringMap = postingsHighlighter.highlightFields(
                    names.toArray(new String[names.size()]), query, searcher, new int[] { docID }, maxPassages);
            for (Map.Entry<String, String[]> entry : stringMap.entrySet()) {
                String value = Arrays.toString(entry.getValue());
                // Only keep passages that actually contain a highlighted term.
                if (value.contains("<b>")) {
                    if (excerpt.length() > 0) {
                        excerpt.append("...");
                    }
                    excerpt.append(value);
                }
            }
        } catch (Exception e) {
            LOG.error("postings highlighting failed", e);
        }
    }

    // fallback if no excerpt could be retrieved using postings highlighter
    if (excerpt.length() == 0) {

        for (IndexableField field : searcher.getIndexReader().document(doc.doc).getFields()) {
            String name = field.name();
            // only full text or analyzed fields
            if (name.startsWith(FieldNames.FULLTEXT) || name.startsWith(FieldNames.ANALYZED_FIELD_PREFIX)) {
                String text = field.stringValue();
                // FIX: close the TokenStream when done; the original leaked it.
                try (TokenStream tokenStream = analyzer.tokenStream(name, text)) {
                    TextFragment[] textFragments = highlighter.getBestTextFragments(tokenStream, text, true, 1);
                    if (textFragments != null && textFragments.length > 0) {
                        for (TextFragment fragment : textFragments) {
                            if (excerpt.length() > 0) {
                                excerpt.append("...");
                            }
                            excerpt.append(fragment.toString());
                        }
                        // Stop after the first field that produced fragments.
                        break;
                    }
                } catch (InvalidTokenOffsetsException e) {
                    // FIX: corrected log-message typo ("higlighting" -> "highlighting").
                    LOG.error("highlighting failed", e);
                }
            }
        }
    }
    return excerpt.toString();
}

From source file:org.apache.jena.query.text.TextIndexLucene.java

License:Apache License

/**
 * Joins highlighted fragments into a single string, separated by {@code opts.fragSep}.
 * When {@code opts.joinHi} is set, adjacent highlight markers separated only by
 * filler (matched by Z_MORE_SEPS) are merged into one continuous highlight.
 *
 * @param frags the fragments to join (in order)
 * @param opts  highlighting options controlling joining and the fragment separator
 * @return the joined fragment text (empty string for an empty array)
 */
private String frags2string(TextFragment[] frags, HighlightOpts opts) {
    // FIX: use StringBuilder instead of repeated String concatenation (O(n^2)).
    StringBuilder result = new StringBuilder();
    String sep = "";

    for (TextFragment f : frags) {
        String s = opts.joinHi
                ? f.toString().replaceAll(opts.end + Z_MORE_SEPS + opts.start, "$1")
                : f.toString();
        result.append(sep).append(s);
        sep = opts.fragSep;
    }

    return result.toString();
}

From source file:org.apache.solr.highlight.DefaultSolrHighlighter.java

License:Apache License

/**
 * Highlights one stored field of a document with the classic Lucene Highlighter and
 * adds the resulting snippets to {@code docSummaries} keyed by {@code fieldName}.
 * Supports multiValued fields (optionally preserving value order via
 * {@code hl.preserveMulti}), a term-vector-based TermOffsets optimization, and caps
 * on how many values are examined/matched. Falls back to {@code alternateField}
 * when no snippet was produced.
 *
 * @param query        the query whose terms are highlighted
 * @param req          the current Solr request (supplies params and the searcher)
 * @param docSummaries output list receiving {@code fieldName -> String[]} snippet entries
 * @param docId        Lucene internal document id, used to fetch term vectors
 * @param doc          the stored document whose field values are highlighted
 * @param fieldName    the field to highlight
 * @throws IOException if token stream creation or highlighting fails at the I/O level
 */
private void doHighlightingByHighlighter(Query query, SolrQueryRequest req, NamedList docSummaries, int docId,
        Document doc, String fieldName) throws IOException {
    final SolrIndexSearcher searcher = req.getSearcher();
    final IndexSchema schema = searcher.getSchema();

    // TODO: Currently in trunk highlighting numeric fields is broken (Lucene) -
    // so we disable them until fixed (see LUCENE-3080)!
    // BEGIN: Hack
    final SchemaField schemaField = schema.getFieldOrNull(fieldName);
    if (schemaField != null && ((schemaField.getType() instanceof org.apache.solr.schema.TrieField)
            || (schemaField.getType() instanceof org.apache.solr.schema.TrieDateField)))
        return;
    // END: Hack

    SolrParams params = req.getParams();

    // preserve order of values in a multiValued list
    boolean preserveMulti = params.getFieldBool(fieldName, HighlightParams.PRESERVE_MULTI, false);

    List<IndexableField> allFields = doc.getFields();
    if (allFields != null && allFields.size() == 0)
        return; // No explicit contract that getFields returns != null,
    // although currently it can't.

    TokenStream tstream = null;
    int numFragments = getMaxSnippets(fieldName, params);
    boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);

    String[] summaries = null;
    List<TextFragment> frags = new ArrayList<TextFragment>();

    TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization
    TokenStream tvStream = TokenSources.getTokenStreamWithOffsets(searcher.getIndexReader(), docId, fieldName);
    if (tvStream != null) {
        tots = new TermOffsetsTokenStream(tvStream);
    }
    // Caps on how many values of a multiValued field to examine / to collect matches from.
    int mvToExamine = Integer.parseInt(req.getParams().get(HighlightParams.MAX_MULTIVALUED_TO_EXAMINE,
            Integer.toString(Integer.MAX_VALUE)));
    int mvToMatch = Integer.parseInt(
            req.getParams().get(HighlightParams.MAX_MULTIVALUED_TO_MATCH, Integer.toString(Integer.MAX_VALUE)));

    for (IndexableField thisField : allFields) {
        if (mvToExamine <= 0 || mvToMatch <= 0)
            break;

        if (!thisField.name().equals(fieldName))
            continue; // Is there a better way to do this?

        --mvToExamine;
        String thisText = thisField.stringValue();

        if (tots != null) {
            // if we're using TermOffsets optimization, then get the next
            // field value's TokenStream (i.e. get field j's TokenStream) from tots:
            tstream = tots.getMultiValuedTokenStream(thisText.length());
        } else {
            // fall back to analyzer
            tstream = createAnalyzerTStream(schema, fieldName, thisText);
        }

        int maxCharsToAnalyze = params.getFieldInt(fieldName, HighlightParams.MAX_CHARS,
                Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);

        Highlighter highlighter;
        if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) {
            // Negative maxChars means "no limit" - cache the whole stream.
            if (maxCharsToAnalyze < 0) {
                tstream = new CachingTokenFilter(tstream);
            } else {
                tstream = new CachingTokenFilter(new OffsetLimitTokenFilter(tstream, maxCharsToAnalyze));
            }

            // get highlighter
            highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tstream);

            // after highlighter initialization, reset tstream since construction of highlighter already used it
            tstream.reset();
        } else {
            // use "the old way"
            highlighter = getHighlighter(query, fieldName, req);
        }

        if (maxCharsToAnalyze < 0) {
            highlighter.setMaxDocCharsToAnalyze(thisText.length());
        } else {
            highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
        }

        try {
            TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tstream, thisText,
                    mergeContiguousFragments, numFragments);
            // preserveMulti keeps non-null fragments even when they did not score,
            // so every field value is represented in order.
            for (int k = 0; k < bestTextFragments.length; k++) {
                if (preserveMulti) {
                    if (bestTextFragments[k] != null) {
                        frags.add(bestTextFragments[k]);
                        --mvToMatch;
                    }
                } else {
                    if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
                        frags.add(bestTextFragments[k]);
                        --mvToMatch;
                    }
                }
            }
        } catch (InvalidTokenOffsetsException e) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
        }
    }
    // sort such that the fragments with the highest score come first
    // (skipped when preserveMulti: original value order must be kept)
    if (!preserveMulti) {
        Collections.sort(frags, new Comparator<TextFragment>() {
            @Override
            public int compare(TextFragment arg0, TextFragment arg1) {
                return Math.round(arg1.getScore() - arg0.getScore());
            }
        });
    }

    // convert fragments back into text
    // TODO: we can include score and position information in output as snippet attributes
    if (frags.size() > 0) {
        ArrayList<String> fragTexts = new ArrayList<String>();
        for (TextFragment fragment : frags) {
            if (preserveMulti) {
                if (fragment != null) {
                    fragTexts.add(fragment.toString());
                }
            } else {
                if ((fragment != null) && (fragment.getScore() > 0)) {
                    fragTexts.add(fragment.toString());
                }
            }

            // The numFragments cap does not apply when preserving multiValued order.
            if (fragTexts.size() >= numFragments && !preserveMulti)
                break;
        }
        summaries = fragTexts.toArray(new String[0]);
        if (summaries.length > 0)
            docSummaries.add(fieldName, summaries);
    }
    // no summaries made, copy text from alternate field
    if (summaries == null || summaries.length == 0) {
        alternateField(docSummaries, params, doc, fieldName);
    }
}

From source file:org.apache.solr.highlight.ParsedContentSolrHighlighter.java

License:Apache License

/**
 * Generates a list of highlighted query fragments for each item in a list
 * of documents, or returns null if highlighting is disabled.
 *
 * @param docs
 *            query results
 * @param query
 *            the query
 * @param req
 *            the current request
 * @param defaultFields
 *            default list of fields to summarize
 * @return NamedList containing a NamedList for each document, which in
 *         turn contains (field, summary) pairs; null when highlighting is
 *         disabled via request params
 */
@SuppressWarnings("unchecked")
public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields)
        throws IOException {
    SolrParams params = req.getParams();
    if (!isHighlightingEnabled(params))
        return null;

    SolrIndexSearcher searcher = req.getSearcher();
    IndexSchema schema = searcher.getSchema();
    NamedList fragments = new SimpleOrderedMap();
    String[] fieldNames = getHighlightFields(query, req, defaultFields);
    Document[] readDocs = new Document[docs.size()];
    {
        // pre-fetch documents using the Searcher's doc cache
        Set<String> fset = new HashSet<String>();
        for (String f : fieldNames) {
            fset.add(f);
        }
        // fetch unique key if one exists.
        SchemaField keyField = schema.getUniqueKeyField();
        if (null != keyField)
            fset.add(keyField.getName());
        searcher.readDocs(readDocs, docs, fset);
    }

    // Highlight each document
    DocIterator iterator = docs.iterator();
    for (int i = 0; i < docs.size(); i++) {
        int docId = iterator.nextDoc();
        Document doc = readDocs[i];
        NamedList docSummaries = new SimpleOrderedMap();
        for (String fieldName : fieldNames) {
            fieldName = fieldName.trim();

            String[] docTexts = doc.getValues(fieldName);
            // Highlight only the parsed content, instead of all fields
            if (IndexField.DEFAULT_SEARCH_FIELD.equals(fieldName)) {
                docTexts = doc.getValues(IndexField.PARSED_CONTENT_FIELD);
            }

            if (docTexts == null)
                continue;

            TokenStream tstream = null;
            int numFragments = getMaxSnippets(fieldName, params);
            boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);

            String[] summaries = null;
            List<TextFragment> frags = new ArrayList<TextFragment>();
            for (int j = 0; j < docTexts.length; j++) {
                // create TokenStream: prefer stored term vectors, fall back to re-analysis
                try {
                    // attempt term vectors
                    tstream = TokenSources.getTokenStreamWithOffsets(searcher.getIndexReader(), docId,
                            fieldName);
                } catch (IllegalArgumentException e) {
                    // fall back to analyzer
                    tstream = new TokenOrderingFilter(
                            schema.getAnalyzer().tokenStream(fieldName, new StringReader(docTexts[j])), 10);
                }

                Highlighter highlighter;
                if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER))) {
                    // wrap CachingTokenFilter around TokenStream for reuse
                    tstream = new CachingTokenFilter(tstream);

                    // get highlighter
                    highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tstream);

                    // after highlighter initialization, reset tstream since construction of highlighter
                    // already used it
                    tstream.reset();
                } else {
                    // use "the old way"
                    highlighter = getHighlighter(query, fieldName, req);
                }

                int maxCharsToAnalyze = params.getFieldInt(fieldName, HighlightParams.MAX_CHARS,
                        Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);
                if (maxCharsToAnalyze < 0) {
                    // negative limit means "no limit": analyze the entire field value
                    highlighter.setMaxDocCharsToAnalyze(docTexts[j].length());
                } else {
                    highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
                }

                try {
                    TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tstream, docTexts[j],
                            mergeContiguousFragments, numFragments);
                    // keep only fragments that actually matched something
                    for (int k = 0; k < bestTextFragments.length; k++) {
                        if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
                            frags.add(bestTextFragments[k]);
                        }
                    }
                } catch (InvalidTokenOffsetsException e) {
                    throw new RuntimeException(e);
                }
            }
            // sort such that the fragments with the highest score come first.
            // NOTE: Float.compare replaces the old Math.round(scoreA - scoreB),
            // which collapsed score differences < 0.5 to "equal" and violated the
            // Comparator contract (non-transitive ordering).
            Collections.sort(frags, new Comparator<TextFragment>() {
                @Override
                public int compare(TextFragment arg0, TextFragment arg1) {
                    return Float.compare(arg1.getScore(), arg0.getScore());
                }
            });

            // convert fragments back into text
            // TODO: we can include score and position information in output as snippet attributes
            if (frags.size() > 0) {
                ArrayList<String> fragTexts = new ArrayList<String>();
                for (TextFragment fragment : frags) {
                    if ((fragment != null) && (fragment.getScore() > 0)) {
                        // escape so the snippet is safe to embed directly in HTML output
                        fragTexts.add(StringEscapeUtils.escapeHtml(fragment.toString()));
                    }
                    if (fragTexts.size() >= numFragments)
                        break;
                }
                summaries = fragTexts.toArray(new String[0]);
                if (summaries.length > 0)
                    docSummaries.add(fieldName, summaries);
            }
            // no summaries made, copy text from alternate field
            if (summaries == null || summaries.length == 0) {
                String alternateField = req.getParams().getFieldParam(fieldName,
                        HighlightParams.ALTERNATE_FIELD);
                if (alternateField != null && alternateField.length() > 0) {
                    String[] altTexts = doc.getValues(alternateField);
                    if (altTexts != null && altTexts.length > 0) {
                        int alternateFieldLen = req.getParams().getFieldInt(fieldName,
                                HighlightParams.ALTERNATE_FIELD_LENGTH, 0);
                        if (alternateFieldLen <= 0) {
                            // no length limit configured: return the alternate text as-is
                            docSummaries.add(fieldName, altTexts);
                        } else {
                            // accumulate alternate values, truncating the last one so the
                            // combined length does not exceed alternateFieldLen
                            List<String> altList = new ArrayList<String>();
                            int len = 0;
                            for (String altText : altTexts) {
                                altList.add(len + altText.length() > alternateFieldLen
                                        ? altText.substring(0, alternateFieldLen - len)
                                        : altText);
                                len += altText.length();
                                if (len >= alternateFieldLen)
                                    break;
                            }
                            docSummaries.add(fieldName, altList);
                        }
                    }
                }
            }

        }
        // key each document's summaries by its printable unique key (null if no unique key field)
        String printId = schema.printableUniqueKey(doc);
        fragments.add(printId == null ? null : printId, docSummaries);
    }
    return fragments;
}