aos.lucene.tools.FastVectorHighlighterSample.java Source code

Java tutorial

Introduction

Here is the source code for aos.lucene.tools.FastVectorHighlighterSample.java

Source

/****************************************************************
 * Licensed to the AOS Community (AOS) under one or more        *
 * contributor license agreements.  See the NOTICE file         *
 * distributed with this work for additional information        *
 * regarding copyright ownership.  The AOS licenses this file   *
 * to you under the Apache License, Version 2.0 (the            *
 * "License"); you may not use this file except in compliance   *
 * with the License.  You may obtain a copy of the License at   *
 *                                                              *
 *   http://www.apache.org/licenses/LICENSE-2.0                 *
 *                                                              *
 * Unless required by applicable law or agreed to in writing,   *
 * software distributed under the License is distributed on an  *
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
 * KIND, either express or implied.  See the License for the    *
 * specific language governing permissions and limitations      *
 * under the License.                                           *
 ****************************************************************/
package aos.lucene.tools;

import java.io.FileWriter;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.vectorhighlight.BaseFragmentsBuilder;
import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
import org.apache.lucene.search.vectorhighlight.FieldQuery;
import org.apache.lucene.search.vectorhighlight.FragListBuilder;
import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder;
import org.apache.lucene.search.vectorhighlight.SimpleFragListBuilder;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

/**
 * Sample that indexes a few in-memory documents, runs one fixed query against
 * them and writes the best highlighted fragment of every hit to an HTML file
 * using {@link FastVectorHighlighter}.
 *
 * <p>Usage: {@code FastVectorHighlighterSample <filename>}, where
 * {@code <filename>} is the HTML file to create.
 *
 * <p>NOTE(review): {@code QueryParser}, {@code MaxFieldLength}, {@code Index}
 * and {@code TermVector} are referenced below without a matching import in
 * this file, and the {@code IndexWriter}/{@code IndexSearcher} constructors
 * used here look like the pre-4.0 Lucene API although the analyzer is pinned
 * to {@code Version.LUCENE_46} -- confirm the intended Lucene version before
 * building.
 */
public class FastVectorHighlighterSample {

    /** Texts indexed by {@link #makeIndex()}, one document per entry. */
    static final String[] DOCS = { // #A
            "the quick brown fox jumps over the lazy dog", // #A
            "the quick gold fox jumped over the lazy black dog", // #A
            "the quick fox jumps over the black dog", // #A
            "the red fox jumped over the lazy dark gray dog" // #A
    };

    /** Query string parsed and highlighted by {@link #searchIndex(String)}. */
    static final String QUERY = "quick OR fox OR \"lazy dog\"~1"; // #B

    /** Name of the single field every document carries. */
    static final String F = "f";

    /** In-memory index shared by the indexing and the search step. */
    static Directory dir = new RAMDirectory();

    /** Analyzer used both for indexing and for parsing the query. */
    static Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);

    /**
     * Builds the index and writes the highlighted search result.
     *
     * @param args exactly one element: the name of the HTML file to write
     * @throws Exception if indexing, searching or writing the file fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 1) {
            System.err.println("Usage: FastVectorHighlighterSample <filename>");
            System.exit(-1);
        }
        makeIndex();
        searchIndex(args[0]);
    }

    /**
     * Adds every entry of {@link #DOCS} to {@link #dir} as a document with a
     * single stored, analyzed field {@link #F}.
     *
     * @throws IOException if the index cannot be written
     */
    static void makeIndex() throws IOException {
        IndexWriter writer = new IndexWriter(dir, analyzer, true, MaxFieldLength.UNLIMITED);
        try {
            for (String d : DOCS) {
                Document doc = new Document();
                // Term vectors with positions and offsets are indexed; the
                // FastVectorHighlighter works from that data (see Lucene docs).
                doc.add(new Field(F, d, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
                writer.addDocument(doc);
            }
        } finally {
            // Release the writer even when adding a document fails.
            writer.close();
        }
    }

    /**
     * Runs {@link #QUERY} against {@link #dir} and writes an HTML page to
     * {@code filename} that lists, for each of the top 10 hits, the document
     * id followed by its best highlighted fragment. Hits for which the
     * highlighter returns {@code null} are omitted.
     *
     * @param filename name of the HTML file to create or overwrite
     * @throws Exception if parsing the query, searching or writing fails
     */
    static void searchIndex(String filename) throws Exception {
        QueryParser parser = new QueryParser(Version.LUCENE_46, F, analyzer);
        Query query = parser.parse(QUERY);
        FastVectorHighlighter highlighter = getHighlighter(); // #C
        FieldQuery fieldQuery = highlighter.getFieldQuery(query); // #D
        IndexSearcher searcher = new IndexSearcher(dir);
        try {
            TopDocs docs = searcher.search(query, 10);

            FileWriter writer = new FileWriter(filename);
            try {
                writer.write("<html>");
                writer.write("<body>");
                writer.write("<p>QUERY : " + QUERY + "</p>");
                for (ScoreDoc scoreDoc : docs.scoreDocs) {
                    String snippet = highlighter.getBestFragment( // #E
                            fieldQuery, searcher.getIndexReader(), // #E
                            scoreDoc.doc, F, 100); // #E
                    if (snippet != null) {
                        writer.write(scoreDoc.doc + " : " + snippet + "<br/>");
                    }
                }
                writer.write("</body></html>");
            } finally {
                // Close (and flush) the output file even when highlighting
                // or a write fails part-way through.
                writer.close();
            }
        } finally {
            // The searcher was created from the directory, so nothing else
            // holds the underlying reader; close it here to release it.
            searcher.getIndexReader().close();
        }
    }

    /**
     * Creates the highlighter used by {@link #searchIndex(String)}: a
     * {@link SimpleFragListBuilder} combined with a
     * {@link ScoreOrderFragmentsBuilder} that wraps matches in the
     * {@code COLORED_PRE_TAGS}/{@code COLORED_POST_TAGS} of
     * {@link BaseFragmentsBuilder}.
     *
     * @return a newly configured {@link FastVectorHighlighter}
     */
    static FastVectorHighlighter getHighlighter() {
        FragListBuilder fragListBuilder = new SimpleFragListBuilder(); // #F
        FragmentsBuilder fragmentBuilder = // #F
                new ScoreOrderFragmentsBuilder( // #F
                        BaseFragmentsBuilder.COLORED_PRE_TAGS, // #F
                        BaseFragmentsBuilder.COLORED_POST_TAGS); // #F
        // The two flags are the constructor's phraseHighlight and fieldMatch
        // switches (per the Lucene Javadoc); both are enabled here.
        return new FastVectorHighlighter(true, true, // #F
                fragListBuilder, fragmentBuilder); // #F
    }
}

/*
 * #A Index these documents
 * #B Run this query
 * #C Get FastVectorHighlighter
 * #D Create FieldQuery
 * #E Highlight top fragment
 * #F Create FastVectorHighlighter
 */