Example usage for the org.apache.lucene.search.highlight.SimpleFragmenter constructor SimpleFragmenter()

List of usage examples for the org.apache.lucene.search.highlight.SimpleFragmenter constructor SimpleFragmenter()

Introduction

On this page you can find example usage of the no-argument constructor SimpleFragmenter() of the class org.apache.lucene.search.highlight.SimpleFragmenter.

Prototype

public SimpleFragmenter() 

Source Link

Usage

From source file: it.cnr.ilc.lc.claviusweb.ClaviusSearch.java

/**
 * Runs a wildcard search for {@code term} against the {@code content} field of the
 * PlainText Lucene index and collects the highlighted fragments of every returned hit.
 *
 * <p>Failures are handled on a best-effort basis: an {@link IOException} or
 * {@link InvalidTokenOffsetsException} raised while opening the index, searching or
 * highlighting is logged, and whatever was collected up to that point is returned.
 * The checked exceptions in the {@code throws} clause are kept so existing callers
 * continue to compile.
 *
 * @param term pattern passed unmodified to {@link WildcardQuery}
 * @return the annotations produced for the processed hits; empty if nothing matched or
 *         the search failed before any hit was processed
 */
private static List<Annotation> fullTextSearch(String term)
        throws IOException, ParseException, InvalidTokenOffsetsException {

    log.info("fullTextSearch (" + term + ")");
    List<Annotation> result = new ArrayList<>();

    // The directory and reader hold file handles; try-with-resources releases them on
    // every exit path instead of leaking them on each call.
    try (Directory indexDirectory = FSDirectory
            .open(Paths.get("/var/lucene/clavius-1.0.5/indexes/it.cnr.ilc.lc.claviusweb.entity.PlainText"));
            DirectoryReader ireader = DirectoryReader.open(indexDirectory)) {

        IndexSearcher searcher = new IndexSearcher(ireader);

        // Surrounds punctuation with spaces so that the whitespace tokenizer emits each
        // punctuation mark as its own token.
        Analyzer fullTextAnalyzer = CustomAnalyzer.builder()
                .addCharFilter("patternReplace", "pattern", "([\\-\\(\\)\\[\\],\\.;:])", "replacement", " $1 ")
                .withTokenizer("whitespace").build();

        Query query = new WildcardQuery(new Term("content", term));
        TopDocs hits = searcher.search(query, MAX_SEARCH_HITS);

        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
        ClaviusHighlighter highlighter = new ClaviusHighlighter(htmlFormatter, new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter());

        log.info("hits.totalHits=(" + hits.totalHits + ")");

        // totalHits counts every matching document, while scoreDocs holds at most
        // MAX_SEARCH_HITS entries; iterating up to totalHits would run off the array.
        int returnedHits = hits.scoreDocs.length;
        for (int i = 0; i < returnedHits; i++) {
            int id = hits.scoreDocs[i].doc;
            Document doc = searcher.doc(id);
            String idDoc = doc.get("idDoc");

            TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content",
                    fullTextAnalyzer);

            // NOTE(review): the last argument (10) is presumably the maximum number of
            // fragments per document; confirm against ClaviusHighlighter.
            List<Annotation> frag = highlighter.getBestTextClaviusFragments(tokenStream, doc, false, 10);
            for (int j = 0; j < frag.size(); j++) {
                log.debug("idDoc: " + idDoc + ", Annotation[" + j + "] " + frag.get(j).toString());
            }
            result.addAll(frag);
        }
    } catch (InvalidTokenOffsetsException | IOException e) {
        // Deliberately best-effort: report the failure and return what was gathered.
        log.error(e);
    }
    log.info("Full Text Search found " + result.size() + " result(s) for term " + term);
    return result;
}