List of usage examples for the org.apache.lucene.search.highlight.SimpleFragmenter constructor SimpleFragmenter()
public SimpleFragmenter()
From source file: it.cnr.ilc.lc.claviusweb.ClaviusSearch.java
private static List<Annotation> fullTextSearch(String term) throws IOException, ParseException, InvalidTokenOffsetsException { log.info("fullTextSearch (" + term + ")"); List<Annotation> result = new ArrayList<>(); try {// www . ja va 2 s . c o m Directory indexDirectory = FSDirectory .open(Paths.get("/var/lucene/clavius-1.0.5/indexes/it.cnr.ilc.lc.claviusweb.entity.PlainText")); DirectoryReader ireader = DirectoryReader.open(indexDirectory); IndexSearcher searcher = new IndexSearcher(ireader); Analyzer fullTextAnalyzer = CustomAnalyzer.builder() .addCharFilter("patternReplace", "pattern", "([\\-\\(\\)\\[\\],\\.;:])", "replacement", " $1 ") .withTokenizer("whitespace").build(); //QueryParser parserTerm = new QueryParser("content", fullTextAnalyzer); // AnalyzingQueryParser parser = new AnalyzingQueryParser("content", fullTextAnalyzer); // Query query2 = parser.parse(term); // Query query = new WildcardQuery(new Term("content", term)); TopDocs hits = searcher.search(query, MAX_SEARCH_HITS); SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); //Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); ClaviusHighlighter highlighter = new ClaviusHighlighter(htmlFormatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter()); log.info("hits.totalHits=(" + hits.totalHits + ")"); for (int i = 0; i < hits.totalHits; i++) { int id = hits.scoreDocs[i].doc; Document doc = searcher.doc(id); String idDoc = doc.get("idDoc"); //String text = doc.get("content"); TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content", fullTextAnalyzer); List<Annotation> frag = highlighter.getBestTextClaviusFragments(tokenStream, doc, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "..."); for (int j = 0; j < frag.size(); j++) { log.debug("idDoc: " + idDoc + ", Annotation[" + j + "] " + frag.get(j).toString()); } result.addAll(frag); } } catch (InvalidTokenOffsetsException | 
IOException e) { log.error(e); } log.info("Full Text Search found " + result.size() + " result(s) for term " + term); return result; }