List of usage examples for the org.apache.lucene.search.highlight.SimpleSpanFragmenter constructor
public SimpleSpanFragmenter(QueryScorer queryScorer, int fragmentSize)
From source file:blackbelt.lucene.testHighlight.MainHighlight.java
License:Open Source License
public static void main(String[] args) throws ParseException, IOException { String keyWord = "hibernate"; String language = "en"; String text = "Hibernate is an object-relational mapping (ORM) library for the Java language," + "providing a framework for mapping an object-oriented domain model to a traditional relational" + "database. Hibernate solves object-relational impedance mismatch problems by replacing direct " + "persistence-related database accesses with high-level object handling functions. " + "Hibernate is free software that is distributed under the GNU Lesser General Public License. " + "Hibernate's primary feature is mapping from Java classes to database tables " + "(and from Java data types to SQL data types). Hibernate also provides data query" + " and retrieval facilities. Hibernate generates the SQL calls and attempts to relieve" + " the developer from manual result set handling and object conversion and keep the application" + " portable to all supported SQL databases with little performance overhead."; String result;/*from w ww . j a v a 2 s. 
c om*/ QueryParser parser = new QueryParser(Version.LUCENE_30, "title", new StandardAnalyzer(Version.LUCENE_30)); Query query = parser.parse(keyWord); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>"); TokenStream tokens = new StandardAnalyzer(Version.LUCENE_30).tokenStream("title", new StringReader(text)); QueryScorer scorer = new QueryScorer(query, "title"); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 85)); try { result = highlighter.getBestFragments(tokens, text, 4, "<BR/>..."); System.out.println(result); System.out.println("\n" + result.length()); } catch (InvalidTokenOffsetsException e) { throw new RuntimeException(e); } result = "<html><body>" + result + "</body></html>"; File file = new File("C:\\Users\\forma702\\Desktop\\testHighlight.html"); try { PrintWriter pw = new PrintWriter(file); pw.print(result); pw.close(); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:com.gauronit.tagmata.core.Indexer.java
License:Open Source License
/**
 * Searches the named indexes for {@code searchText} and returns one
 * {@code CardSnapshot} per hit, with up to five highlighted fragments of the
 * matching text (or the first 100 characters when nothing could be highlighted).
 *
 * @param searchText    the user's query text
 * @param indexNames    index directory names to search, all under {@code indexDir}
 * @param searchInTitle include the "title" field in the query
 * @param searchInTags  include the "tags" field in the query
 * @param searchInText  include the "text"/"analyzedText" fields in the query
 * @param superFuzzy    passed through to {@code IndexerUtil.getTokenizedQuery}
 * @return matching snapshots; empty on error (exceptions are logged, not thrown)
 */
public ArrayList<CardSnapshot> search(String searchText, ArrayList<String> indexNames, boolean searchInTitle,
        boolean searchInTags, boolean searchInText, boolean superFuzzy) {
    ArrayList<CardSnapshot> cardSnaps = new ArrayList<>(); // was a raw ArrayList
    try {
        ArrayList<IndexSearcher> searchers = new ArrayList<>();
        for (String indexName : indexNames) {
            IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexDir + File.separator + indexName),
                    new SimpleFSLockFactory(indexDir + File.separator + indexName)));
            searchers.add(new IndexSearcher(reader));
        }
        BooleanQuery query = new BooleanQuery();
        if (searchInTitle) {
            IndexerUtil.getTokenizedQuery(query, "title", searchText, superFuzzy);
        }
        if (searchInTags) {
            IndexerUtil.getTokenizedQuery(query, "tags", searchText, superFuzzy);
        }
        if (searchInText) {
            IndexerUtil.getTokenizedQuery(query, "text", searchText, superFuzzy);
            IndexerUtil.getTokenizedQuery(query, "analyzedText", searchText, superFuzzy);
        }
        for (IndexSearcher searcher : searchers) {
            TopScoreDocCollector collector = TopScoreDocCollector.create(10000, false);
            searcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            for (ScoreDoc hit : hits) {
                Document doc = searcher.doc(hit.doc);
                // was Version.LUCENE_20.LUCENE_35 — a static constant accessed
                // through another enum constant; reference it directly
                TokenStream stream = TokenSources.getTokenStream("text", doc.get("analyzedText"),
                        new StandardAnalyzer(Version.LUCENE_35));
                QueryScorer scorer = new QueryScorer(query, "analyzedText");
                Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 20);
                Highlighter highlighter = new Highlighter(scorer);
                highlighter.setTextFragmenter(fragmenter);
                String[] fragments = highlighter.getBestFragments(stream, doc.get("text"), 5);
                // StringBuilder instead of String += in a loop
                StringBuilder highlights = new StringBuilder();
                for (String fragment : fragments) {
                    highlights.append(fragment).append("...");
                }
                if (highlights.length() == 0) {
                    // nothing highlighted: fall back to a plain 100-char excerpt
                    String text = doc.get("text");
                    highlights.append(text.length() > 100 ? text.substring(0, 100) : text);
                }
                cardSnaps.add(new CardSnapshot(highlights.toString(), doc));
            }
            searcher.getIndexReader().close();
            searcher.close();
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    }
    return cardSnaps;
}
From source file:com.gitblit.LuceneExecutor.java
License:Apache License
/** * //from w ww .j av a 2 s. c o m * @param analyzer * @param query * @param content * @param result * @return * @throws IOException * @throws InvalidTokenOffsetsException */ private String getHighlightedFragment(Analyzer analyzer, Query query, String content, SearchResult result) throws IOException, InvalidTokenOffsetsException { if (content == null) { content = ""; } int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150; QueryScorer scorer = new QueryScorer(query, "content"); Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength); // use an artificial delimiter for the token String termTag = "!!--["; String termTagEnd = "]--!!"; SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(fragmenter); String[] fragments = highlighter.getBestFragments(analyzer, "content", content, 3); if (ArrayUtils.isEmpty(fragments)) { if (SearchObjectType.blob == result.type) { return ""; } // clip commit message String fragment = content; if (fragment.length() > fragmentLength) { fragment = fragment.substring(0, fragmentLength) + "..."; } return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true) + "</pre>"; } // make sure we have unique fragments Set<String> uniqueFragments = new LinkedHashSet<String>(); for (String fragment : fragments) { uniqueFragments.add(fragment); } fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]); StringBuilder sb = new StringBuilder(); for (int i = 0, len = fragments.length; i < len; i++) { String fragment = fragments[i]; String tag = "<pre class=\"text\">"; // resurrect the raw fragment from removing the artificial delimiters String raw = fragment.replace(termTag, "").replace(termTagEnd, ""); // determine position of the raw fragment in the content int pos = content.indexOf(raw); // restore complete first line of fragment int c = pos; while (c > 0) { c--; if 
(content.charAt(c) == '\n') { break; } } if (c > 0) { // inject leading chunk of first fragment line fragment = content.substring(c + 1, pos) + fragment; } if (SearchObjectType.blob == result.type) { // count lines as offset into the content for this fragment int line = Math.max(1, StringUtils.countLines(content.substring(0, pos))); // create fragment tag with line number and language String lang = ""; String ext = StringUtils.getFileExtension(result.path).toLowerCase(); if (!StringUtils.isEmpty(ext)) { // maintain leading space! lang = " lang-" + ext; } tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang); } sb.append(tag); // replace the artificial delimiter with html tags String html = StringUtils.escapeForHtml(fragment, false); html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>"); sb.append(html); sb.append("</pre>"); if (i < len - 1) { sb.append("<span class=\"ellipses\">...</span><br/>"); } } return sb.toString(); }
From source file:com.gitblit.service.LuceneService.java
License:Apache License
/** * * @param analyzer/*from ww w .j av a2s .c o m*/ * @param query * @param content * @param result * @return * @throws IOException * @throws InvalidTokenOffsetsException */ private String getHighlightedFragment(Analyzer analyzer, Query query, String content, SearchResult result) throws IOException, InvalidTokenOffsetsException { if (content == null) { content = ""; } int tabLength = storedSettings.getInteger(Keys.web.tabLength, 4); int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150; QueryScorer scorer = new QueryScorer(query, "content"); Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength); // use an artificial delimiter for the token String termTag = "!!--["; String termTagEnd = "]--!!"; SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(fragmenter); String[] fragments = highlighter.getBestFragments(analyzer, "content", content, 3); if (ArrayUtils.isEmpty(fragments)) { if (SearchObjectType.blob == result.type) { return ""; } // clip commit message String fragment = content; if (fragment.length() > fragmentLength) { fragment = fragment.substring(0, fragmentLength) + "..."; } return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true, tabLength) + "</pre>"; } // make sure we have unique fragments Set<String> uniqueFragments = new LinkedHashSet<String>(); for (String fragment : fragments) { uniqueFragments.add(fragment); } fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]); StringBuilder sb = new StringBuilder(); for (int i = 0, len = fragments.length; i < len; i++) { String fragment = fragments[i]; String tag = "<pre class=\"text\">"; // resurrect the raw fragment from removing the artificial delimiters String raw = fragment.replace(termTag, "").replace(termTagEnd, ""); // determine position of the raw fragment in the content int pos = content.indexOf(raw); // restore 
complete first line of fragment int c = pos; while (c > 0) { c--; if (content.charAt(c) == '\n') { break; } } if (c > 0) { // inject leading chunk of first fragment line fragment = content.substring(c + 1, pos) + fragment; } if (SearchObjectType.blob == result.type) { // count lines as offset into the content for this fragment int line = Math.max(1, StringUtils.countLines(content.substring(0, pos))); // create fragment tag with line number and language String lang = ""; String ext = StringUtils.getFileExtension(result.path).toLowerCase(); if (!StringUtils.isEmpty(ext)) { // maintain leading space! lang = " lang-" + ext; } tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang); } sb.append(tag); // replace the artificial delimiter with html tags String html = StringUtils.escapeForHtml(fragment, false); html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>"); sb.append(html); sb.append("</pre>"); if (i < len - 1) { sb.append("<span class=\"ellipses\">...</span><br/>"); } } return sb.toString(); }
From source file:com.ikon.dao.SearchDAO.java
License:Open Source License
/** * Security is evaluated by Lucene, so query result are already pruned. This means that every node * should have its security (user and role) info stored in Lucene. This provides very quick search * but security modifications need to be recursively applied to reach every document node in the * repository. This may take several hours (or days) is big repositories. */// w ww . j a v a 2 s . co m @SuppressWarnings("unchecked") private NodeResultSet runQueryLucene(FullTextSession ftSession, Query query, int offset, int limit) throws IOException, InvalidTokenOffsetsException, HibernateException { log.debug("runQueryLucene({}, {}, {}, {})", new Object[] { ftSession, query, offset, limit }); List<NodeQueryResult> results = new ArrayList<NodeQueryResult>(); NodeResultSet result = new NodeResultSet(); FullTextQuery ftq = ftSession.createFullTextQuery(query, NodeDocument.class, NodeFolder.class, NodeMail.class); ftq.setProjection(FullTextQuery.SCORE, FullTextQuery.THIS); ftq.enableFullTextFilter("readAccess"); QueryScorer scorer = new QueryScorer(query, NodeDocument.TEXT_FIELD); // Set limits ftq.setFirstResult(offset); ftq.setMaxResults(limit); // Highlight using a CSS style SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class='highlight'>", "</span>"); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, MAX_FRAGMENT_LEN)); for (Iterator<Object[]> it = ftq.iterate(); it.hasNext();) { Object[] qRes = it.next(); Float score = (Float) qRes[0]; NodeBase nBase = (NodeBase) qRes[1]; // Add result addResult(ftSession, results, highlighter, score, nBase); } result.setTotal(ftq.getResultSize()); result.setResults(results); log.debug("runQueryLucene: {}", result); return result; }
From source file:com.ikon.dao.SearchDAO.java
License:Open Source License
/** * Security is not evaluate in Lucene but by AccessManager. This means that Lucene will return all the * matched documents and this list need further prune by checking the READ permission in the AccessManager. * If the returned document list is very big, maybe lots of documents will be pruned because the user has * no read access and this would be a time consuming task. * /*from w w w .ja v a2 s .co m*/ * This method will read and check document from the Lucene query result until reach a given offset. After * that will add all the given document which the user have read access until the limit is reached. After * that will check if there is another document more who the user can read. */ @SuppressWarnings("unchecked") private NodeResultSet runQueryAccessManagerMore(FullTextSession ftSession, Query query, int offset, int limit) throws IOException, InvalidTokenOffsetsException, DatabaseException, HibernateException { log.debug("runQueryAccessManagerMore({}, {}, {}, {})", new Object[] { ftSession, query, offset, limit }); List<NodeQueryResult> results = new ArrayList<NodeQueryResult>(); NodeResultSet result = new NodeResultSet(); FullTextQuery ftq = ftSession.createFullTextQuery(query, NodeDocument.class, NodeFolder.class, NodeMail.class); ftq.setProjection(FullTextQuery.SCORE, FullTextQuery.THIS); ftq.enableFullTextFilter("readAccess"); QueryScorer scorer = new QueryScorer(query, NodeDocument.TEXT_FIELD); int count = 0; // Highlight using a CSS style SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class='highlight'>", "</span>"); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, MAX_FRAGMENT_LEN)); // Set limits Iterator<Object[]> it = ftq.iterate(); DbAccessManager am = SecurityHelper.getAccessManager(); // Bypass offset while (it.hasNext() && count < offset) { Object[] qRes = it.next(); NodeBase nBase = (NodeBase) qRes[1]; if (am.isGranted(nBase, Permission.READ)) { count++; 
} } // Read limit results while (it.hasNext() && results.size() < limit) { Object[] qRes = it.next(); Float score = (Float) qRes[0]; NodeBase nBase = (NodeBase) qRes[1]; if (am.isGranted(nBase, Permission.READ)) { // Add result addResult(ftSession, results, highlighter, score, nBase); } } // Check if pending results count = results.size() + offset; while (it.hasNext() && count < offset + limit + 1) { Object[] qRes = it.next(); NodeBase nBase = (NodeBase) qRes[1]; if (am.isGranted(nBase, Permission.READ)) { count++; } } result.setTotal(count); result.setResults(results); log.debug("runQueryAccessManagerMore: {}", result); return result; }
From source file:com.ikon.dao.SearchDAO.java
License:Open Source License
/** * Security is not evaluate in Lucene but by AccessManager. This means that Lucene will return all the * matched documents and this list need further prune by checking the READ permission in the AccessManager. * If the returned document list is very big, maybe lots of documents will be pruned because the user has * no read access and this would be a time consuming task. * //from ww w.j a v a2s . c o m * This method will read and check document from the Lucene query result until reach a given offset. After * that will add all the given document which the user have read access until the limit is reached. After * that will check if there are more documents (2 * limit) the user can read. */ @SuppressWarnings("unchecked") private NodeResultSet runQueryAccessManagerWindow(FullTextSession ftSession, Query query, int offset, int limit) throws IOException, InvalidTokenOffsetsException, DatabaseException, HibernateException { log.debug("runQueryAccessManagerWindow({}, {}, {}, {})", new Object[] { ftSession, query, offset, limit }); List<NodeQueryResult> results = new ArrayList<NodeQueryResult>(); NodeResultSet result = new NodeResultSet(); FullTextQuery ftq = ftSession.createFullTextQuery(query, NodeDocument.class, NodeFolder.class, NodeMail.class); ftq.setProjection(FullTextQuery.SCORE, FullTextQuery.THIS); ftq.enableFullTextFilter("readAccess"); QueryScorer scorer = new QueryScorer(query, NodeDocument.TEXT_FIELD); int count = 0; // Highlight using a CSS style SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class='highlight'>", "</span>"); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, MAX_FRAGMENT_LEN)); // Set limits Iterator<Object[]> it = ftq.iterate(); DbAccessManager am = SecurityHelper.getAccessManager(); // Bypass offset while (it.hasNext() && count < offset) { Object[] qRes = it.next(); NodeBase nBase = (NodeBase) qRes[1]; if (am.isGranted(nBase, Permission.READ)) { 
count++; } } // Read limit results while (it.hasNext() && results.size() < limit) { Object[] qRes = it.next(); Float score = (Float) qRes[0]; NodeBase nBase = (NodeBase) qRes[1]; if (am.isGranted(nBase, Permission.READ)) { // Add result addResult(ftSession, results, highlighter, score, nBase); } } // Check if pending results count = results.size() + offset; while (it.hasNext() && count < offset + limit * 2) { Object[] qRes = it.next(); NodeBase nBase = (NodeBase) qRes[1]; if (am.isGranted(nBase, Permission.READ)) { count++; } } result.setTotal(count); result.setResults(results); log.debug("runQueryAccessManagerWindow: {}", result); return result; }
From source file:com.ikon.dao.SearchDAO.java
License:Open Source License
/** * Security is not evaluate in Lucene but by AccessManager. This means that Lucene will return all the * matched documents and this list need further prune by checking the READ permission in the AccessManager. * If the returned document list is very big, maybe lots of documents will be pruned because the user has * no read access and this would be a time consuming task. * /* w w w . j av a2 s . c o m*/ * This method will read and check document from the Lucene query result until reach a given offset. After * that will add all the given document which the user have read access until the limit is reached. After * that will check if there are more documents (MAX_SEARCH_RESULTS) the user can read. */ @SuppressWarnings("unchecked") private NodeResultSet runQueryAccessManagerLimited(FullTextSession ftSession, Query query, int offset, int limit) throws IOException, InvalidTokenOffsetsException, DatabaseException, HibernateException { log.debug("runQueryAccessManagerLimited({}, {}, {}, {})", new Object[] { ftSession, query, offset, limit }); List<NodeQueryResult> results = new ArrayList<NodeQueryResult>(); NodeResultSet result = new NodeResultSet(); FullTextQuery ftq = ftSession.createFullTextQuery(query, NodeDocument.class, NodeFolder.class, NodeMail.class); ftq.setProjection(FullTextQuery.SCORE, FullTextQuery.THIS); ftq.enableFullTextFilter("readAccess"); QueryScorer scorer = new QueryScorer(query, NodeDocument.TEXT_FIELD); int count = 0; // Highlight using a CSS style SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class='highlight'>", "</span>"); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, MAX_FRAGMENT_LEN)); // Set limits Iterator<Object[]> it = ftq.iterate(); DbAccessManager am = SecurityHelper.getAccessManager(); // Bypass offset while (it.hasNext() && count < offset) { Object[] qRes = it.next(); NodeBase nBase = (NodeBase) qRes[1]; if (am.isGranted(nBase, 
Permission.READ)) { count++; } } // Read limit results while (it.hasNext() && results.size() < limit) { Object[] qRes = it.next(); Float score = (Float) qRes[0]; NodeBase nBase = (NodeBase) qRes[1]; if (am.isGranted(nBase, Permission.READ)) { // Add result addResult(ftSession, results, highlighter, score, nBase); } } // Check if pending results count = results.size() + offset; while (it.hasNext() && count < Config.MAX_SEARCH_RESULTS) { Object[] qRes = it.next(); NodeBase nBase = (NodeBase) qRes[1]; if (am.isGranted(nBase, Permission.READ)) { count++; } } result.setTotal(count); result.setResults(results); log.debug("Size: {}", results.size()); log.debug("runQueryAccessManagerLimited: {}", result); return result; }
From source file:com.main.Searcher.java
public List<Bean> searching(String s1, String s2, String radioBtn) throws IOException, ParseException, InvalidTokenOffsetsException { //getting reference of directory Directory dir = FSDirectory.open(Paths.get(Index_Dir)); //Index reader - an interface for accessing a point-in-time view of a lucene index IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); //analyzer with the default stop words, takes out the stop words Analyzer analyzer = new StandardAnalyzer(); String contents = "contents"; QueryParser parser = new QueryParser(contents, analyzer); int numOfDoc = reader.numDocs(); for (int i = 0; i < numOfDoc; i++) { Document d = reader.document(i); }/*from w w w . java2s . c o m*/ Query q1 = parser.parse(s1); Query q2 = parser.parse(s2); //conjuction, disjunction and negation BooleanQuery.Builder bq = new BooleanQuery.Builder(); //occur.must : both queries required in a doc if (radioBtn.equals("conjunction")) { bq.add(q1, BooleanClause.Occur.MUST); bq.add(q2, BooleanClause.Occur.MUST); bq.build(); } //occur.should: one of the q1 should be presen t in doc else if (radioBtn.equals("disjunction")) { bq.add(q1, BooleanClause.Occur.SHOULD); bq.add(q2, BooleanClause.Occur.SHOULD); bq.build(); } //negation: first should present , second should not else { bq.add(q1, BooleanClause.Occur.MUST); bq.add(q2, BooleanClause.Occur.MUST_NOT); bq.build(); } TopDocs hits = searcher.search(bq.build(), 10); Formatter formatter = new SimpleHTMLFormatter(); QueryScorer scorer = new QueryScorer(bq.build()); //used to markup highlighted terms found in the best sections of a cont Highlighter highlighter = new Highlighter(formatter, scorer); //It breaks cont up into same-size texts but does not split up spans Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 10); //breaks cont up into same-size fragments with no concerns over spotting sentence boundaries. 
//set fragmenter to highlighter highlighter.setTextFragmenter(fragmenter); for (int i = 0; i < hits.scoreDocs.length; i++) { Bean bean = new Bean(); int outResult = hits.scoreDocs.length; bean.setNumFile(outResult); int docid = hits.scoreDocs[i].doc; double rank = hits.scoreDocs[i].score; bean.setRankSc(rank); Document doc = searcher.doc(docid); String name = doc.get("name"); String title = doc.get("title"); bean.setTitle(name); String path = doc.get("path"); bean.setPath(path); String cont = doc.get("contents"); //Create token stream TokenStream stream = TokenSources.getAnyTokenStream(reader, docid, "contents", analyzer); //Get highlighted cont fragments String[] frags = highlighter.getBestFragments(stream, cont, 10); ArrayList<String> dummy = new ArrayList<>(); for (String frag : frags) { dummy.add(frag); } bean.setContent(dummy); beanList.add(bean); } dir.close(); // } return beanList; }
From source file:com.main.Searcher.java
public List<Bean> searching(String s1) throws IOException, ParseException, InvalidTokenOffsetsException { //Get directory reference Directory dir = FSDirectory.open(Paths.get(Index_Dir)); //Index reader - an interface for accessing a point-in-time view of a lucene index IndexReader reader = DirectoryReader.open(dir); //CreateIndexReader reader = DirectoryReader.open(dir); lucene searcher. It search over a single IndexReader. IndexSearcher searcher = new IndexSearcher(reader); //analyzer with the default stop words Analyzer analyzer = new StandardAnalyzer(); //Query parser to be used for creating TermQuery String queries = null;//from w ww . j a v a 2s . c o m String queryString = null; //regular search String contents = "contents"; BufferedReader in = null; if (queries != null) { in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8); } else { in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); } QueryParser parser = new QueryParser(contents, analyzer); int numOfDoc = reader.numDocs(); for (int i = 0; i < numOfDoc; i++) { Document d = reader.document(i); } Query q1 = parser.parse(s1); BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.add(q1, BooleanClause.Occur.MUST); //Search the lucene documents TopDocs hits = searcher.search(bq.build(), 10); // TopScoreDocCollector collector = TopScoreDocCollector.create(5); /** * Highlighter Code Start *** */ //Uses HTML <B></B> tag to highlight the searched terms Formatter formatter = new SimpleHTMLFormatter(); //It scores cont fragments by the number of unique q1 terms found //Basically the matching score in layman terms QueryScorer scorer = new QueryScorer(bq.build()); //used to markup highlighted terms found in the best sections of a cont Highlighter highlighter = new Highlighter(formatter, scorer); //It breaks cont up into same-size texts but does not split up spans Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 10); //breaks cont up into same-size fragments 
with no concerns over spotting sentence boundaries. //set fragmenter to highlighter highlighter.setTextFragmenter(fragmenter); //Iterate over found results for (int i = 0; i < hits.scoreDocs.length; i++) { Bean bean = new Bean(); //int rank = hits.scoreDocs.length; int outResult = hits.scoreDocs.length; bean.setNumFile(outResult); int docid = hits.scoreDocs[i].doc; double rank = hits.scoreDocs[i].score; bean.setRankSc(rank); Document doc = searcher.doc(docid); // String title = doc.get("title"); String name = doc.get("name"); String title = doc.get("title"); bean.setTitle(name); String path = doc.get("path"); bean.setPath(path); String cont = doc.get("contents"); //Create token stream TokenStream stream = TokenSources.getAnyTokenStream(reader, docid, "contents", analyzer); //Get highlighted cont fragments String[] frags = highlighter.getBestFragments(stream, cont, 10); ArrayList<String> dummy = new ArrayList<>(); for (String frag : frags) { dummy.add(frag); } bean.setContent(dummy); beanList.add(bean); } dir.close(); // } return beanList; }