Example usage for org.apache.lucene.search.highlight SimpleSpanFragmenter SimpleSpanFragmenter

List of usage examples for org.apache.lucene.search.highlight SimpleSpanFragmenter SimpleSpanFragmenter

Introduction

On this page you can find an example of how to use the org.apache.lucene.search.highlight SimpleSpanFragmenter constructor.

Prototype

public SimpleSpanFragmenter(QueryScorer queryScorer, int fragmentSize) 

Source Link

Usage

From source file:blackbelt.lucene.testHighlight.MainHighlight.java

License:Open Source License

public static void main(String[] args) throws ParseException, IOException {

    // Demo: highlight occurrences of a keyword inside a text blob and write
    // the result out as a small HTML page.
    String keyWord = "hibernate";
    String text = "Hibernate is an object-relational mapping (ORM) library for the Java language,"
            + "providing a framework for mapping an object-oriented domain model to a traditional relational"
            + "database. Hibernate solves object-relational impedance mismatch problems by replacing direct "
            + "persistence-related database accesses with high-level object handling functions. "
            + "Hibernate is free software that is distributed under the GNU Lesser General Public License. "
            + "Hibernate's primary feature is mapping from Java classes to database tables "
            + "(and from Java data types to SQL data types). Hibernate also provides data query"
            + " and retrieval facilities. Hibernate generates the SQL calls and attempts to relieve"
            + " the developer from manual result set handling and object conversion and keep the application"
            + " portable to all supported SQL databases with little performance overhead.";
    String result;

    QueryParser parser = new QueryParser(Version.LUCENE_30, "title", new StandardAnalyzer(Version.LUCENE_30));
    Query query = parser.parse(keyWord);

    // Wrap each matched term in <b>...</b>.
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
    TokenStream tokens = new StandardAnalyzer(Version.LUCENE_30).tokenStream("title", new StringReader(text));

    QueryScorer scorer = new QueryScorer(query, "title");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    // Fragments of roughly 85 chars that never split a matched span.
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 85));

    try {
        // Up to 4 best-scoring fragments, joined with an HTML line break.
        result = highlighter.getBestFragments(tokens, text, 4, "<BR/>...");
        System.out.println(result);
        System.out.println("\n" + result.length());
    } catch (InvalidTokenOffsetsException e) {
        throw new RuntimeException(e);
    }
    result = "<html><body>" + result + "</body></html>";
    // NOTE(review): hard-coded absolute path — this only works on the original
    // author's machine; consider a relative path or a command-line argument.
    File file = new File("C:\\Users\\forma702\\Desktop\\testHighlight.html");
    PrintWriter pw = null;
    try {
        pw = new PrintWriter(file);
        pw.print(result);
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } finally {
        // Close in finally so the file handle is released on every path
        // (the original closed only on the success path).
        if (pw != null) {
            pw.close();
        }
    }
}

From source file:com.gauronit.tagmata.core.Indexer.java

License:Open Source License

/**
 * Searches the given indexes for {@code searchText} and returns one snapshot per
 * matching card, each carrying a short highlighted snippet of the matched text.
 *
 * @param searchText   the user's query text
 * @param indexNames   names of index directories (under {@code indexDir}) to search
 * @param searchInTitle whether to match against the "title" field
 * @param searchInTags  whether to match against the "tags" field
 * @param searchInText  whether to match against the "text"/"analyzedText" fields
 * @param superFuzzy    fuzziness flag forwarded to IndexerUtil.getTokenizedQuery
 * @return matching card snapshots; empty (or partial) on error — exceptions are
 *         logged, not propagated
 */
public ArrayList<CardSnapshot> search(String searchText, ArrayList<String> indexNames, boolean searchInTitle,
        boolean searchInTags, boolean searchInText, boolean superFuzzy) {
    ArrayList<CardSnapshot> cardSnaps = new ArrayList<CardSnapshot>();
    try {
        // One searcher per selected index.
        ArrayList<IndexSearcher> searchers = new ArrayList<IndexSearcher>();

        for (String indexName : indexNames) {
            IndexReader reader = IndexReader
                    .open(FSDirectory.open(new File(indexDir + File.separator + indexName),
                            new SimpleFSLockFactory(indexDir + File.separator + indexName)));
            searchers.add(new IndexSearcher(reader));
        }

        // Build a single boolean query spanning all requested fields.
        BooleanQuery query = new BooleanQuery();
        if (searchInTitle) {
            IndexerUtil.getTokenizedQuery(query, "title", searchText, superFuzzy);
        }
        if (searchInTags) {
            IndexerUtil.getTokenizedQuery(query, "tags", searchText, superFuzzy);
        }
        if (searchInText) {
            IndexerUtil.getTokenizedQuery(query, "text", searchText, superFuzzy);
            IndexerUtil.getTokenizedQuery(query, "analyzedText", searchText, superFuzzy);
        }

        for (IndexSearcher searcher : searchers) {
            TopScoreDocCollector collector = TopScoreDocCollector.create(10000, false);
            searcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            for (ScoreDoc hit : hits) {
                Document doc = searcher.doc(hit.doc);

                // BUGFIX: was Version.LUCENE_20.LUCENE_35 — a static constant
                // accessed through another constant. Use the intended version
                // constant directly.
                TokenStream stream = TokenSources.getTokenStream("text", doc.get("analyzedText"),
                        new StandardAnalyzer(Version.LUCENE_35));
                QueryScorer scorer = new QueryScorer(query, "analyzedText");
                Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 20);
                Highlighter highlighter = new Highlighter(scorer);
                highlighter.setTextFragmenter(fragmenter);
                String[] fragments = highlighter.getBestFragments(stream, doc.get("text"), 5);

                // StringBuilder instead of += concatenation in a loop.
                StringBuilder highlights = new StringBuilder();
                for (String fragment : fragments) {
                    highlights.append(fragment).append("...");
                }

                // No highlightable fragment: fall back to the first 100 chars.
                if (highlights.length() == 0) {
                    String text = doc.get("text");
                    if (text.length() > 100) {
                        highlights.append(text.substring(0, 100));
                    } else {
                        highlights.append(text);
                    }
                }

                cardSnaps.add(new CardSnapshot(highlights.toString(), doc));
            }
            // Release index resources for this searcher.
            searcher.getIndexReader().close();
            searcher.close();
        }

    } catch (Exception ex) {
        // NOTE(review): errors are swallowed and an empty/partial list is
        // returned; callers cannot distinguish "no match" from "search failed".
        ex.printStackTrace();
    }
    return cardSnaps;
}

From source file:com.gitblit.LuceneExecutor.java

License:Apache License

/**
 * Builds an HTML snippet of {@code content} with the query terms highlighted.
 *
 * @param analyzer analyzer used to re-tokenize the content for highlighting
 * @param query    the search query whose terms should be highlighted
 * @param content  raw field content; {@code null} is treated as empty
 * @param result   the search result — its type (commit vs blob) selects the
 *                 fragment length and output markup, its path supplies the
 *                 syntax-highlighting language hint
 * @return HTML-escaped fragment markup; empty string for a blob with no match
 * @throws IOException
 * @throws InvalidTokenOffsetsException
 */
private String getHighlightedFragment(Analyzer analyzer, Query query, String content, SearchResult result)
        throws IOException, InvalidTokenOffsetsException {
    if (content == null) {
        content = "";
    }

    // Commit messages get longer fragments than blob (file) matches.
    int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150;

    QueryScorer scorer = new QueryScorer(query, "content");
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength);

    // Use an artificial delimiter for the matched terms so the fragment can be
    // HTML-escaped first and the highlight spans injected afterwards.
    String termTag = "!!--[";
    String termTagEnd = "]--!!";
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(fragmenter);

    String[] fragments = highlighter.getBestFragments(analyzer, "content", content, 3);
    if (ArrayUtils.isEmpty(fragments)) {
        if (SearchObjectType.blob == result.type) {
            return "";
        }
        // No fragment for a commit: clip the commit message instead.
        String fragment = content;
        if (fragment.length() > fragmentLength) {
            fragment = fragment.substring(0, fragmentLength) + "...";
        }
        return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true) + "</pre>";
    }

    // Make sure we have unique fragments (LinkedHashSet preserves score order).
    Set<String> uniqueFragments = new LinkedHashSet<String>();
    for (String fragment : fragments) {
        uniqueFragments.add(fragment);
    }
    fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]);

    StringBuilder sb = new StringBuilder();
    for (int i = 0, len = fragments.length; i < len; i++) {
        String fragment = fragments[i];
        String tag = "<pre class=\"text\">";

        // Resurrect the raw fragment by removing the artificial delimiters.
        String raw = fragment.replace(termTag, "").replace(termTagEnd, "");

        // Determine position of the raw fragment in the content.
        int pos = content.indexOf(raw);
        if (pos < 0) {
            // BUGFIX: if analysis altered the text so the raw fragment is not
            // found verbatim, indexOf returns -1 and the substring calls below
            // would throw StringIndexOutOfBoundsException. Fall back to
            // position 0 (no first-line reconstruction, line number 1).
            pos = 0;
        }

        // Restore the complete first line of the fragment.
        int c = pos;
        while (c > 0) {
            c--;
            if (content.charAt(c) == '\n') {
                break;
            }
        }
        if (c > 0) {
            // Inject the leading chunk of the first fragment line.
            fragment = content.substring(c + 1, pos) + fragment;
        }

        if (SearchObjectType.blob == result.type) {
            // Count lines up to the fragment to compute its starting line number.
            int line = Math.max(1, StringUtils.countLines(content.substring(0, pos)));

            // Create the fragment tag with line number and language hint.
            String lang = "";
            String ext = StringUtils.getFileExtension(result.path).toLowerCase();
            if (!StringUtils.isEmpty(ext)) {
                // Maintain the leading space!
                lang = " lang-" + ext;
            }
            tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang);

        }

        sb.append(tag);

        // Escape first, then replace the artificial delimiters with HTML tags.
        String html = StringUtils.escapeForHtml(fragment, false);
        html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
        sb.append(html);
        sb.append("</pre>");
        if (i < len - 1) {
            sb.append("<span class=\"ellipses\">...</span><br/>");
        }
    }
    return sb.toString();
}

From source file:com.gitblit.service.LuceneService.java

License:Apache License

/**
 *
 * @param analyzer/*from  ww w .j  av  a2s  .c o m*/
 * @param query
 * @param content
 * @param result
 * @return
 * @throws IOException
 * @throws InvalidTokenOffsetsException
 */
private String getHighlightedFragment(Analyzer analyzer, Query query, String content, SearchResult result)
        throws IOException, InvalidTokenOffsetsException {
    if (content == null) {
        content = "";
    }

    int tabLength = storedSettings.getInteger(Keys.web.tabLength, 4);
    int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150;

    QueryScorer scorer = new QueryScorer(query, "content");
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength);

    // use an artificial delimiter for the token
    String termTag = "!!--[";
    String termTagEnd = "]--!!";
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(fragmenter);

    String[] fragments = highlighter.getBestFragments(analyzer, "content", content, 3);
    if (ArrayUtils.isEmpty(fragments)) {
        if (SearchObjectType.blob == result.type) {
            return "";
        }
        // clip commit message
        String fragment = content;
        if (fragment.length() > fragmentLength) {
            fragment = fragment.substring(0, fragmentLength) + "...";
        }
        return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true, tabLength) + "</pre>";
    }

    // make sure we have unique fragments
    Set<String> uniqueFragments = new LinkedHashSet<String>();
    for (String fragment : fragments) {
        uniqueFragments.add(fragment);
    }
    fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]);

    StringBuilder sb = new StringBuilder();
    for (int i = 0, len = fragments.length; i < len; i++) {
        String fragment = fragments[i];
        String tag = "<pre class=\"text\">";

        // resurrect the raw fragment from removing the artificial delimiters
        String raw = fragment.replace(termTag, "").replace(termTagEnd, "");

        // determine position of the raw fragment in the content
        int pos = content.indexOf(raw);

        // restore complete first line of fragment
        int c = pos;
        while (c > 0) {
            c--;
            if (content.charAt(c) == '\n') {
                break;
            }
        }
        if (c > 0) {
            // inject leading chunk of first fragment line
            fragment = content.substring(c + 1, pos) + fragment;
        }

        if (SearchObjectType.blob == result.type) {
            // count lines as offset into the content for this fragment
            int line = Math.max(1, StringUtils.countLines(content.substring(0, pos)));

            // create fragment tag with line number and language
            String lang = "";
            String ext = StringUtils.getFileExtension(result.path).toLowerCase();
            if (!StringUtils.isEmpty(ext)) {
                // maintain leading space!
                lang = " lang-" + ext;
            }
            tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang);

        }

        sb.append(tag);

        // replace the artificial delimiter with html tags
        String html = StringUtils.escapeForHtml(fragment, false);
        html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
        sb.append(html);
        sb.append("</pre>");
        if (i < len - 1) {
            sb.append("<span class=\"ellipses\">...</span><br/>");
        }
    }
    return sb.toString();
}

From source file:com.ikon.dao.SearchDAO.java

License:Open Source License

/**
 * Security is evaluated by Lucene, so query results are already pruned. This means that every
 * node must have its security (user and role) info stored in Lucene. This gives very fast
 * searches, but security modifications need to be applied recursively to every document node in
 * the repository, which may take several hours (or days) in big repositories.
 */
@SuppressWarnings("unchecked")
private NodeResultSet runQueryLucene(FullTextSession ftSession, Query query, int offset, int limit)
        throws IOException, InvalidTokenOffsetsException, HibernateException {
    log.debug("runQueryLucene({}, {}, {}, {})", new Object[] { ftSession, query, offset, limit });

    NodeResultSet resultSet = new NodeResultSet();
    List<NodeQueryResult> queryResults = new ArrayList<NodeQueryResult>();

    FullTextQuery fullTextQuery = ftSession.createFullTextQuery(query, NodeDocument.class, NodeFolder.class,
            NodeMail.class);
    fullTextQuery.setProjection(FullTextQuery.SCORE, FullTextQuery.THIS);
    fullTextQuery.enableFullTextFilter("readAccess");

    // Paging window: Lucene itself applies offset/limit here.
    fullTextQuery.setFirstResult(offset);
    fullTextQuery.setMaxResults(limit);

    // Highlight matched terms with a CSS class, keeping spans intact.
    QueryScorer queryScorer = new QueryScorer(query, NodeDocument.TEXT_FIELD);
    Highlighter fragmentHighlighter = new Highlighter(
            new SimpleHTMLFormatter("<span class='highlight'>", "</span>"), queryScorer);
    fragmentHighlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer, MAX_FRAGMENT_LEN));

    // Each row is a projection pair: [0] = score, [1] = the matched node.
    Iterator<Object[]> rows = fullTextQuery.iterate();
    while (rows.hasNext()) {
        Object[] row = rows.next();
        addResult(ftSession, queryResults, fragmentHighlighter, (Float) row[0], (NodeBase) row[1]);
    }

    resultSet.setTotal(fullTextQuery.getResultSize());
    resultSet.setResults(queryResults);
    log.debug("runQueryLucene: {}", resultSet);
    return resultSet;
}

From source file:com.ikon.dao.SearchDAO.java

License:Open Source License

/**
 * Security is not evaluated in Lucene but by the AccessManager. This means that Lucene returns
 * all the matched documents, and this list needs further pruning by checking the READ permission
 * in the AccessManager. If the returned document list is very big, lots of documents may be
 * pruned because the user has no read access, which can be time consuming.
 *
 * This method reads and checks documents from the Lucene query result until the given offset is
 * reached. After that it adds every document the user has read access to until the limit is
 * reached. Finally it checks whether there is at least one more readable document (so callers
 * can tell that more results exist beyond this page).
 */
@SuppressWarnings("unchecked")
private NodeResultSet runQueryAccessManagerMore(FullTextSession ftSession, Query query, int offset, int limit)
        throws IOException, InvalidTokenOffsetsException, DatabaseException, HibernateException {
    log.debug("runQueryAccessManagerMore({}, {}, {}, {})", new Object[] { ftSession, query, offset, limit });
    List<NodeQueryResult> results = new ArrayList<NodeQueryResult>();
    NodeResultSet result = new NodeResultSet();
    // Projection rows: [0] = score, [1] = the matched node entity.
    FullTextQuery ftq = ftSession.createFullTextQuery(query, NodeDocument.class, NodeFolder.class,
            NodeMail.class);
    ftq.setProjection(FullTextQuery.SCORE, FullTextQuery.THIS);
    ftq.enableFullTextFilter("readAccess");
    QueryScorer scorer = new QueryScorer(query, NodeDocument.TEXT_FIELD);
    // Running count of documents the user is allowed to read.
    int count = 0;

    // Highlight matched terms using a CSS style
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class='highlight'>", "</span>");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, MAX_FRAGMENT_LEN));

    // Iterate the full (unpruned) Lucene result; paging is done manually below
    Iterator<Object[]> it = ftq.iterate();
    DbAccessManager am = SecurityHelper.getAccessManager();

    // Skip the first `offset` documents that the user can actually read
    while (it.hasNext() && count < offset) {
        Object[] qRes = it.next();
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            count++;
        }
    }

    // Collect up to `limit` readable documents for this page
    while (it.hasNext() && results.size() < limit) {
        Object[] qRes = it.next();
        Float score = (Float) qRes[0];
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            // Add result
            addResult(ftSession, results, highlighter, score, nBase);
        }
    }

    // Probe for one more readable document beyond this page, so the reported
    // total (offset + page size + 1) signals that further results exist.
    count = results.size() + offset;

    while (it.hasNext() && count < offset + limit + 1) {
        Object[] qRes = it.next();
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            count++;
        }
    }

    result.setTotal(count);
    result.setResults(results);
    log.debug("runQueryAccessManagerMore: {}", result);
    return result;
}

From source file:com.ikon.dao.SearchDAO.java

License:Open Source License

/**
 * Security is not evaluated in Lucene but by the AccessManager. This means that Lucene returns
 * all the matched documents, and this list needs further pruning by checking the READ permission
 * in the AccessManager. If the returned document list is very big, lots of documents may be
 * pruned because the user has no read access, which can be time consuming.
 *
 * This method reads and checks documents from the Lucene query result until the given offset is
 * reached. After that it adds every document the user has read access to until the limit is
 * reached. Finally it checks how many more readable documents exist, counting up to a window of
 * (2 * limit) past the offset.
 */
@SuppressWarnings("unchecked")
private NodeResultSet runQueryAccessManagerWindow(FullTextSession ftSession, Query query, int offset, int limit)
        throws IOException, InvalidTokenOffsetsException, DatabaseException, HibernateException {
    log.debug("runQueryAccessManagerWindow({}, {}, {}, {})", new Object[] { ftSession, query, offset, limit });
    List<NodeQueryResult> results = new ArrayList<NodeQueryResult>();
    NodeResultSet result = new NodeResultSet();
    // Projection rows: [0] = score, [1] = the matched node entity.
    FullTextQuery ftq = ftSession.createFullTextQuery(query, NodeDocument.class, NodeFolder.class,
            NodeMail.class);
    ftq.setProjection(FullTextQuery.SCORE, FullTextQuery.THIS);
    ftq.enableFullTextFilter("readAccess");
    QueryScorer scorer = new QueryScorer(query, NodeDocument.TEXT_FIELD);
    // Running count of documents the user is allowed to read.
    int count = 0;

    // Highlight matched terms using a CSS style
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class='highlight'>", "</span>");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, MAX_FRAGMENT_LEN));

    // Iterate the full (unpruned) Lucene result; paging is done manually below
    Iterator<Object[]> it = ftq.iterate();
    DbAccessManager am = SecurityHelper.getAccessManager();

    // Skip the first `offset` documents that the user can actually read
    while (it.hasNext() && count < offset) {
        Object[] qRes = it.next();
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            count++;
        }
    }

    // Collect up to `limit` readable documents for this page
    while (it.hasNext() && results.size() < limit) {
        Object[] qRes = it.next();
        Float score = (Float) qRes[0];
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            // Add result
            addResult(ftSession, results, highlighter, score, nBase);
        }
    }

    // Count readable documents beyond this page, up to a window of 2 * limit
    // past the offset; the reported total is capped accordingly.
    count = results.size() + offset;

    while (it.hasNext() && count < offset + limit * 2) {
        Object[] qRes = it.next();
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            count++;
        }
    }

    result.setTotal(count);
    result.setResults(results);
    log.debug("runQueryAccessManagerWindow: {}", result);
    return result;
}

From source file:com.ikon.dao.SearchDAO.java

License:Open Source License

/**
 * Security is not evaluated in Lucene but by the AccessManager. This means that Lucene returns
 * all the matched documents, and this list needs further pruning by checking the READ permission
 * in the AccessManager. If the returned document list is very big, lots of documents may be
 * pruned because the user has no read access, which can be time consuming.
 *
 * This method reads and checks documents from the Lucene query result until the given offset is
 * reached. After that it adds every document the user has read access to until the limit is
 * reached. Finally it counts the remaining readable documents up to a global cap of
 * Config.MAX_SEARCH_RESULTS.
 */
@SuppressWarnings("unchecked")
private NodeResultSet runQueryAccessManagerLimited(FullTextSession ftSession, Query query, int offset,
        int limit) throws IOException, InvalidTokenOffsetsException, DatabaseException, HibernateException {
    log.debug("runQueryAccessManagerLimited({}, {}, {}, {})", new Object[] { ftSession, query, offset, limit });
    List<NodeQueryResult> results = new ArrayList<NodeQueryResult>();
    NodeResultSet result = new NodeResultSet();
    // Projection rows: [0] = score, [1] = the matched node entity.
    FullTextQuery ftq = ftSession.createFullTextQuery(query, NodeDocument.class, NodeFolder.class,
            NodeMail.class);
    ftq.setProjection(FullTextQuery.SCORE, FullTextQuery.THIS);
    ftq.enableFullTextFilter("readAccess");
    QueryScorer scorer = new QueryScorer(query, NodeDocument.TEXT_FIELD);
    // Running count of documents the user is allowed to read.
    int count = 0;

    // Highlight matched terms using a CSS style
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class='highlight'>", "</span>");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, MAX_FRAGMENT_LEN));

    // Iterate the full (unpruned) Lucene result; paging is done manually below
    Iterator<Object[]> it = ftq.iterate();
    DbAccessManager am = SecurityHelper.getAccessManager();

    // Skip the first `offset` documents that the user can actually read
    while (it.hasNext() && count < offset) {
        Object[] qRes = it.next();
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            count++;
        }
    }

    // Collect up to `limit` readable documents for this page
    while (it.hasNext() && results.size() < limit) {
        Object[] qRes = it.next();
        Float score = (Float) qRes[0];
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            // Add result
            addResult(ftSession, results, highlighter, score, nBase);
        }
    }

    // Count the remaining readable documents, capped at MAX_SEARCH_RESULTS;
    // the reported total reflects that cap rather than the true match count.
    count = results.size() + offset;

    while (it.hasNext() && count < Config.MAX_SEARCH_RESULTS) {
        Object[] qRes = it.next();
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            count++;
        }
    }

    result.setTotal(count);
    result.setResults(results);
    log.debug("Size: {}", results.size());
    log.debug("runQueryAccessManagerLimited: {}", result);
    return result;
}

From source file:com.main.Searcher.java

/**
 * Searches the index for two query strings combined with the boolean mode
 * selected by {@code radioBtn} ("conjunction", "disjunction", anything else =
 * negation) and returns one Bean per hit with rank, path and highlighted
 * content fragments.
 *
 * @param s1 first query string (parsed against the "contents" field)
 * @param s2 second query string
 * @param radioBtn boolean combination mode
 * @return the accumulated bean list (see NOTE below)
 */
public List<Bean> searching(String s1, String s2, String radioBtn)
        throws IOException, ParseException, InvalidTokenOffsetsException {
    // Open the index directory and a point-in-time reader over it.
    Directory dir = FSDirectory.open(Paths.get(Index_Dir));
    IndexReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);

    // Analyzer with the default stop words.
    Analyzer analyzer = new StandardAnalyzer();
    QueryParser parser = new QueryParser("contents", analyzer);
    // (Removed: a dead loop that loaded every document in the index and
    // discarded it — pure wasted I/O.)

    Query q1 = parser.parse(s1);
    Query q2 = parser.parse(s2);

    // Conjunction, disjunction or negation of the two parsed queries.
    BooleanQuery.Builder bq = new BooleanQuery.Builder();
    if (radioBtn.equals("conjunction")) {
        // MUST: both queries required in a doc.
        bq.add(q1, BooleanClause.Occur.MUST);
        bq.add(q2, BooleanClause.Occur.MUST);
    } else if (radioBtn.equals("disjunction")) {
        // SHOULD: at least one of the queries should be present in a doc.
        bq.add(q1, BooleanClause.Occur.SHOULD);
        bq.add(q2, BooleanClause.Occur.SHOULD);
    } else {
        // Negation: first must be present, second must not.
        bq.add(q1, BooleanClause.Occur.MUST);
        bq.add(q2, BooleanClause.Occur.MUST_NOT);
    }

    // Build the query once and reuse it (the original built it repeatedly,
    // discarding intermediate results).
    BooleanQuery query = bq.build();

    TopDocs hits = searcher.search(query, 10);

    // Uses HTML <B></B> tags to highlight the searched terms.
    Formatter formatter = new SimpleHTMLFormatter();
    // Scores fragments by the number of unique query terms found.
    QueryScorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    // Same-size fragments that never split a matched span.
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 10);
    highlighter.setTextFragmenter(fragmenter);

    for (int i = 0; i < hits.scoreDocs.length; i++) {
        Bean bean = new Bean();
        bean.setNumFile(hits.scoreDocs.length);

        int docid = hits.scoreDocs[i].doc;
        bean.setRankSc(hits.scoreDocs[i].score);

        Document doc = searcher.doc(docid);
        bean.setTitle(doc.get("name"));
        bean.setPath(doc.get("path"));

        String cont = doc.get("contents");
        // Re-tokenize the stored content and collect highlighted fragments.
        TokenStream stream = TokenSources.getAnyTokenStream(reader, docid, "contents", analyzer);
        String[] frags = highlighter.getBestFragments(stream, cont, 10);

        ArrayList<String> fragList = new ArrayList<>();
        for (String frag : frags) {
            fragList.add(frag);
        }
        bean.setContent(fragList);
        // NOTE(review): beanList appears to be a field, so results accumulate
        // across calls — confirm whether it should be cleared per search.
        beanList.add(bean);
    }

    // BUGFIX: the IndexReader was never closed, leaking file handles.
    reader.close();
    dir.close();
    return beanList;
}

From source file:com.main.Searcher.java

/**
 * Searches the index for a single query string parsed against the "contents"
 * field and returns one Bean per hit with rank, path and highlighted content
 * fragments.
 *
 * @param s1 the query string
 * @return the accumulated bean list (see NOTE below)
 */
public List<Bean> searching(String s1) throws IOException, ParseException, InvalidTokenOffsetsException {
    // Open the index directory and a point-in-time reader over it.
    Directory dir = FSDirectory.open(Paths.get(Index_Dir));
    IndexReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);

    // Analyzer with the default stop words.
    Analyzer analyzer = new StandardAnalyzer();
    QueryParser parser = new QueryParser("contents", analyzer);
    // (Removed: a BufferedReader opened on System.in behind an always-null
    // `queries` flag — it was never read and never closed — and a dead loop
    // that loaded every document in the index and discarded it.)

    Query q1 = parser.parse(s1);

    BooleanQuery.Builder bq = new BooleanQuery.Builder();
    bq.add(q1, BooleanClause.Occur.MUST);
    // Build the query once and reuse it for both search and scoring.
    BooleanQuery query = bq.build();

    // Search the lucene documents.
    TopDocs hits = searcher.search(query, 10);

    // Uses HTML <B></B> tags to highlight the searched terms.
    Formatter formatter = new SimpleHTMLFormatter();
    // Scores fragments by the number of unique query terms found.
    QueryScorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    // Same-size fragments that never split a matched span.
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 10);
    highlighter.setTextFragmenter(fragmenter);

    // Iterate over found results.
    for (int i = 0; i < hits.scoreDocs.length; i++) {
        Bean bean = new Bean();
        bean.setNumFile(hits.scoreDocs.length);

        int docid = hits.scoreDocs[i].doc;
        bean.setRankSc(hits.scoreDocs[i].score);

        Document doc = searcher.doc(docid);
        bean.setTitle(doc.get("name"));
        bean.setPath(doc.get("path"));

        String cont = doc.get("contents");
        // Re-tokenize the stored content and collect highlighted fragments.
        TokenStream stream = TokenSources.getAnyTokenStream(reader, docid, "contents", analyzer);
        String[] frags = highlighter.getBestFragments(stream, cont, 10);

        ArrayList<String> fragList = new ArrayList<>();
        for (String frag : frags) {
            fragList.add(frag);
        }
        bean.setContent(fragList);
        // NOTE(review): beanList appears to be a field, so results accumulate
        // across calls — confirm whether it should be cleared per search.
        beanList.add(bean);
    }

    // BUGFIX: the IndexReader was never closed, leaking file handles.
    reader.close();
    dir.close();
    return beanList;
}