Example usage for org.apache.lucene.search.highlight TokenSources getAnyTokenStream

List of usage examples for org.apache.lucene.search.highlight TokenSources getAnyTokenStream

Introduction

In this page you can find the example usage for org.apache.lucene.search.highlight TokenSources getAnyTokenStream.

Prototype

@Deprecated 
public static TokenStream getAnyTokenStream(IndexReader reader, int docId, String field, Document document,
        Analyzer analyzer) throws IOException 

Source Link

Document

A convenience method that tries to first get a TokenStreamFromTermVector for the specified docId, then, falls back to using the passed in org.apache.lucene.document.Document to retrieve the TokenStream.

Usage

From source file:aos.lucene.tools.HighlightTest.java

License:Apache License

public void testHits() throws Exception {
    IndexSearcher searcher = new IndexSearcher(TestUtil.getBookIndexDirectory());
    TermQuery query = new TermQuery(new Term("title", "action"));
    TopDocs hits = searcher.search(query, 10);

    QueryScorer scorer = new QueryScorer(query, "title");
    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

    Analyzer analyzer = new SimpleAnalyzer();

    for (ScoreDoc sd : hits.scoreDocs) {
        Document doc = searcher.doc(sd.doc);
        String title = doc.get("title");

        TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc,
                analyzer);//from  w w  w.  j  a  va2 s.  c om
        String fragment = highlighter.getBestFragment(stream, title);

        LOGGER.info(fragment);
    }
}

From source file:ci6226.loadIndex.java

/**
 * This demonstrates a typical paging search scenario, where the search
 * engine presents pages of size n to the user. The user can then go to the
 * next page if interested in the next hits.
 *
 * When the query is executed for the first time, then only enough results
 * are collected to fill 5 result pages. If the user wants to page beyond
 * this limit, then the query is executed another time and all hits are
 * collected./*from  w ww .java  2s  .  c o m*/
 *
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive, Analyzer analyzer) throws IOException, InvalidTokenOffsetsException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }

            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("review_id");
            if (path != null) {
                System.out.println(ANSI_BLUE + (i + 1) + ANSI_RESET + "\nScore=\t" + hits[i].score);
                String title = doc.get("business_id");
                if (title != null) {

                    String text = doc.get("text");
                    TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(),
                            hits[i].doc, "text", doc, analyzer);//TokenSources.getAnyTokenStream(searcher.getIndexReader() ,"text", analyzer);
                    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(ANSI_RED, ANSI_RESET);
                    // SimpleFragmenter fragmenter = new SimpleFragmenter(80);
                    Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
                    TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 4);
                    System.out.print("Snippet=\t");
                    for (int j = 0; j < frag.length; j++) {
                        if ((frag[j] != null) && (frag[j].getScore() > 0)) {
                            System.out.println((frag[j].toString()));
                        }
                    }
                    //System.out.print("\n");
                    System.out.println("Full Review=\t" + doc.get("text") + "\nBusinessID=\t" + title);
                }
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }

        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {

                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");
                int cpage = start / hitsPerPage;
                System.out.println(String.format("Current page=%d,max page=%d", cpage + 1,
                        1 + numTotalHits / hitsPerPage));
                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit) {
                break;
            }
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }

}

From source file:com.leavesfly.lia.tool.HighlightTest.java

License:Apache License

public void testHits() throws Exception {
    IndexSearcher searcher = new IndexSearcher(TestUtil.getBookIndexDirectory());
    TermQuery query = new TermQuery(new Term("title", "action"));
    TopDocs hits = searcher.search(query, 10);

    QueryScorer scorer = new QueryScorer(query, "title");
    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

    Analyzer analyzer = new SimpleAnalyzer();

    for (ScoreDoc sd : hits.scoreDocs) {
        Document doc = searcher.doc(sd.doc);
        String title = doc.get("title");

        TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc,
                analyzer);//from w  w  w. j  a v a  2 s .  c o  m
        String fragment = highlighter.getBestFragment(stream, title);

        System.out.println(fragment);
    }
}

From source file:com.mathworks.xzheng.tools.HighlightTest.java

License:Apache License

public void testHits() throws Exception {
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(TestUtil.getBookIndexDirectory()));
    TermQuery query = new TermQuery(new Term("title", "action"));
    TopDocs hits = searcher.search(query, 10);

    QueryScorer scorer = new QueryScorer(query, "title");
    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

    Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_46);

    for (ScoreDoc sd : hits.scoreDocs) {
        Document doc = searcher.doc(sd.doc);
        String title = doc.get("title");

        TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc,
                analyzer);// w w  w  .  ja va 2s. co  m
        String fragment = highlighter.getBestFragment(stream, title);

        System.out.println(fragment);
    }
}

From source file:de.blizzy.documentr.search.GetSearchHitTask.java

License:Open Source License

@Override
public SearchHit call() throws IOException {
    Formatter formatter = new SimpleHTMLFormatter("<strong>", "</strong>"); //$NON-NLS-1$ //$NON-NLS-2$
    Scorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(FRAGMENT_SIZE));
    highlighter.setEncoder(new SimpleHTMLEncoder());

    Document doc = reader.document(docId);
    String projectName = doc.get(PageIndex.PROJECT);
    String branchName = doc.get(PageIndex.BRANCH);
    String path = doc.get(PageIndex.PATH);
    String title = doc.get(PageIndex.TITLE);
    String text = doc.get(PageIndex.TEXT);
    String[] tagsArray = doc.getValues(PageIndex.TAG);
    List<String> tags = Lists.newArrayList(tagsArray);
    Collections.sort(tags);//from  w ww. j  av a  2s.  c om
    TokenStream tokenStream = null;
    String highlightedText = StringUtils.EMPTY;
    try {
        tokenStream = TokenSources.getAnyTokenStream(reader, docId, PageIndex.TEXT, doc, analyzer);
        String[] fragments = highlighter.getBestFragments(tokenStream, text, NUM_FRAGMENTS);
        cleanupFragments(fragments);
        highlightedText = Util.join(fragments, " <strong>...</strong> "); //$NON-NLS-1$
    } catch (InvalidTokenOffsetsException e) {
        // ignore
    } finally {
        Closeables.closeQuietly(tokenStream);
    }
    return new SearchHit(projectName, branchName, path, title, highlightedText, tags);
}