Example usage for the org.apache.lucene.search.highlight.SimpleSpanFragmenter constructor

List of usage examples for the org.apache.lucene.search.highlight.SimpleSpanFragmenter constructor

Introduction

On this page you can find example usages of the org.apache.lucene.search.highlight.SimpleSpanFragmenter constructor.

Prototype

public SimpleSpanFragmenter(QueryScorer queryScorer) 

Source Link

Usage

From source file:aos.lucene.tools.HighlightIt.java

License:Apache License

/**
 * Highlights matches of the query "term" in the {@code text} value (defined
 * outside this method) and writes the highlighted fragments, wrapped in a
 * minimal HTML page, to the file named on the command line.
 *
 * @param args exactly one element: the path of the HTML file to write
 * @throws Exception if parsing the query, highlighting or writing the file fails
 */
public static void main(String[] args) throws Exception {

    if (args.length != 1) {
        System.err.println("Usage: HighlightIt <filename-out>");
        System.exit(-1);
    }

    String filename = args[0];

    // Parse the search text against field "f".
    String searchText = "term";
    QueryParser parser = new QueryParser(Version.LUCENE_46,
            "f",
            new StandardAnalyzer(Version.LUCENE_46));
    Query query = parser.parse(searchText);

    // Every highlighted term is wrapped in a span styled by the CSS written below.
    SimpleHTMLFormatter formatter =
            new SimpleHTMLFormatter("<span class=\"highlight\">",
                    "</span>");

    TokenStream tokens = new StandardAnalyzer(Version.LUCENE_46)
            .tokenStream("f", new StringReader(text));

    QueryScorer scorer = new QueryScorer(query, "f");

    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(
            new SimpleSpanFragmenter(scorer));

    // Request at most 3 fragments, joined with "...".
    String result =
            highlighter.getBestFragments(tokens, text, 3, "...");

    // try-with-resources closes the file even when one of the writes throws.
    try (FileWriter writer = new FileWriter(filename)) {
        writer.write("<html>");
        writer.write("<style>\n" +
                ".highlight {\n" +
                " background: yellow;\n" +
                "}\n" +
                "</style>");
        writer.write("<body>");
        writer.write(result);
        writer.write("</body></html>");
    }
}

From source file:aos.lucene.tools.HighlightTest.java

License:Apache License

/**
 * Highlights the term "fox" in a single sentence and checks that the match
 * is wrapped in {@code <B>} tags while the rest of the text is untouched.
 *
 * @throws Exception if tokenizing or highlighting fails
 */
public void testHighlighting() throws Exception {
    final String sentence = "The quick brown fox jumps over the lazy dog";
    final TermQuery foxQuery = new TermQuery(new Term("field", "fox"));

    final TokenStream sentenceTokens =
            new SimpleAnalyzer().tokenStream("field", new StringReader(sentence));

    // The same scorer drives both the fragmenter and the highlighter.
    final QueryScorer foxScorer = new QueryScorer(foxQuery, "field");
    final Highlighter highlighter = new Highlighter(foxScorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(foxScorer));

    final String highlighted = highlighter.getBestFragment(sentenceTokens, sentence);
    assertEquals("The quick brown <B>fox</B> jumps over the lazy dog", highlighted);
}

From source file:aos.lucene.tools.HighlightTest.java

License:Apache License

/**
 * Searches the book index for "action" in the title field and logs the best
 * highlighted title fragment of each of the top 10 hits.
 *
 * @throws Exception if searching, loading a document or highlighting fails
 */
public void testHits() throws Exception {
    IndexSearcher bookSearcher = new IndexSearcher(TestUtil.getBookIndexDirectory());
    TermQuery titleQuery = new TermQuery(new Term("title", "action"));
    ScoreDoc[] matches = bookSearcher.search(titleQuery, 10).scoreDocs;

    QueryScorer titleScorer = new QueryScorer(titleQuery, "title");
    Highlighter highlighter = new Highlighter(titleScorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(titleScorer));

    Analyzer fallbackAnalyzer = new SimpleAnalyzer();

    for (int i = 0; i < matches.length; i++) {
        int docId = matches[i].doc;
        Document book = bookSearcher.doc(docId);
        String titleText = book.get("title");

        // Token stream for the stored title; the analyzer is handed to TokenSources alongside the document.
        TokenStream titleTokens = TokenSources.getAnyTokenStream(
                bookSearcher.getIndexReader(), docId, "title", book, fallbackAnalyzer);
        String bestFragment = highlighter.getBestFragment(titleTokens, titleText);

        LOGGER.info(bestFragment);
    }
}

From source file:com.aurel.track.lucene.search.LuceneSearcher.java

License:Open Source License

/**
 * Executes the query against the work item index and returns the issue
 * numbers of the matching documents, optionally collecting a highlighted
 * fragment per hit.
 *
 * @param query the Lucene query to execute
 * @param userQueryString the query as typed by the user (used for debug logging only)
 * @param preprocessedQueryString the preprocessed query (used for debug logging only)
 * @param highlightedTextMap if not {@code null}, receives issue number to best
 *        highlighted fragment of the highlighter field, for hits that have one
 * @return one entry per hit in score order; an entry stays 0 when the document
 *         could not be loaded or carries no issue number. An empty array is
 *         returned when no searcher is available or the search itself fails.
 * @throws BooleanQuery.TooManyClauses rethrown after logging so the caller can react
 */
private static int[] getQueryResults(Query query, String userQueryString, String preprocessedQueryString,
        Map<Integer, String> highlightedTextMap) {
    int[] hitIDs = new int[0];
    IndexSearcher indexSearcher = null;
    try {
        long start = 0;
        if (LOGGER.isDebugEnabled()) {
            start = System.currentTimeMillis();
        }
        indexSearcher = getIndexSearcher(LuceneUtil.INDEXES.WORKITEM_INDEX);
        if (indexSearcher == null) {
            return hitIDs;
        }
        ScoreDoc[] scoreDocs;
        try {
            TopDocsCollector<ScoreDoc> collector = TopScoreDocCollector.create(MAXIMAL_HITS);
            indexSearcher.search(query, collector);
            scoreDocs = collector.topDocs().scoreDocs;
        } catch (IOException e) {
            LOGGER.warn("Getting the workitem search results failed with " + e.getMessage());
            LOGGER.debug(ExceptionUtils.getStackTrace(e));
            return hitIDs;
        }
        if (LOGGER.isDebugEnabled()) {
            long end = System.currentTimeMillis();
            LOGGER.debug("Found " + scoreDocs.length + " document(s) (in " + (end - start)
                    + " milliseconds) that matched the user query '" + userQueryString
                    + "' the preprocessed query '" + preprocessedQueryString + "' and the query.toString() '"
                    + query.toString() + "'");
        }
        // One scorer drives both the fragmenter and the highlighter.
        QueryScorer queryScorer = new QueryScorer(query/*, LuceneUtil.HIGHLIGHTER_FIELD*/);
        Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer);
        Highlighter highlighter = new Highlighter(queryScorer);
        highlighter.setTextFragmenter(fragmenter);
        hitIDs = new int[scoreDocs.length];
        for (int i = 0; i < scoreDocs.length; i++) {
            int docID = scoreDocs[i].doc;
            Document doc = null;
            try {
                doc = indexSearcher.doc(docID);
            } catch (IOException e) {
                LOGGER.error("Getting the workitem documents failed with " + e.getMessage());
                LOGGER.debug(ExceptionUtils.getStackTrace(e));
            }
            if (doc == null) {
                continue;
            }
            // Check the raw field value: Integer.valueOf never returns null, and a
            // null argument would throw and abort the processing of all remaining hits.
            String itemIDValue = doc.get(LuceneUtil.getFieldName(SystemFields.ISSUENO));
            if (itemIDValue == null) {
                LOGGER.warn("The document " + docID + " has no issue number, skipping it");
                continue;
            }
            Integer itemID = Integer.valueOf(itemIDValue);
            hitIDs[i] = itemID.intValue();
            if (highlightedTextMap == null) {
                continue;
            }
            String highligherFieldValue = doc.get(LuceneUtil.HIGHLIGHTER_FIELD);
            TokenStream tokenStream = null;
            try {
                tokenStream = TokenSources.getTokenStream(LuceneUtil.HIGHLIGHTER_FIELD, null,
                        highligherFieldValue, LuceneUtil.getAnalyzer(), -1);
            } catch (Exception ex) {
                // Best effort: a hit without a token stream simply gets no highlighted text.
                LOGGER.debug(ex.getMessage());
            }
            if (tokenStream != null) {
                String fragment = highlighter.getBestFragment(tokenStream, highligherFieldValue);
                if (fragment != null) {
                    highlightedTextMap.put(itemID, fragment);
                }
            }
        }
        return hitIDs;
    } catch (BooleanQuery.TooManyClauses e) {
        LOGGER.error("Searching the query resulted in too many clauses. Try to narrow the query results. "
                + e.getMessage());
        LOGGER.debug(ExceptionUtils.getStackTrace(e));
        throw e;
    } catch (Exception e) {
        LOGGER.error("Searching the workitems failed with " + e.getMessage());
        LOGGER.debug(ExceptionUtils.getStackTrace(e));
        return hitIDs;
    } finally {
        // The searcher (and its reader) is released on every exit path.
        closeIndexSearcherAndUnderlyingIndexReader(indexSearcher, "workItem");
    }
}

From source file:com.bugull.mongo.lucene.BuguHighlighter.java

License:Apache License

/**
 * Highlights the occurrences of this highlighter's keywords inside a field value.
 *
 * @param fieldName the field the keywords are parsed against and the value is tokenized for
 * @param fieldValue the text to highlight
 * @return the best fragments (at most {@code maxFragments}) joined with "..."
 * @throws Exception if parsing the keywords or highlighting fails
 */
public String getResult(String fieldName, String fieldValue) throws Exception {
    final BuguIndex buguIndex = BuguIndex.getInstance();

    final Query keywordQuery =
            new QueryParser(buguIndex.getVersion(), fieldName, buguIndex.getAnalyzer()).parse(keywords);
    final TokenStream valueTokens =
            buguIndex.getAnalyzer().tokenStream(fieldName, new StringReader(fieldValue));

    final QueryScorer keywordScorer = new QueryScorer(keywordQuery, fieldName);
    final Highlighter keywordHighlighter = new Highlighter(formatter, keywordScorer);
    keywordHighlighter.setTextFragmenter(new SimpleSpanFragmenter(keywordScorer));

    final String highlighted =
            keywordHighlighter.getBestFragments(valueTokens, fieldValue, maxFragments, "...");
    return highlighted;
}

From source file:com.difference.historybook.index.lucene.LuceneIndex.java

License:Apache License

/**
 * Searches a collection, grouping hits by URL group, and returns one result
 * per group with up to three highlighted snippets of the search field.
 *
 * @param collection the collection the hits are filtered to
 * @param query the user query string, parsed with this index's parser
 * @param offset the number of leading groups to skip (paging)
 * @param size the maximum number of groups to return
 * @param includeDebug whether to attach the score explanation per result and
 *        the final query string to the wrapper
 * @return the wrapper holding the requested page of results and the total group count
 * @throws IndexException if searching, parsing the query or highlighting fails
 */
@Override
public SearchResultWrapper search(String collection, String query, int offset, int size, boolean includeDebug)
        throws IndexException {
    try {
        //TODO: make age be a component in the ranking?
        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
        queryBuilder.add(parser.parse(query), Occur.MUST);
        queryBuilder.add(new TermQuery(new Term(IndexDocumentAdapter.FIELD_COLLECTION, collection)),
                Occur.FILTER);
        Query baseQuery = queryBuilder.build();

        // Score is combined with a reciprocal function of the timestamp field
        // relative to the current time in seconds.
        FunctionQuery boostQuery = new FunctionQuery(
                new ReciprocalFloatFunction(new DurationValueSource(new Date().getTime() / 1000,
                        new LongFieldSource(IndexDocumentAdapter.FIELD_TIMESTAMP)), RECIP, 1F, 1F));

        Query q = new CustomScoreQuery(baseQuery, boostQuery);

        QueryScorer queryScorer = new QueryScorer(q, IndexDocumentAdapter.FIELD_SEARCH);
        Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer);
        Highlighter highlighter = new Highlighter(queryScorer);
        highlighter.setTextFragmenter(fragmenter);

        GroupingSearch gsearch = new GroupingSearch(IndexDocumentAdapter.FIELD_URL_GROUP).setGroupDocsLimit(1)
                .setAllGroups(true).setIncludeMaxScore(true);
        TopGroups<?> groups = gsearch.search(searcher, q, offset, size);

        ArrayList<SearchResult> results = new ArrayList<>(size);
        // GroupingSearch.search has already skipped the first `offset` groups, so the
        // returned page is indexed from 0. Starting at `offset` again would drop results.
        for (int i = 0; i < size && i < groups.groups.length; i++) {
            ScoreDoc scoreDoc = groups.groups[i].scoreDocs[0];
            Document luceneDoc = searcher.doc(scoreDoc.doc);
            IndexDocumentAdapter doc = new IndexDocumentAdapter(luceneDoc);

            String searchText = luceneDoc.get(IndexDocumentAdapter.FIELD_SEARCH);
            TokenStream tokenStream = TokenSources.getTokenStream(IndexDocumentAdapter.FIELD_SEARCH,
                    reader.getTermVectors(scoreDoc.doc), searchText,
                    analyzer, highlighter.getMaxDocCharsToAnalyze() - 1);

            String[] snippets = highlighter.getBestFragments(tokenStream, searchText, 3);
            String snippet = String.join("\n", snippets);
            // Strip everything but simple text markup from the snippet.
            snippet = Jsoup.clean(snippet, Whitelist.simpleText());

            String debugInfo = null;
            if (includeDebug) {
                Explanation explanation = searcher.explain(q, scoreDoc.doc);
                debugInfo = explanation.toString();
            }

            results.add(new SearchResult(doc.getKey(), doc.getCollection(), doc.getTitle(), doc.getUrl(),
                    doc.getDomain(), doc.getTimestampText(), snippet, debugInfo, scoreDoc.score));
        }

        SearchResultWrapper wrapper = new SearchResultWrapper().setQuery(query).setOffset(offset)
                .setMaxResultsRequested(size)
                .setResultCount(groups.totalGroupCount != null ? groups.totalGroupCount : 0)
                .setResults(results);

        if (includeDebug) {
            wrapper.setDebugInfo(q.toString());
        }

        return wrapper;

    } catch (IOException | ParseException | InvalidTokenOffsetsException e) {
        LOG.error(e.getLocalizedMessage());
        throw new IndexException(e);
    }
}

From source file:com.github.wxiaoqi.search.lucene.LuceneDao.java

License:Open Source License

/**
 * Builds a highlighter for the given query that wraps matches in a red
 * {@code <font>} tag and fragments the text with a span fragmenter.
 *
 * @param query the query whose terms are highlighted
 * @return the configured highlighter
 */
private Highlighter addStringHighlighter(Query query) {
    final SimpleHTMLFormatter redFontFormatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
    final QueryScorer queryScorer = new QueryScorer(query);

    final Highlighter result = new Highlighter(redFontFormatter, queryScorer);
    result.setTextFragmenter(new SimpleSpanFragmenter(queryScorer));
    return result;
}

From source file:com.leavesfly.lia.tool.HighlightIt.java

License:Apache License

/**
 * Highlights matches of the query "term" in the {@code text} value (defined
 * outside this method) and writes the result as a small HTML page to the
 * file given as the only command-line argument.
 *
 * @param args exactly one element: the path of the HTML file to write
 * @throws Exception if parsing the query, highlighting or writing the file fails
 */
public static void main(String[] args) throws Exception {

    if (args.length != 1) {
        System.err.println("Usage: HighlightIt <filename-out>");
        System.exit(-1);
    }

    String filename = args[0];

    String searchText = "term"; // #1
    QueryParser parser = new QueryParser(Version.LUCENE_30, // #1
            "f", // #1
            new StandardAnalyzer(Version.LUCENE_30));// #1
    Query query = parser.parse(searchText); // #1

    SimpleHTMLFormatter formatter = // #2
            new SimpleHTMLFormatter("<span class=\"highlight\">", // #2
                    "</span>"); // #2

    TokenStream tokens = new StandardAnalyzer(Version.LUCENE_30) // #3
            .tokenStream("f", new StringReader(text)); // #3

    QueryScorer scorer = new QueryScorer(query, "f"); // #4

    Highlighter highlighter = new Highlighter(formatter, scorer); // #5
    highlighter.setTextFragmenter( // #6
            new SimpleSpanFragmenter(scorer)); // #6

    String result = // #7
            highlighter.getBestFragments(tokens, text, 3, "..."); // #7

    FileWriter writer = new FileWriter(filename); // #8
    try {
        writer.write("<html>"); // #8
        writer.write("<style>\n" + // #8
                ".highlight {\n" + // #8
                " background: yellow;\n" + // #8
                "}\n" + // #8
                "</style>"); // #8
        writer.write("<body>"); // #8
        writer.write(result); // #8
        writer.write("</body></html>"); // #8
    } finally {
        // Close in finally so the file handle is released even when a write fails.
        writer.close(); // #8
    }
}

From source file:com.leavesfly.lia.tool.HighlightTest.java

License:Apache License

/**
 * Runs a title search for "action" on the book index and prints, for each of
 * the top 10 hits, the best highlighted fragment of its title.
 *
 * @throws Exception if searching, loading a document or highlighting fails
 */
public void testHits() throws Exception {
    final IndexSearcher indexSearcher = new IndexSearcher(TestUtil.getBookIndexDirectory());
    final TermQuery actionQuery = new TermQuery(new Term("title", "action"));
    final TopDocs topDocs = indexSearcher.search(actionQuery, 10);

    final QueryScorer actionScorer = new QueryScorer(actionQuery, "title");
    final Highlighter titleHighlighter = new Highlighter(actionScorer);
    titleHighlighter.setTextFragmenter(new SimpleSpanFragmenter(actionScorer));

    final Analyzer simpleAnalyzer = new SimpleAnalyzer();

    final ScoreDoc[] scored = topDocs.scoreDocs;
    for (int hit = 0; hit < scored.length; hit++) {
        final int luceneId = scored[hit].doc;
        final Document stored = indexSearcher.doc(luceneId);
        final String storedTitle = stored.get("title");

        // Token stream for the stored title; the analyzer is handed to TokenSources alongside the document.
        final TokenStream titleStream = TokenSources.getAnyTokenStream(
                indexSearcher.getIndexReader(), luceneId, "title", stored, simpleAnalyzer);
        final String highlightedTitle = titleHighlighter.getBestFragment(titleStream, storedTitle);

        System.out.println(highlightedTitle);
    }
}

From source file:com.mathworks.xzheng.tools.HighlightIt.java

License:Apache License

/**
 * Highlights matches of the query "term" in the {@code text} value (defined
 * outside this method) and saves the highlighted fragments as an HTML page
 * in the file named by the single command-line argument.
 *
 * @param args exactly one element: the path of the HTML file to write
 * @throws Exception if parsing the query, highlighting or writing the file fails
 */
public static void main(String[] args) throws Exception {

    if (args.length != 1) {
        System.err.println("Usage: HighlightIt <filename-out>");
        System.exit(-1);
    }

    String filename = args[0];

    String searchText = "term"; // #1
    QueryParser parser = new QueryParser(Version.LUCENE_46, // #1
            "f", // #1
            new StandardAnalyzer(Version.LUCENE_46));// #1
    Query query = parser.parse(searchText); // #1

    SimpleHTMLFormatter formatter = // #2
            new SimpleHTMLFormatter("<span class=\"highlight\">", // #2
                    "</span>"); // #2

    TokenStream tokens = new StandardAnalyzer(Version.LUCENE_46) // #3
            .tokenStream("f", new StringReader(text)); // #3

    QueryScorer scorer = new QueryScorer(query, "f"); // #4

    Highlighter highlighter = new Highlighter(formatter, scorer); // #5
    highlighter.setTextFragmenter( // #6
            new SimpleSpanFragmenter(scorer)); // #6

    String result = // #7
            highlighter.getBestFragments(tokens, text, 3, "..."); // #7

    // The writer is closed automatically, including when a write throws.
    try (FileWriter writer = new FileWriter(filename)) { // #8
        writer.write("<html>"); // #8
        writer.write("<style>\n" + // #8
                ".highlight {\n" + // #8
                " background: yellow;\n" + // #8
                "}\n" + // #8
                "</style>"); // #8
        writer.write("<body>"); // #8
        writer.write(result); // #8
        writer.write("</body></html>"); // #8
    }
}