Example usage for org.apache.lucene.search.vectorhighlight FastVectorHighlighter FastVectorHighlighter

List of usage examples for org.apache.lucene.search.vectorhighlight FastVectorHighlighter FastVectorHighlighter

Introduction

On this page you can find example usage for org.apache.lucene.search.vectorhighlight FastVectorHighlighter FastVectorHighlighter.

Prototype

public FastVectorHighlighter() 

Source Link

Document

The default constructor.

Usage

From source file:com.github.hotware.lucene.extension.highlight.FVHighlighterUtil.java

License:BEER-WARE LICENSE

/**
 * Creates a highlighter utility backed by a default {@link FastVectorHighlighter}.
 *
 * @param phraseLimit            phrase limit stored for later highlighting calls
 *                               (presumably forwarded to the highlighter — confirm at call sites)
 * @param fragListBuilder        builder used to produce the fragment list
 * @param objectFragmentsBuilder builder used to produce the highlighted fragments
 */
public FVHighlighterUtil(int phraseLimit, FragListBuilder fragListBuilder,
        ObjectFragmentsBuilder objectFragmentsBuilder) {
    // The highlighter uses its default configuration; all other fields are
    // stored as-is. Assignments are independent of one another.
    this.fvh = new FastVectorHighlighter();
    this.objectFragmentsBuilder = objectFragmentsBuilder;
    this.fragListBuilder = fragListBuilder;
    this.phraseLimit = phraseLimit;
}

From source file:org.elasticsearch.deps.lucene.VectorHighlighterTests.java

License:Apache License

/**
 * A field indexed with term vectors (positions + offsets) and stored content
 * yields a highlighted fragment for a matching term query.
 */
@Test
public void testVectorHighlighter() throws Exception {
    // try-with-resources: directory/writer/reader were previously never closed
    try (Directory dir = new RAMDirectory();
            IndexWriter indexWriter = new IndexWriter(dir,
                    new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER))) {

        Document document = new Document();
        document.add(new TextField("_id", "1", Field.Store.YES));
        // term vectors with positions/offsets are what FastVectorHighlighter reads
        document.add(new Field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS));
        indexWriter.addDocument(document);

        // near-real-time reader on top of the open writer
        try (IndexReader reader = DirectoryReader.open(indexWriter, true)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);

            assertThat(topDocs.totalHits, equalTo(1));

            FastVectorHighlighter highlighter = new FastVectorHighlighter();
            String fragment = highlighter.getBestFragment(
                    highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))), reader,
                    topDocs.scoreDocs[0].doc, "content", 30);
            assertThat(fragment, notNullValue());
            assertThat(fragment, equalTo("the big <b>bad</b> dog"));
        }
    }
}

From source file:org.elasticsearch.deps.lucene.VectorHighlighterTests.java

License:Apache License

/**
 * A multi-term {@link PrefixQuery} produces no fragment under the default
 * constant-score rewrite; after a scoring boolean rewrite (or when wrapped in
 * a {@code CustomFieldQuery}) the expanded terms can be highlighted.
 */
@Test
public void testVectorHighlighterPrefixQuery() throws Exception {
    // try-with-resources: directory/writer/reader were previously never closed
    try (Directory dir = new RAMDirectory();
            IndexWriter indexWriter = new IndexWriter(dir,
                    new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER))) {

        Document document = new Document();
        document.add(new TextField("_id", "1", Field.Store.YES));
        document.add(new Field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS));
        indexWriter.addDocument(document);

        try (IndexReader reader = DirectoryReader.open(indexWriter, true)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);

            assertThat(topDocs.totalHits, equalTo(1));

            FastVectorHighlighter highlighter = new FastVectorHighlighter();

            // un-rewritten prefix query (constant-score rewrite) -> no fragment
            PrefixQuery prefixQuery = new PrefixQuery(new Term("content", "ba"));
            assertThat(prefixQuery.getRewriteMethod().getClass().getName(),
                    equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName()));
            String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(prefixQuery), reader,
                    topDocs.scoreDocs[0].doc, "content", 30);
            assertThat(fragment, nullValue());

            // after a scoring boolean rewrite the expanded terms are highlightable
            prefixQuery.setRewriteMethod(PrefixQuery.SCORING_BOOLEAN_QUERY_REWRITE);
            Query rewriteQuery = prefixQuery.rewrite(reader);
            fragment = highlighter.getBestFragment(highlighter.getFieldQuery(rewriteQuery), reader,
                    topDocs.scoreDocs[0].doc, "content", 30);
            assertThat(fragment, notNullValue());

            // now check with the custom field query
            prefixQuery = new PrefixQuery(new Term("content", "ba"));
            assertThat(prefixQuery.getRewriteMethod().getClass().getName(),
                    equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName()));
            fragment = highlighter.getBestFragment(new CustomFieldQuery(prefixQuery, reader, highlighter),
                    reader, topDocs.scoreDocs[0].doc, "content", 30);
            assertThat(fragment, notNullValue());
        }
    }
}

From source file:org.elasticsearch.deps.lucene.VectorHighlighterTests.java

License:Apache License

/**
 * When the field is not stored ({@code Field.Store.NO}) the highlighter returns
 * {@code null} even though term vectors are present — there is no source text to
 * build the fragment from.
 */
@Test
public void testVectorHighlighterNoStore() throws Exception {
    // try-with-resources: directory/writer/reader were previously never closed
    try (Directory dir = new RAMDirectory();
            IndexWriter indexWriter = new IndexWriter(dir,
                    new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER))) {

        Document document = new Document();
        document.add(new TextField("_id", "1", Field.Store.YES));
        // note Field.Store.NO — term vectors alone are not enough to highlight
        document.add(new Field("content", "the big bad dog", Field.Store.NO, Field.Index.ANALYZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS));
        indexWriter.addDocument(document);

        try (IndexReader reader = DirectoryReader.open(indexWriter, true)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);

            assertThat(topDocs.totalHits, equalTo(1));

            FastVectorHighlighter highlighter = new FastVectorHighlighter();
            String fragment = highlighter.getBestFragment(
                    highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))), reader,
                    topDocs.scoreDocs[0].doc, "content", 30);
            assertThat(fragment, nullValue());
        }
    }
}

From source file:org.elasticsearch.deps.lucene.VectorHighlighterTests.java

License:Apache License

/**
 * Without term vectors ({@code Field.TermVector.NO}) the fast vector highlighter
 * returns {@code null} even though the field content is stored.
 */
@Test
public void testVectorHighlighterNoTermVector() throws Exception {
    // try-with-resources: directory/writer/reader were previously never closed
    try (Directory dir = new RAMDirectory();
            IndexWriter indexWriter = new IndexWriter(dir,
                    new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER))) {

        Document document = new Document();
        document.add(new TextField("_id", "1", Field.Store.YES));
        // note TermVector.NO — FastVectorHighlighter depends on term vectors
        document.add(new Field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED,
                Field.TermVector.NO));
        indexWriter.addDocument(document);

        try (IndexReader reader = DirectoryReader.open(indexWriter, true)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);

            assertThat(topDocs.totalHits, equalTo(1));

            FastVectorHighlighter highlighter = new FastVectorHighlighter();
            String fragment = highlighter.getBestFragment(
                    highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))), reader,
                    topDocs.scoreDocs[0].doc, "content", 30);
            assertThat(fragment, nullValue());
        }
    }
}

From source file:org.elasticsearch.test.unit.deps.lucene.VectorHighlighterTests.java

License:Apache License

/**
 * A field indexed with term vectors (positions + offsets) and stored content
 * yields a highlighted fragment for a matching term query.
 */
@Test
public void testVectorHighlighter() throws Exception {
    // try-with-resources: directory/writer/reader were previously never closed
    try (Directory dir = new RAMDirectory();
            IndexWriter indexWriter = new IndexWriter(dir,
                    new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER))) {

        Document document = new Document();
        document.add(new TextField("_id", "1", Field.Store.YES));
        document.add(new Field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS));
        indexWriter.addDocument(document);

        // NOTE(review): IndexReader.open(IndexWriter, boolean) is deprecated in
        // Lucene 4 — prefer DirectoryReader.open; kept to avoid a new import here
        try (IndexReader reader = IndexReader.open(indexWriter, true)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);

            assertThat(topDocs.totalHits, equalTo(1));

            FastVectorHighlighter highlighter = new FastVectorHighlighter();
            String fragment = highlighter.getBestFragment(
                    highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))), reader,
                    topDocs.scoreDocs[0].doc, "content", 30);
            assertThat(fragment, notNullValue());
            assertThat(fragment, equalTo("the big <b>bad</b> dog"));
        }
    }
}

From source file:org.elasticsearch.test.unit.deps.lucene.VectorHighlighterTests.java

License:Apache License

/**
 * A multi-term {@link PrefixQuery} produces no fragment under the default
 * constant-score rewrite; after a scoring boolean rewrite (or when wrapped in
 * a {@code CustomFieldQuery}) the expanded terms can be highlighted.
 */
@Test
public void testVectorHighlighterPrefixQuery() throws Exception {
    // try-with-resources: directory/writer/reader were previously never closed;
    // debug System.out.println calls removed
    try (Directory dir = new RAMDirectory();
            IndexWriter indexWriter = new IndexWriter(dir,
                    new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER))) {

        Document document = new Document();
        document.add(new TextField("_id", "1", Field.Store.YES));
        document.add(new Field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS));
        indexWriter.addDocument(document);

        // NOTE(review): IndexReader.open(IndexWriter, boolean) is deprecated in
        // Lucene 4 — prefer DirectoryReader.open; kept to avoid a new import here
        try (IndexReader reader = IndexReader.open(indexWriter, true)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);

            assertThat(topDocs.totalHits, equalTo(1));

            FastVectorHighlighter highlighter = new FastVectorHighlighter();

            // un-rewritten prefix query (constant-score rewrite) -> no fragment
            PrefixQuery prefixQuery = new PrefixQuery(new Term("content", "ba"));
            assertThat(prefixQuery.getRewriteMethod().getClass().getName(),
                    equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName()));
            String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(prefixQuery), reader,
                    topDocs.scoreDocs[0].doc, "content", 30);
            assertThat(fragment, nullValue());

            // after a scoring boolean rewrite the expanded terms are highlightable
            prefixQuery.setRewriteMethod(PrefixQuery.SCORING_BOOLEAN_QUERY_REWRITE);
            Query rewriteQuery = prefixQuery.rewrite(reader);
            fragment = highlighter.getBestFragment(highlighter.getFieldQuery(rewriteQuery), reader,
                    topDocs.scoreDocs[0].doc, "content", 30);
            assertThat(fragment, notNullValue());

            // now check with the custom field query
            prefixQuery = new PrefixQuery(new Term("content", "ba"));
            assertThat(prefixQuery.getRewriteMethod().getClass().getName(),
                    equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName()));
            fragment = highlighter.getBestFragment(new CustomFieldQuery(prefixQuery, reader, highlighter),
                    reader, topDocs.scoreDocs[0].doc, "content", 30);
            assertThat(fragment, notNullValue());
        }
    }
}

From source file:org.elasticsearch.test.unit.deps.lucene.VectorHighlighterTests.java

License:Apache License

/**
 * When the field is not stored ({@code Field.Store.NO}) the highlighter returns
 * {@code null} even though term vectors are present — there is no source text to
 * build the fragment from.
 */
@Test
public void testVectorHighlighterNoStore() throws Exception {
    // try-with-resources: directory/writer/reader were previously never closed
    try (Directory dir = new RAMDirectory();
            IndexWriter indexWriter = new IndexWriter(dir,
                    new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER))) {

        Document document = new Document();
        document.add(new TextField("_id", "1", Field.Store.YES));
        // note Field.Store.NO — term vectors alone are not enough to highlight
        document.add(new Field("content", "the big bad dog", Field.Store.NO, Field.Index.ANALYZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS));
        indexWriter.addDocument(document);

        // NOTE(review): IndexReader.open(IndexWriter, boolean) is deprecated in
        // Lucene 4 — prefer DirectoryReader.open; kept to avoid a new import here
        try (IndexReader reader = IndexReader.open(indexWriter, true)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);

            assertThat(topDocs.totalHits, equalTo(1));

            FastVectorHighlighter highlighter = new FastVectorHighlighter();
            String fragment = highlighter.getBestFragment(
                    highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))), reader,
                    topDocs.scoreDocs[0].doc, "content", 30);
            assertThat(fragment, nullValue());
        }
    }
}

From source file:org.elasticsearch.test.unit.deps.lucene.VectorHighlighterTests.java

License:Apache License

/**
 * Without term vectors ({@code Field.TermVector.NO}) the fast vector highlighter
 * returns {@code null} even though the field content is stored.
 */
@Test
public void testVectorHighlighterNoTermVector() throws Exception {
    // try-with-resources: directory/writer/reader were previously never closed
    try (Directory dir = new RAMDirectory();
            IndexWriter indexWriter = new IndexWriter(dir,
                    new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER))) {

        Document document = new Document();
        document.add(new TextField("_id", "1", Field.Store.YES));
        // note TermVector.NO — FastVectorHighlighter depends on term vectors
        document.add(new Field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED,
                Field.TermVector.NO));
        indexWriter.addDocument(document);

        // NOTE(review): IndexReader.open(IndexWriter, boolean) is deprecated in
        // Lucene 4 — prefer DirectoryReader.open; kept to avoid a new import here
        try (IndexReader reader = IndexReader.open(indexWriter, true)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);

            assertThat(topDocs.totalHits, equalTo(1));

            FastVectorHighlighter highlighter = new FastVectorHighlighter();
            String fragment = highlighter.getBestFragment(
                    highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))), reader,
                    topDocs.scoreDocs[0].doc, "content", 30);
            assertThat(fragment, nullValue());
        }
    }
}

From source file:org.segrada.search.lucene.LuceneSearchEngine.java

License:Apache License

/**
 * Executes a paginated full-text search over the Lucene index.
 *
 * <p>Recognized filter keys (all optional): {@code fields}
 * (title/subTitles/content/allTitles), {@code operator} (and/or — default and),
 * {@code class} (comma-separated class names), {@code tags} (comma-separated tags),
 * {@code limit} (entries per page, 1–1000, default 20) and {@code page}.</p>
 *
 * @param searchTerm query string parsed with the Lucene query parser;
 *                   {@code null} falls back to matching all documents
 * @param filters    filter map as described above; may be {@code null}
 * @return pagination info holding the hits of the requested page; on any error an
 *         empty result is returned instead of propagating the exception
 */
@Override
public PaginationInfo<SearchHit> search(String searchTerm, Map<String, String> filters) {
    // to avoid NPEs
    if (filters == null)
        filters = new HashMap<>();

    // set defaults
    int page = 1;
    int entriesPerPage = 20;

    // try-with-resources: previously the reader leaked whenever an exception was
    // thrown before the explicit close() on the success path
    try (DirectoryReader iReader = DirectoryReader.open(directory)) {
        String[] containFields;
        // do we have a filter to contain to certain fields?
        if (filters.containsKey("fields")) {
            String fields = filters.get("fields");
            if (fields.isEmpty())
                containFields = new String[] { "title", "subTitles", "content" };
            else if (fields.equalsIgnoreCase("title"))
                containFields = new String[] { "title" };
            else if (fields.equalsIgnoreCase("subTitles"))
                containFields = new String[] { "subTitles" };
            else if (fields.equalsIgnoreCase("content"))
                containFields = new String[] { "content" };
            else if (fields.equalsIgnoreCase("allTitles"))
                containFields = new String[] { "title", "subTitles" };
            else
                throw new RuntimeException("fields-Filter " + fields + " is not known.");
        } else
            containFields = new String[] { "title", "subTitles", "content" };

        // Parse a simple query that searches for "text":
        MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_47, containFields, analyzer);

        // which operator do we use? AND is the default
        parser.setDefaultOperator(QueryParser.Operator.AND);
        if (filters.containsKey("operator")) {
            String operator = filters.get("operator");
            if (operator.equalsIgnoreCase("or"))
                parser.setDefaultOperator(QueryParser.Operator.OR);
            else if (!operator.isEmpty() && !operator.equalsIgnoreCase("and"))
                throw new RuntimeException("operator-Filter " + operator + " is not and/or.");
        }

        // filters for query
        List<Filter> searchFilters = new ArrayList<>();

        // class filter
        if (filters.containsKey("class") && !filters.get("class").isEmpty()) {
            // multiple classes?
            String[] classes = filters.get("class").split(",");

            // single class
            if (classes.length <= 1) {
                TermQuery categoryQuery = new TermQuery(new Term("className", filters.get("class")));
                searchFilters.add(new QueryWrapperFilter(categoryQuery));
            } else { // multiple classes: OR-chain one filter per class
                Filter[] categories = new Filter[classes.length];
                for (int i = 0; i < classes.length; i++) {
                    categories[i] = new QueryWrapperFilter(
                            new TermQuery(new Term("className", classes[i].trim())));
                }
                // add chained filter
                searchFilters.add(new ChainedFilter(categories, ChainedFilter.OR));
            }
        }

        // tag filter: any of the given tags may match (SHOULD clauses)
        if (filters.containsKey("tags") && !filters.get("tags").isEmpty()) {
            // split tags into array
            String[] tags = filters.get("tags").split(",");
            BooleanQuery booleanQuery = new BooleanQuery();
            for (String tag : tags) {
                booleanQuery.add(new TermQuery(new Term("tag", tag.trim())), BooleanClause.Occur.SHOULD);
            }
            searchFilters.add(new QueryWrapperFilter(booleanQuery));
        }

        // create filter - if multiple filters applied, AND-combine via chained filter
        Filter filter = null;
        if (searchFilters.size() == 1)
            filter = searchFilters.get(0);
        else if (searchFilters.size() > 1) {
            Filter[] filterArray = new Filter[searchFilters.size()];
            searchFilters.toArray(filterArray);
            filter = new ChainedFilter(filterArray, ChainedFilter.AND);
        }

        // define query
        Query query = null;
        if (searchTerm != null)
            query = parser.parse(searchTerm);
        if (query == null)
            query = new MatchAllDocsQuery(); // fallback to match all documents

        // get hits per page (clamped to 1..1000, otherwise default of 20)
        if (filters.containsKey("limit")) {
            try {
                entriesPerPage = Integer.parseInt(filters.get("limit"));
                if (entriesPerPage <= 0 || entriesPerPage > 1000)
                    entriesPerPage = 20;
            } catch (NumberFormatException e) {
                logger.warn("Could not parse limit " + filters.get("limit") + " to integer", e);
            }
        }

        // get page number
        if (filters.containsKey("page")) {
            try {
                page = Integer.parseInt(filters.get("page"));
            } catch (NumberFormatException e) {
                logger.warn("Could not parse page " + filters.get("page") + " to integer", e);
            }
        }

        // calculate start/stop indexes
        // NOTE(review): startIndex is computed BEFORE the page sanity reset below,
        // so an out-of-range page yields an empty hit window — confirm intended
        int startIndex = (page - 1) * entriesPerPage;
        int endIndex = page * entriesPerPage;

        IndexSearcher iSearcher = new IndexSearcher(iReader);
        // do search (hard cap of 1000 hits considered)
        TopDocs topDocs = iSearcher.search(query, filter, 1000);

        // update end index
        if (topDocs.scoreDocs.length < endIndex)
            endIndex = topDocs.scoreDocs.length;
        // how many pages do we have?
        // NOTE(review): reports one extra page when the hit count is exactly
        // divisible by entriesPerPage; kept for backward compatibility
        int pages = topDocs.scoreDocs.length / entriesPerPage + 1;
        // reset page to sane limit, if needed
        if (page <= 0 || page > pages)
            page = 1;

        // highlighter
        FastVectorHighlighter highlighter = new FastVectorHighlighter();
        FieldQuery fieldQuery = null;
        // field query for highlighted terms
        if (searchTerm != null)
            fieldQuery = highlighter.getFieldQuery(
                    new QueryParser(Version.LUCENE_47, "content", analyzer).parse(searchTerm), iReader);

        // cycle trough hits
        List<SearchHit> hits = new ArrayList<>();

        for (int i = startIndex; i < endIndex; i++) {
            ScoreDoc scoreDoc = topDocs.scoreDocs[i];
            Document hitDoc = iSearcher.doc(scoreDoc.doc);

            SearchHit searchHit = new SearchHit();
            searchHit.setId(hitDoc.get("id"));
            searchHit.setClassName(hitDoc.get("className"));
            searchHit.setTitle(hitDoc.get("title"));
            searchHit.setSubTitles(hitDoc.get("subTitles"));
            searchHit.setTagIds(hitDoc.getValues("tag"));
            String color = hitDoc.get("color");
            // Integer.valueOf replaces the deprecated new Integer(String) constructor
            searchHit.setColor(color != null ? Integer.valueOf(color) : null);
            searchHit.setIconFileIdentifier(hitDoc.get("iconFileIdentifier"));
            searchHit.setRelevance(scoreDoc.score);

            // get highlighted components
            if (searchTerm != null) {
                String[] bestFragments = highlighter.getBestFragments(fieldQuery, iReader, scoreDoc.doc,
                        "content", 18, 10);
                searchHit.setHighlightText(bestFragments);
            }

            // add hit
            hits.add(searchHit);
        }

        // return pagination info
        return new PaginationInfo<>(page, pages, topDocs.totalHits, entriesPerPage, hits);
    } catch (Throwable e) {
        // NOTE(review): catching Throwable also swallows Errors (e.g. OutOfMemoryError);
        // consider narrowing to Exception
        logger.error("Error in search.", e);
    }

    // return empty list result in order to avoid NPEs
    return new PaginationInfo<>(page, 1, 0, entriesPerPage, new ArrayList<>());
}