List of usage examples for the org.apache.lucene.search.vectorhighlight.FastVectorHighlighter no-argument constructor
public FastVectorHighlighter()
From source file:com.github.hotware.lucene.extension.highlight.FVHighlighterUtil.java
License:BEER-WARE LICENSE
/**
 * Creates a highlighting utility backed by a default {@link FastVectorHighlighter}.
 *
 * @param phraseLimit            phrase limit stored for later highlighting calls
 * @param fragListBuilder        builder stored for creating fragment lists
 * @param objectFragmentsBuilder builder stored for rendering fragments
 */
public FVHighlighterUtil(int phraseLimit, FragListBuilder fragListBuilder,
        ObjectFragmentsBuilder objectFragmentsBuilder) {
    this.fvh = new FastVectorHighlighter();
    this.phraseLimit = phraseLimit;
    this.fragListBuilder = fragListBuilder;
    this.objectFragmentsBuilder = objectFragmentsBuilder;
}
From source file:org.elasticsearch.deps.lucene.VectorHighlighterTests.java
License:Apache License
/**
 * Highlighting a stored field that carries term vectors with positions and
 * offsets yields the expected {@code <b>}-tagged fragment.
 */
@Test
public void testVectorHighlighter() throws Exception {
    // FIX: close Directory, IndexWriter and IndexReader (previously leaked).
    try (Directory dir = new RAMDirectory();
            IndexWriter indexWriter = new IndexWriter(dir,
                    new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER))) {
        Document document = new Document();
        document.add(new TextField("_id", "1", Field.Store.YES));
        // Term vectors with positions and offsets are required by FastVectorHighlighter.
        document.add(new Field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS));
        indexWriter.addDocument(document);

        try (IndexReader reader = DirectoryReader.open(indexWriter, true)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
            assertThat(topDocs.totalHits, equalTo(1));

            FastVectorHighlighter highlighter = new FastVectorHighlighter();
            String fragment = highlighter.getBestFragment(
                    highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))), reader,
                    topDocs.scoreDocs[0].doc, "content", 30);
            assertThat(fragment, notNullValue());
            assertThat(fragment, equalTo("the big <b>bad</b> dog"));
        }
    }
}
From source file:org.elasticsearch.deps.lucene.VectorHighlighterTests.java
License:Apache License
@Test public void testVectorHighlighterPrefixQuery() throws Exception { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); Document document = new Document(); document.add(new TextField("_id", "1", Field.Store.YES)); document.add(new Field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); indexWriter.addDocument(document);// ww w. j a v a2 s . co m IndexReader reader = DirectoryReader.open(indexWriter, true); IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1); assertThat(topDocs.totalHits, equalTo(1)); FastVectorHighlighter highlighter = new FastVectorHighlighter(); PrefixQuery prefixQuery = new PrefixQuery(new Term("content", "ba")); assertThat(prefixQuery.getRewriteMethod().getClass().getName(), equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName())); String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(prefixQuery), reader, topDocs.scoreDocs[0].doc, "content", 30); assertThat(fragment, nullValue()); prefixQuery.setRewriteMethod(PrefixQuery.SCORING_BOOLEAN_QUERY_REWRITE); Query rewriteQuery = prefixQuery.rewrite(reader); fragment = highlighter.getBestFragment(highlighter.getFieldQuery(rewriteQuery), reader, topDocs.scoreDocs[0].doc, "content", 30); assertThat(fragment, notNullValue()); // now check with the custom field query prefixQuery = new PrefixQuery(new Term("content", "ba")); assertThat(prefixQuery.getRewriteMethod().getClass().getName(), equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName())); fragment = highlighter.getBestFragment(new CustomFieldQuery(prefixQuery, reader, highlighter), reader, topDocs.scoreDocs[0].doc, "content", 30); assertThat(fragment, notNullValue()); }
From source file:org.elasticsearch.deps.lucene.VectorHighlighterTests.java
License:Apache License
@Test public void testVectorHighlighterNoStore() throws Exception { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); Document document = new Document(); document.add(new TextField("_id", "1", Field.Store.YES)); document.add(new Field("content", "the big bad dog", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); indexWriter.addDocument(document);//ww w . j a v a2 s . c o m IndexReader reader = DirectoryReader.open(indexWriter, true); IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1); assertThat(topDocs.totalHits, equalTo(1)); FastVectorHighlighter highlighter = new FastVectorHighlighter(); String fragment = highlighter.getBestFragment( highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))), reader, topDocs.scoreDocs[0].doc, "content", 30); assertThat(fragment, nullValue()); }
From source file:org.elasticsearch.deps.lucene.VectorHighlighterTests.java
License:Apache License
@Test public void testVectorHighlighterNoTermVector() throws Exception { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); Document document = new Document(); document.add(new TextField("_id", "1", Field.Store.YES)); document.add(new Field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO)); indexWriter.addDocument(document);// ww w . j a va2s . co m IndexReader reader = DirectoryReader.open(indexWriter, true); IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1); assertThat(topDocs.totalHits, equalTo(1)); FastVectorHighlighter highlighter = new FastVectorHighlighter(); String fragment = highlighter.getBestFragment( highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))), reader, topDocs.scoreDocs[0].doc, "content", 30); assertThat(fragment, nullValue()); }
From source file:org.elasticsearch.test.unit.deps.lucene.VectorHighlighterTests.java
License:Apache License
/**
 * Highlighting a stored field that carries term vectors with positions and
 * offsets yields the expected {@code <b>}-tagged fragment.
 */
@Test
public void testVectorHighlighter() throws Exception {
    // FIX: close Directory, IndexWriter and IndexReader (previously leaked).
    try (Directory dir = new RAMDirectory();
            IndexWriter indexWriter = new IndexWriter(dir,
                    new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER))) {
        Document document = new Document();
        document.add(new TextField("_id", "1", Field.Store.YES));
        // Term vectors with positions and offsets are required by FastVectorHighlighter.
        document.add(new Field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS));
        indexWriter.addDocument(document);

        try (IndexReader reader = IndexReader.open(indexWriter, true)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
            assertThat(topDocs.totalHits, equalTo(1));

            FastVectorHighlighter highlighter = new FastVectorHighlighter();
            String fragment = highlighter.getBestFragment(
                    highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))), reader,
                    topDocs.scoreDocs[0].doc, "content", 30);
            assertThat(fragment, notNullValue());
            assertThat(fragment, equalTo("the big <b>bad</b> dog"));
        }
    }
}
From source file:org.elasticsearch.test.unit.deps.lucene.VectorHighlighterTests.java
License:Apache License
@Test public void testVectorHighlighterPrefixQuery() throws Exception { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); Document document = new Document(); document.add(new TextField("_id", "1", Field.Store.YES)); document.add(new Field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); indexWriter.addDocument(document);/* www . jav a 2s. com*/ IndexReader reader = IndexReader.open(indexWriter, true); IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1); assertThat(topDocs.totalHits, equalTo(1)); FastVectorHighlighter highlighter = new FastVectorHighlighter(); PrefixQuery prefixQuery = new PrefixQuery(new Term("content", "ba")); assertThat(prefixQuery.getRewriteMethod().getClass().getName(), equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName())); String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(prefixQuery), reader, topDocs.scoreDocs[0].doc, "content", 30); assertThat(fragment, nullValue()); prefixQuery.setRewriteMethod(PrefixQuery.SCORING_BOOLEAN_QUERY_REWRITE); Query rewriteQuery = prefixQuery.rewrite(reader); fragment = highlighter.getBestFragment(highlighter.getFieldQuery(rewriteQuery), reader, topDocs.scoreDocs[0].doc, "content", 30); assertThat(fragment, notNullValue()); System.out.println(fragment); // now check with the custom field query prefixQuery = new PrefixQuery(new Term("content", "ba")); assertThat(prefixQuery.getRewriteMethod().getClass().getName(), equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName())); fragment = highlighter.getBestFragment(new CustomFieldQuery(prefixQuery, reader, highlighter), reader, topDocs.scoreDocs[0].doc, "content", 30); assertThat(fragment, notNullValue()); System.out.println(fragment); }
From source file:org.elasticsearch.test.unit.deps.lucene.VectorHighlighterTests.java
License:Apache License
@Test public void testVectorHighlighterNoStore() throws Exception { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); Document document = new Document(); document.add(new TextField("_id", "1", Field.Store.YES)); document.add(new Field("content", "the big bad dog", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); indexWriter.addDocument(document);//ww w . ja v a 2 s . c o m IndexReader reader = IndexReader.open(indexWriter, true); IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1); assertThat(topDocs.totalHits, equalTo(1)); FastVectorHighlighter highlighter = new FastVectorHighlighter(); String fragment = highlighter.getBestFragment( highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))), reader, topDocs.scoreDocs[0].doc, "content", 30); assertThat(fragment, nullValue()); }
From source file:org.elasticsearch.test.unit.deps.lucene.VectorHighlighterTests.java
License:Apache License
/**
 * Without term vectors on the "content" field the fast vector highlighter
 * cannot locate match offsets, so the best fragment is {@code null}.
 */
@Test
public void testVectorHighlighterNoTermVector() throws Exception {
    // FIX: close Directory, IndexWriter and IndexReader (previously leaked).
    try (Directory dir = new RAMDirectory();
            IndexWriter indexWriter = new IndexWriter(dir,
                    new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER))) {
        Document document = new Document();
        document.add(new TextField("_id", "1", Field.Store.YES));
        // TermVector.NO: the stored text exists, but no vectors to highlight from.
        document.add(new Field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED,
                Field.TermVector.NO));
        indexWriter.addDocument(document);

        try (IndexReader reader = IndexReader.open(indexWriter, true)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
            assertThat(topDocs.totalHits, equalTo(1));

            FastVectorHighlighter highlighter = new FastVectorHighlighter();
            String fragment = highlighter.getBestFragment(
                    highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))), reader,
                    topDocs.scoreDocs[0].doc, "content", 30);
            assertThat(fragment, nullValue());
        }
    }
}
From source file:org.segrada.search.lucene.LuceneSearchEngine.java
License:Apache License
@Override public PaginationInfo<SearchHit> search(String searchTerm, Map<String, String> filters) { // to avoid NPEs if (filters == null) filters = new HashMap<>(); // set defaults int page = 1; int entriesPerPage = 20; try {//from ww w.ja va 2 s . c o m DirectoryReader iReader = DirectoryReader.open(directory); String[] containFields; // do we have a filter to contain to certain fields? if (filters.containsKey("fields")) { String fields = filters.get("fields"); if (fields.isEmpty()) containFields = new String[] { "title", "subTitles", "content" }; else if (fields.equalsIgnoreCase("title")) containFields = new String[] { "title" }; else if (fields.equalsIgnoreCase("subTitles")) containFields = new String[] { "subTitles" }; else if (fields.equalsIgnoreCase("content")) containFields = new String[] { "content" }; else if (fields.equalsIgnoreCase("allTitles")) containFields = new String[] { "title", "subTitles" }; else throw new RuntimeException("fields-Filter " + fields + " is not known."); } else containFields = new String[] { "title", "subTitles", "content" }; // Parse a simple query that searches for "text": MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_47, containFields, analyzer); // which operator do we use? parser.setDefaultOperator(QueryParser.Operator.AND); if (filters.containsKey("operator")) { String operator = filters.get("operator"); if (operator.equalsIgnoreCase("or")) parser.setDefaultOperator(QueryParser.Operator.OR); else if (!operator.isEmpty() && !operator.equalsIgnoreCase("and")) throw new RuntimeException("operator-Filter " + operator + " is not and/or."); } // filters for query List<Filter> searchFilters = new ArrayList<>(); // class filter if (filters.containsKey("class") && !filters.get("class").isEmpty()) { // multiple classes? 
String[] classes = filters.get("class").split(","); // single class if (classes.length <= 1) { TermQuery categoryQuery = new TermQuery(new Term("className", filters.get("class"))); searchFilters.add(new QueryWrapperFilter(categoryQuery)); } else { // multiple classes Filter[] categories = new Filter[classes.length]; for (int i = 0; i < classes.length; i++) { categories[i] = new QueryWrapperFilter( new TermQuery(new Term("className", classes[i].trim()))); } // add chained filter searchFilters.add(new ChainedFilter(categories, ChainedFilter.OR)); } } // tag filter if (filters.containsKey("tags") && !filters.get("tags").isEmpty()) { // split tags into array String[] tags = filters.get("tags").split(","); BooleanQuery booleanQuery = new BooleanQuery(); for (String tag : tags) { booleanQuery.add(new TermQuery(new Term("tag", tag.trim())), BooleanClause.Occur.SHOULD); } searchFilters.add(new QueryWrapperFilter(booleanQuery)); } // create filter - if multiple filters applied, add chained filter Filter filter = null; if (searchFilters.size() == 1) filter = searchFilters.get(0); else if (searchFilters.size() > 1) { Filter[] filterArray = new Filter[searchFilters.size()]; searchFilters.toArray(filterArray); filter = new ChainedFilter(filterArray, ChainedFilter.AND); } // define query Query query = null; if (searchTerm != null) query = parser.parse(searchTerm); if (query == null) query = new MatchAllDocsQuery(); // fallback to match all documents // get hits per page if (filters.containsKey("limit")) { try { entriesPerPage = Integer.valueOf(filters.get("limit")); if (entriesPerPage <= 0 || entriesPerPage > 1000) entriesPerPage = 20; } catch (NumberFormatException e) { logger.warn("Could not parse limit " + filters.get("limit") + " to integer", e); } } // get page number if (filters.containsKey("page")) { try { page = Integer.valueOf(filters.get("page")); } catch (NumberFormatException e) { logger.warn("Could not parse page " + filters.get("page") + " to integer", e); } } // 
calculate start/stop indexes int startIndex = (page - 1) * entriesPerPage; int endIndex = page * entriesPerPage; IndexSearcher iSearcher = new IndexSearcher(iReader); // do search TopDocs topDocs = iSearcher.search(query, filter, 1000); // update end index if (topDocs.scoreDocs.length < endIndex) endIndex = topDocs.scoreDocs.length; // how many pages do we have? int pages = topDocs.scoreDocs.length / entriesPerPage + 1; // reset page to sane limit, if needed if (page <= 0 || page > pages) page = 1; // highlighter FastVectorHighlighter highlighter = new FastVectorHighlighter(); FieldQuery fieldQuery = null; // field query for highlighted terms if (searchTerm != null) fieldQuery = highlighter.getFieldQuery( new QueryParser(Version.LUCENE_47, "content", analyzer).parse(searchTerm), iReader); // cycle trough hits List<SearchHit> hits = new ArrayList<>(); for (int i = startIndex; i < endIndex; i++) { ScoreDoc scoreDoc = topDocs.scoreDocs[i]; Document hitDoc = iSearcher.doc(scoreDoc.doc); SearchHit searchHit = new SearchHit(); searchHit.setId(hitDoc.get("id")); searchHit.setClassName(hitDoc.get("className")); searchHit.setTitle(hitDoc.get("title")); searchHit.setSubTitles(hitDoc.get("subTitles")); searchHit.setTagIds(hitDoc.getValues("tag")); String color = hitDoc.get("color"); searchHit.setColor(color != null ? 
new Integer(color) : null); searchHit.setIconFileIdentifier(hitDoc.get("iconFileIdentifier")); searchHit.setRelevance(scoreDoc.score); // get highlighted components if (searchTerm != null) { String[] bestFragments = highlighter.getBestFragments(fieldQuery, iReader, scoreDoc.doc, "content", 18, 10); searchHit.setHighlightText(bestFragments); } // add hit hits.add(searchHit); } iReader.close(); // return pagination info return new PaginationInfo<>(page, pages, topDocs.totalHits, entriesPerPage, hits); } catch (Throwable e) { logger.error("Error in search.", e); } // return empty list result in order to avoid NPEs return new PaginationInfo<>(page, 1, 0, entriesPerPage, new ArrayList<>()); }