List of usage examples for org.apache.lucene.analysis.Analyzer#getOffsetGap
public int getOffsetGap(String fieldName)
From source file:com.liferay.portal.search.lucene.PerFieldAnalyzerWrapper.java
License:Open Source License
/**
 * Returns the offset gap for {@code field} by delegating to the analyzer that
 * {@code _getAnalyzer} resolves for the field's name.
 *
 * @param field the field whose offset gap is requested
 * @return the value reported by the resolved analyzer's {@code getOffsetGap(field)}
 */
@Override
public int getOffsetGap(Fieldable field) {
    return _getAnalyzer(field.name()).getOffsetGap(field);
}
From source file:org.tallison.lucene.search.concordance.TestConcordanceSearcher.java
License:Apache License
@Test public void testSimple() throws Exception { String[] docs = new String[] { "a b c a b c", "c b a c b a" }; Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET); Directory directory = getDirectory(analyzer, docs); IndexReader reader = DirectoryReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(reader); WindowBuilder wb = new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD), new DefaultSortKeyBuilder(ConcordanceSortOrder.PRE), metadataExtractor, docIdBuilder); ConcordanceSearcher searcher = new ConcordanceSearcher(wb); SpanQuery q = new SpanTermQuery(new Term(FIELD, "a")); ConcordanceWindowCollector collector = new ConcordanceWindowCollector(3); searcher.search(indexSearcher, FIELD, q, null, analyzer, collector); assertEquals(3, collector.size());//from ww w . j a v a2s.co m collector = new ConcordanceWindowCollector(ConcordanceWindowCollector.COLLECT_ALL); searcher.search(indexSearcher, FIELD, q, null, analyzer, collector); // test result size assertEquals(4, collector.size()); // test result with sort order = pre List<ConcordanceWindow> windows = collector.getSortedWindows(); String[] pres = new String[] { "", "c b", "c b a c b", "a b c" }; String[] posts = new String[] { " b c a b c", " c b a", "", " b c" }; for (int i = 0; i < windows.size(); i++) { ConcordanceWindow w = windows.get(i); assertEquals(pres[i], w.getPre()); assertEquals(posts[i], w.getPost()); } // test sort order post // sort key is built at search time, so must re-search wb = new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD), new DefaultSortKeyBuilder(ConcordanceSortOrder.POST), metadataExtractor, docIdBuilder); searcher = new ConcordanceSearcher(wb); collector = new ConcordanceWindowCollector(ConcordanceWindowCollector.COLLECT_ALL); searcher.search(indexSearcher, FIELD, q, null, analyzer, collector); windows = collector.getSortedWindows(); posts = new String[] { "", " b c", " b c a b c", " c b a", }; for (int i = 0; i < windows.size(); i++) { 
ConcordanceWindow w = windows.get(i); assertEquals(posts[i], w.getPost()); } reader.close(); directory.close(); }
From source file:org.tallison.lucene.search.concordance.TestConcordanceSearcher.java
License:Apache License
@Test public void testSimpleMultiValuedField() throws Exception { String[] doc = new String[] { "a b c a b c", "c b a c b a" }; List<String[]> docs = new ArrayList<>(); docs.add(doc);/*from w w w. j a va2 s . c o m*/ Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET); Directory directory = getDirectory(analyzer, docs); IndexReader reader = DirectoryReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(reader); ConcordanceSearcher searcher = new ConcordanceSearcher( new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD))); SpanQuery q = new SpanTermQuery(new Term(FIELD, "a")); ConcordanceWindowCollector collector = new ConcordanceWindowCollector(100); searcher.search(indexSearcher, FIELD, q, null, analyzer, collector); // test result size assertEquals(4, collector.size()); // test result with sort order = pre List<ConcordanceWindow> windows = collector.getSortedWindows(); String[] pres = new String[] { "", "c b", "c b a c b", "a b c" }; String[] posts = new String[] { " b c a b c", " c b a", "", " b c" }; for (int i = 0; i < pres.length; i++) { ConcordanceWindow w = windows.get(i); assertEquals("pres: " + i, pres[i], w.getPre()); assertEquals("posts: " + i, posts[i], w.getPost()); } // test sort order post // sort key is built at search time, so must re-search WindowBuilder wb = new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD), new DefaultSortKeyBuilder(ConcordanceSortOrder.POST), metadataExtractor, docIdBuilder); searcher = new ConcordanceSearcher(wb); collector = new ConcordanceWindowCollector(100); searcher.search(indexSearcher, FIELD, q, null, analyzer, collector); windows = collector.getSortedWindows(); posts = new String[] { "", " b c", " b c a b c", " c b a", }; for (int i = 0; i < posts.length; i++) { ConcordanceWindow w = windows.get(i); assertEquals(posts[i], w.getPost()); } reader.close(); directory.close(); }
From source file:org.tallison.lucene.search.concordance.TestConcordanceSearcher.java
License:Apache License
@Test public void testWindowLengths() throws Exception { String[] doc = new String[] { "a b c d e f g" }; List<String[]> docs = new ArrayList<>(); docs.add(doc);// w ww.j ava2 s . com Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET); Directory directory = getDirectory(analyzer, docs); IndexReader reader = DirectoryReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(reader); SpanQuery q = new SpanTermQuery(new Term(FIELD, "d")); String[] pres = { "", "c", "b c", "a b c", "a b c", "a b c" }; String[] posts = { "", " e", " e f", " e f g", " e f g", " e f g" }; for (int tokensBefore = 0; tokensBefore < pres.length; tokensBefore++) { for (int tokensAfter = 0; tokensAfter < posts.length; tokensAfter++) { WindowBuilder wb = new WindowBuilder(tokensBefore, tokensAfter, analyzer.getOffsetGap(FIELD)); ConcordanceSearcher searcher = new ConcordanceSearcher(wb); ConcordanceWindowCollector collector = new ConcordanceWindowCollector(100); searcher.search(indexSearcher, FIELD, q, null, analyzer, collector); ConcordanceWindow w = collector.getSortedWindows().get(0); assertEquals(tokensBefore + " : " + tokensAfter, pres[tokensBefore], w.getPre()); assertEquals(tokensBefore + " : " + tokensAfter, posts[tokensAfter], w.getPost()); } } reader.close(); directory.close(); }
From source file:org.tallison.lucene.search.concordance.TestConcordanceSearcher.java
License:Apache License
@Test public void testClockworkOrangeMultiValuedFieldProblem() throws Exception { /*//from ww w . j ava 2 s. co m * test handling of target match (or not) over different indices into multivalued * field array */ String[] doc = new String[] { "a b c a b the", "clockwork", "orange b a c b a" }; List<String[]> docs = new ArrayList<>(); docs.add(doc); Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET, 0, 10); Directory directory = getDirectory(analyzer, docs); IndexReader reader = DirectoryReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(reader); WindowBuilder wb = new WindowBuilder(3, 3, analyzer.getOffsetGap(FIELD)); ConcordanceSearcher searcher = new ConcordanceSearcher(wb); SpanQuery q1 = new SpanTermQuery(new Term(FIELD, "the")); SpanQuery q2 = new SpanTermQuery(new Term(FIELD, "clockwork")); SpanQuery q3 = new SpanTermQuery(new Term(FIELD, "orange")); SpanQuery q = new SpanNearQuery(new SpanQuery[] { q1, q2, q3 }, 3, true); ConcordanceWindowCollector collector = new ConcordanceWindowCollector(3); searcher.search(indexSearcher, FIELD, q, null, analyzer, collector); assertEquals(1, collector.size()); ConcordanceWindow w = collector.getSortedWindows().iterator().next(); assertEquals("target", "the | clockwork | orange", w.getTarget()); assertEquals("pre", "c a b", w.getPre()); assertEquals("post", " b a c", w.getPost()); reader.close(); directory.close(); // test hit even over long inter-field gap analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET, 20, 50); directory = getDirectory(analyzer, docs); reader = DirectoryReader.open(directory); indexSearcher = new IndexSearcher(reader); wb = new WindowBuilder(3, 3, analyzer.getOffsetGap(FIELD)); searcher = new ConcordanceSearcher(wb); q = new SpanNearQuery(new SpanQuery[] { q1, q2, q3 }, 120, true); collector = new ConcordanceWindowCollector(100); searcher.search(indexSearcher, FIELD, q, null, analyzer, collector); assertEquals(1, collector.size()); w = 
collector.getSortedWindows().iterator().next(); assertEquals("target", "the | clockwork | orange", w.getTarget()); assertEquals("pre", "c a b", w.getPre()); assertEquals("post", " b a c", w.getPost()); reader.close(); directory.close(); // test miss analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET, 100, 100); directory = getDirectory(analyzer, docs); reader = DirectoryReader.open(directory); indexSearcher = new IndexSearcher(reader); wb = new WindowBuilder(); searcher = new ConcordanceSearcher(wb); q = new SpanNearQuery(new SpanQuery[] { q1, q2, q3 }, 5, true); collector = new ConcordanceWindowCollector(100); searcher.search(indexSearcher, FIELD, q, null, analyzer, collector); assertEquals(0, collector.size()); reader.close(); directory.close(); }
From source file:org.tallison.lucene.search.concordance.TestConcordanceSearcher.java
License:Apache License
@Test public void testWithStops() throws Exception { String[] docs = new String[] { "a b the d e the f", "g h the d the j" }; Analyzer analyzer = getAnalyzer(MockTokenFilter.ENGLISH_STOPSET); Directory directory = getDirectory(analyzer, docs); IndexReader reader = DirectoryReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(reader); WindowBuilder wb = new WindowBuilder(2, 2, analyzer.getOffsetGap(FIELD)); ConcordanceSearcher searcher = new ConcordanceSearcher(wb); SpanQuery q = new SpanTermQuery(new Term(FIELD, "d")); ConcordanceWindowCollector collector = new ConcordanceWindowCollector(3); searcher.search(indexSearcher, FIELD, q, null, analyzer, collector); List<ConcordanceWindow> windows = collector.getSortedWindows(); assertEquals(2, windows.size());//from ww w . j a v a 2 s . com // the second word after the target is a stop word // this post-component of this window should only go to the first word after // the target assertEquals("b the", windows.get(0).getPre()); assertEquals("d", windows.get(0).getTarget()); assertEquals(" e", windows.get(0).getPost()); assertEquals("h the", windows.get(1).getPre()); assertEquals("d", windows.get(1).getTarget()); assertEquals(" the j", windows.get(1).getPost()); reader.close(); directory.close(); }
From source file:org.tallison.lucene.search.concordance.TestConcordanceSearcher.java
License:Apache License
@Test public void testBasicStandardQueryConversion() throws Exception { String[] docs = new String[] { "a b c a b c", "c b a c b a d e a", "c b a c b a e a b c a" }; Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET); Directory directory = getDirectory(analyzer, docs); IndexReader reader = DirectoryReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(reader); ConcordanceSearcher searcher = new ConcordanceSearcher( new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD))); BooleanQuery q = new BooleanQuery.Builder().add(new TermQuery(new Term(FIELD, "a")), Occur.MUST) .add(new TermQuery(new Term(FIELD, "d")), Occur.MUST_NOT).build(); ConcordanceWindowCollector collector = new ConcordanceWindowCollector(10); searcher.search(indexSearcher, FIELD, q, null, analyzer, collector); // shouldn't include document with "d" assertEquals(6, collector.size());/* w w w .j a v a 2 s. c o m*/ // should only include document with "e" and not "d" Query filter = new TermQuery(new Term(FIELD, "e")); collector = new ConcordanceWindowCollector(10); searcher.search(indexSearcher, FIELD, (Query) q, filter, analyzer, collector); assertEquals(4, collector.size()); reader.close(); directory.close(); }
From source file:org.tallison.lucene.search.concordance.TestConcordanceSearcher.java
License:Apache License
@Test public void testMismatchingFieldsInStandardQueryConversion() throws Exception { // tests what happens if a Query doesn't contain a term in the "span" field // in the searcher...should be no exception and zero documents returned. String[] docs = new String[] { "a b c a b c", }; Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET); Directory directory = getDirectory(analyzer, docs); IndexReader reader = DirectoryReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(reader); ConcordanceSearcher searcher = new ConcordanceSearcher( new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD))); Query q = new TermQuery(new Term("_" + FIELD, "a")); int windowCount = -1; ConcordanceWindowCollector collector = new ConcordanceWindowCollector(10); searcher.search(indexSearcher, FIELD, q, null, analyzer, collector); windowCount = collector.size();//from w w w . j a v a2 s . c om assertEquals(0, windowCount); reader.close(); directory.close(); }
From source file:org.tallison.lucene.search.concordance.TestConcordanceSearcher.java
License:Apache License
/**
 * Runs a search for "d" through a {@code DedupingConcordanceWindowCollector}, once
 * constructed with 2 and once with {@code COLLECT_ALL}, over six documents whose
 * first four differ only in letter case, and checks the collector sizes (2 and 3).
 *
 * @throws Exception if building the directory, opening the reader or searching fails
 */
@Test
public void testUniqueCollector() throws Exception {
    String[] docs = new String[] { "a b c d c b a", "a B C d c b a", "a b C d C B a", "a b c d C B A",
            "e f g d g f e", "h i j d j i h" };
    Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET);
    // try-with-resources closes the reader first, then the directory, even when an
    // assertion fails part-way through the test.
    try (Directory directory = getDirectory(analyzer, docs);
            IndexReader reader = DirectoryReader.open(directory)) {
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        ConcordanceSearcher searcher = new ConcordanceSearcher(
                new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD)));
        SpanQuery q = new SpanTermQuery(new Term(FIELD, "d"));

        DedupingConcordanceWindowCollector collector = new DedupingConcordanceWindowCollector(2);
        searcher.search(indexSearcher, FIELD, (Query) q, null, analyzer, collector);
        assertEquals(2, collector.size());

        collector = new DedupingConcordanceWindowCollector(AbstractConcordanceWindowCollector.COLLECT_ALL);
        searcher.search(indexSearcher, FIELD, (Query) q, null, analyzer, collector);
        assertEquals(3, collector.size());
    }
}
From source file:org.tallison.lucene.search.concordance.TestConcordanceSearcher.java
License:Apache License
/**
 * Runs a search for "d" through a {@code DedupingConcordanceWindowCollector}
 * constructed with 3, over six documents of which the first four are identical.
 * Checks that three windows are collected and that the first sorted window
 * reports a count of 4.
 *
 * @throws Exception if building the directory, opening the reader or searching fails
 */
@Test
public void testUniqueCollectorWithSameWindowOverflow() throws Exception {
    String[] docs = new String[] { "a b c d c b a", "a b c d c b a", "a b c d c b a", "a b c d c b a",
            "e f g d g f e", "h i j d j i h" };
    Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET);
    // try-with-resources closes the reader first, then the directory, even when an
    // assertion fails part-way through the test.
    try (Directory directory = getDirectory(analyzer, docs);
            IndexReader reader = DirectoryReader.open(directory)) {
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        ConcordanceSearcher searcher = new ConcordanceSearcher(
                new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD)));
        SpanQuery q = new SpanTermQuery(new Term(FIELD, "d"));

        DedupingConcordanceWindowCollector collector = new DedupingConcordanceWindowCollector(3);
        searcher.search(indexSearcher, FIELD, (Query) q, null, analyzer, collector);
        assertEquals(3, collector.size());
        assertEquals(4, collector.getSortedWindows().get(0).getCount());
    }
}