List of usage examples for org.apache.lucene.util.BytesRefArray#iterator()
public BytesRefIterator iterator()
Sugar for iterator(Comparator) with a null comparator.
From source file: de.unihildesheim.iw.lucene.analyzer.EnglishAnalyzerTest.java
License:Open Source License
/**
 * Tokenizes a four-word query with an {@code EnglishAnalyzer} created through
 * its no-argument constructor and expects all four terms to be emitted.
 *
 * @throws Exception if creating or consuming the token stream fails
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream_noStopwords() throws Exception {
    final String query = "foo bar baz bam";
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    // The analyzer is closeable as well, so release it together with the stream.
    try (Analyzer analyzer = new EnglishAnalyzer();
            TokenStream stream = analyzer.tokenStream(null, query)) {
        // Look the attribute up once instead of once per token.
        final CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttr);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // Finish the consumer workflow (reset, incrementToken, end, close).
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 4L, result.size());

    final BytesRefIterator bri = result.iterator();
    for (BytesRef term = bri.next(); term != null; term = bri.next()) {
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.",
                "foo".equals(text) || "bar".equals(text)
                        || "baz".equals(text) || "bam".equals(text));
    }
}
From source file:de.unihildesheim.iw.lucene.analyzer.EnglishAnalyzerTest.java
License:Open Source License
/**
 * Tokenizes a four-word query with an {@code EnglishAnalyzer} that was given
 * a {@code CharArraySet} holding "foo" and "bar", and expects only the two
 * remaining terms ("baz", "bam") to be emitted.
 *
 * @throws Exception if creating or consuming the token stream fails
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream() throws Exception {
    final CharArraySet csa = new CharArraySet(Arrays.asList("foo", "bar"), true);
    final String query = "foo bar baz bam";
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    // The analyzer is closeable as well, so release it together with the stream.
    try (Analyzer analyzer = new EnglishAnalyzer(csa);
            TokenStream stream = analyzer.tokenStream(null, query)) {
        // Look the attribute up once instead of once per token.
        final CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttr);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // Finish the consumer workflow (reset, incrementToken, end, close).
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 2L, result.size());

    final BytesRefIterator bri = result.iterator();
    for (BytesRef term = bri.next(); term != null; term = bri.next()) {
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.",
                "baz".equals(text) || "bam".equals(text));
    }
}
From source file:de.unihildesheim.iw.lucene.analyzer.FrenchAnalyzerTest.java
License:Open Source License
/**
 * Tokenizes a four-word query with a {@code FrenchAnalyzer} created through
 * its no-argument constructor and expects all four terms to be emitted.
 *
 * @throws Exception if creating or consuming the token stream fails
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream_noStopwords() throws Exception {
    final String query = "foo bar baz bam";
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    // The analyzer is closeable as well, so release it together with the stream.
    try (Analyzer analyzer = new FrenchAnalyzer();
            TokenStream stream = analyzer.tokenStream(null, query)) {
        // Look the attribute up once instead of once per token.
        final CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttr);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // Finish the consumer workflow (reset, incrementToken, end, close).
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 4L, result.size());

    final BytesRefIterator bri = result.iterator();
    for (BytesRef term = bri.next(); term != null; term = bri.next()) {
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.",
                "foo".equals(text) || "bar".equals(text)
                        || "baz".equals(text) || "bam".equals(text));
    }
}
From source file:de.unihildesheim.iw.lucene.analyzer.FrenchAnalyzerTest.java
License:Open Source License
/**
 * Tokenizes a four-word query with a {@code FrenchAnalyzer} that was given a
 * {@code CharArraySet} holding "foo" and "bar", and expects only the two
 * remaining terms ("baz", "bam") to be emitted.
 *
 * @throws Exception if creating or consuming the token stream fails
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream() throws Exception {
    final CharArraySet csa = new CharArraySet(Arrays.asList("foo", "bar"), true);
    final String query = "foo bar baz bam";
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    // The analyzer is closeable as well, so release it together with the stream.
    try (Analyzer analyzer = new FrenchAnalyzer(csa);
            TokenStream stream = analyzer.tokenStream(null, query)) {
        // Look the attribute up once instead of once per token.
        final CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttr);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // Finish the consumer workflow (reset, incrementToken, end, close).
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 2L, result.size());

    final BytesRefIterator bri = result.iterator();
    for (BytesRef term = bri.next(); term != null; term = bri.next()) {
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.",
                "baz".equals(text) || "bam".equals(text));
    }
}
From source file:de.unihildesheim.iw.lucene.analyzer.FrenchAnalyzerTest.java
License:Open Source License
/**
 * Appends one {@code <elision>'bim} token per entry of
 * {@code FrenchAnalyzer.DEFAULT_ELISIONS} to the base query and expects the
 * analyzer to emit "baz", "bam" and one "bim" per elision, i.e. with the
 * elision prefix stripped.
 *
 * @throws Exception if creating or consuming the token stream fails
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream_elisions() throws Exception {
    final CharArraySet csa = new CharArraySet(Arrays.asList("foo", "bar"), true);
    final StringBuilder query = new StringBuilder("foo bar baz bam ");
    // Add every elision, each prefixed to the same word, to the query.
    for (final String elision : FrenchAnalyzer.DEFAULT_ELISIONS) {
        query.append(elision).append("\'bim ");
    }
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    // The analyzer is closeable as well, so release it together with the stream.
    try (Analyzer analyzer = new FrenchAnalyzer(csa);
            TokenStream stream = analyzer.tokenStream(null, query.toString())) {
        // Look the attribute up once instead of once per token.
        final CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttr);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // Finish the consumer workflow (reset, incrementToken, end, close).
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.",
            2L + FrenchAnalyzer.DEFAULT_ELISIONS.length, result.size());

    final BytesRefIterator bri = result.iterator();
    for (BytesRef term = bri.next(); term != null; term = bri.next()) {
        final String text = term.utf8ToString();
        // "bim" is what remains of each appended token once the elision is removed.
        Assert.assertTrue("Unknown term found.",
                "baz".equals(text) || "bam".equals(text) || "bim".equals(text));
    }
}
From source file:de.unihildesheim.iw.lucene.analyzer.GermanAnalyzerTest.java
License:Open Source License
/**
 * Tokenizes a four-word query with a {@code GermanAnalyzer} created through
 * its no-argument constructor and expects all four terms to be emitted.
 *
 * @throws Exception if creating or consuming the token stream fails
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream_noStopwords() throws Exception {
    final String query = "foo bar baz bam";
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    // The analyzer is closeable as well, so release it together with the stream.
    try (Analyzer analyzer = new GermanAnalyzer();
            TokenStream stream = analyzer.tokenStream(null, query)) {
        // Look the attribute up once instead of once per token.
        final CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttr);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // Finish the consumer workflow (reset, incrementToken, end, close).
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 4L, result.size());

    final BytesRefIterator bri = result.iterator();
    for (BytesRef term = bri.next(); term != null; term = bri.next()) {
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.",
                "foo".equals(text) || "bar".equals(text)
                        || "baz".equals(text) || "bam".equals(text));
    }
}
From source file:de.unihildesheim.iw.lucene.analyzer.GermanAnalyzerTest.java
License:Open Source License
/**
 * Tokenizes a four-word query with a {@code GermanAnalyzer} that was given a
 * {@code CharArraySet} holding "foo" and "bar", and expects only the two
 * remaining terms ("baz", "bam") to be emitted.
 *
 * @throws Exception if creating or consuming the token stream fails
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream() throws Exception {
    final CharArraySet csa = new CharArraySet(Arrays.asList("foo", "bar"), true);
    final String query = "foo bar baz bam";
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    // The analyzer is closeable as well, so release it together with the stream.
    try (Analyzer analyzer = new GermanAnalyzer(csa);
            TokenStream stream = analyzer.tokenStream(null, query)) {
        // Look the attribute up once instead of once per token.
        final CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttr);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // Finish the consumer workflow (reset, incrementToken, end, close).
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 2L, result.size());

    final BytesRefIterator bri = result.iterator();
    for (BytesRef term = bri.next(); term != null; term = bri.next()) {
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.",
                "baz".equals(text) || "bam".equals(text));
    }
}
From source file:de.unihildesheim.iw.lucene.query.QueryUtilsTest.java
License:Open Source License
/**
 * Tokenizes a three-word query, passing {@code null} as the third argument of
 * {@code QueryUtils.tokenizeQuery}, and expects exactly the three query terms
 * back.
 *
 * @throws Exception if tokenizing or iterating the result fails
 */
@SuppressWarnings("ImplicitNumericConversion")
@Test
public void testTokenizeQuery_noMetrics() throws Exception {
    final BytesRefArray terms = QueryUtils.tokenizeQuery("foo bar baz", ANALYZER, null);
    Assert.assertEquals("Extracted terms count mismatch.", 3L, terms.size());

    final BytesRefIterator termsIt = terms.iterator();
    for (BytesRef current = termsIt.next(); current != null; current = termsIt.next()) {
        final String text = current.utf8ToString();
        final boolean known =
                "foo".equals(text) || "bar".equals(text) || "baz".equals(text);
        if (!known) {
            Assert.fail("Unknown term found.");
        }
    }
}
From source file:de.unihildesheim.iw.lucene.query.QueryUtilsTest.java
License:Open Source License
/** * Test tokenizing with skipping terms not present in index. * * @throws Exception/*from w w w . ja v a2s .c om*/ */ @SuppressWarnings("ImplicitNumericConversion") @Test public void testTokenizeQuery() throws Exception { try (TestMemIndex idx = new TestMemIndex()) { final IndexDataProvider idp = idx.getIdp(); final BytesRefArray bra = QueryUtils.tokenizeQuery("foo bar field baz value", ANALYZER, idp); Assert.assertEquals("Extracted terms count mismatch.", 2L, bra.size()); final BytesRefIterator braIt = bra.iterator(); BytesRef term; while ((term = braIt.next()) != null) { final String termStr = term.utf8ToString(); switch (termStr) { case "foo": case "bar": case "baz": Assert.fail("Non-index term found."); break; case "value": case "field": // pass break; default: Assert.fail("Unknown term found."); break; } } } }
From source file:de.unihildesheim.iw.lucene.util.BytesRefUtilsTest.java
License:Open Source License
/**
 * Fills a {@code BytesRefHash} with three terms, converts it with
 * {@code BytesRefUtils.hashToArray} and checks that the array has the same
 * size and that every array entry can be found in the hash again.
 *
 * @throws Exception if iterating the resulting array fails
 */
@SuppressWarnings("ImplicitNumericConversion")
@Test
public void testHashToArray() throws Exception {
    final Collection<String> data = new HashSet<>(3);
    data.add("foo");
    data.add("bar");
    data.add("baz");

    final BytesRefHash brh = new BytesRefHash();
    for (final String entry : data) {
        brh.add(new BytesRef(entry));
    }

    final BytesRefArray bra = BytesRefUtils.hashToArray(brh);
    Assert.assertEquals("Not all terms found.", data.size(), bra.size());

    final BytesRefIterator bri = bra.iterator();
    for (BytesRef br = bri.next(); br != null; br = bri.next()) {
        // Compare by value: assertNotSame would compare the identity of the
        // autoboxed ints rather than the numbers themselves.
        Assert.assertTrue("BytesRef not found.", brh.find(br) != -1);
    }
}