List of usage examples for the add method of org.apache.lucene.util.BytesRefHash
public int add(BytesRef bytes)
From source file:de.unihildesheim.iw.lucene.index.FDRIndexDataProvider.java
License:Open Source License
/**
 * Streams the terms stored in the term vectors of one document, restricted
 * to the requested fields. Terms of all matching fields are gathered in a
 * single {@link BytesRefHash} before being streamed.
 *
 * @param docId id of the document whose term vectors are read
 * @param field names of the fields to take terms from; the passed array is
 * left untouched
 * @return stream over the gathered terms; an empty stream if the reader
 * returns no term vectors for the document
 * @throws UncheckedIOException if reading term vectors or terms fails
 */
@SuppressFBWarnings({ "EXS_EXCEPTION_SOFTENING_NO_CONSTRAINTS", "EXS_EXCEPTION_SOFTENING_NO_CHECKED" })
@Override
public Stream<BytesRef> getDocumentTerms(final int docId, @NotNull final String... field) {
    // binarySearch requires sorted input. Sort a private copy so that an
    // array handed in by the caller is not reordered as a side effect.
    final String[] sortedFields = field.clone();
    Arrays.sort(sortedFields);

    final Fields fields;
    try {
        fields = this.index.reader.getTermVectors(docId);
    } catch (final IOException e) {
        throw new UncheckedIOException(e);
    }

    if (fields == null) {
        return Stream.empty();
    }

    final BytesRefHash terms = new BytesRefHash();
    StreamSupport.stream(fields.spliterator(), false)
        // filter for required fields
        .filter(fn -> Arrays.binarySearch(sortedFields, fn) >= 0)
        .map(fn -> {
            try {
                return fields.terms(fn);
            } catch (final IOException e) {
                throw new UncheckedIOException(e);
            }
        })
        // a listed field may still yield no terms
        .filter(t -> t != null)
        .forEach(t -> {
            try {
                final TermsEnum te = t.iterator(null);
                BytesRef term;
                while ((term = te.next()) != null) {
                    terms.add(term);
                }
            } catch (final IOException e) {
                throw new UncheckedIOException(e);
            }
        });
    return StreamUtils.stream(terms);
}
From source file:de.unihildesheim.iw.lucene.index.FDRIndexDataProvider.java
License:Open Source License
/**
 * Streams the terms found in the term vectors of all given documents.
 * For every document id the term vectors are loaded; documents without
 * term vectors are skipped. The terms of all fields of a document are put
 * into a per-document {@link BytesRefHash}, and these are merged into one
 * {@code MergingBytesRefHash} whose stream is returned.
 *
 * @param docIds ids of the documents to read
 * @return stream of the merged hash of all collected terms
 * @throws UncheckedIOException if any index access fails
 */
@SuppressFBWarnings("EXS_EXCEPTION_SOFTENING_NO_CONSTRAINTS")
@Override
@NotNull
public Stream<BytesRef> getDocumentsTerms(@NotNull final DocIdSet docIds) {
    try {
        return StreamUtils.stream(docIds)
            .mapToObj(id -> {
                try {
                    return this.index.reader.getTermVectors(id);
                } catch (final IOException ioe) {
                    throw new UncheckedIOException(ioe);
                }
            })
            .filter(vectors -> vectors != null)
            .map(vectors -> {
                // one hash per document, merged by the collector below
                final BytesRefHash docTerms = new BytesRefHash();
                StreamSupport.stream(vectors.spliterator(), false)
                    .map(name -> {
                        try {
                            return vectors.terms(name);
                        } catch (final IOException ioe) {
                            throw new UncheckedIOException(ioe);
                        }
                    })
                    .filter(fieldTerms -> fieldTerms != null)
                    .forEach(fieldTerms -> {
                        try {
                            final TermsEnum cursor = fieldTerms.iterator(null);
                            for (BytesRef current = cursor.next(); current != null; current = cursor.next()) {
                                docTerms.add(current);
                            }
                        } catch (final IOException ioe) {
                            throw new UncheckedIOException(ioe);
                        }
                    });
                return docTerms;
            })
            .collect(MergingBytesRefHash::new, MergingBytesRefHash::addAll, MergingBytesRefHash::addAll)
            .stream();
    } catch (final IOException ioe) {
        throw new UncheckedIOException(ioe);
    }
}
From source file:de.unihildesheim.iw.lucene.util.StreamUtilsTest.java
License:Open Source License
/**
 * Adds three terms to a {@link BytesRefHash} and checks that
 * {@code StreamUtils.stream} yields three elements, each added term exactly
 * once, and no element other than the added terms.
 */
@Test
public void testStream_bytesRefHash() throws Exception {
    final String[] expected = { "foo", "bar", "baz" };

    final BytesRefHash hash = new BytesRefHash();
    for (final String word : expected) {
        hash.add(new BytesRef(word));
    }

    Assert.assertEquals("Not all terms streamed.", 3L, StreamUtils.stream(hash).count());

    // each added term must show up exactly once
    for (final String word : expected) {
        final long hits = StreamUtils.stream(hash)
            .filter(br -> br.bytesEquals(new BytesRef(word)))
            .count();
        Assert.assertEquals("Term not found.", 1L, hits);
    }

    // nothing but the added terms may be streamed
    final long strangers = StreamUtils.stream(hash)
        .filter(t -> {
            for (final String known : expected) {
                if (t.bytesEquals(new BytesRef(known))) {
                    return false;
                }
            }
            return true;
        })
        .count();
    Assert.assertEquals("Unknown term found.", 0L, strangers);
}
From source file:org.elasticsearch.common.util.BytesRefHashTests.java
License:Apache License
private void assertAllIn(Set<String> strings, BytesRefHash hash) { BytesRef ref = new BytesRef(); BytesRef scratch = new BytesRef(); long count = hash.size(); for (String string : strings) { ref.copyChars(string);/* w w w . ja v a 2 s . com*/ long key = hash.add(ref); // add again to check duplicates assertEquals(string, hash.get((-key) - 1, scratch).utf8ToString()); assertEquals(count, hash.size()); assertTrue("key: " + key + " count: " + count + " string: " + string, key < count); } }
From source file:uk.co.flax.luwak.presearcher.TermFilteredPresearcher.java
License:Apache License
/**
 * Collects all terms of one field of a reader into a {@link BytesRefHash}.
 *
 * @param field name of the field to read
 * @param reader reader to take the terms from
 * @return hash holding every term enumerated for the field; empty when the
 * reader returns no terms for it
 * @throws IOException if reading the terms fails
 */
protected BytesRefHash buildTermsHash(String field, LeafReader reader) throws IOException {
    final BytesRefHash collected = new BytesRefHash();
    final Terms fieldTerms = reader.terms(field);

    if (fieldTerms != null) {
        final TermsEnum cursor = fieldTerms.iterator();
        for (BytesRef current = cursor.next(); current != null; current = cursor.next()) {
            collected.add(current);
        }
    }
    return collected;
}
From source file:uk.co.flax.luwak.presearcher.TermFilteredPresearcher.java
License:Apache License
/**
 * Groups the terms the extractor collects from a query tree by field.
 * A term of type ANY registers the ANYTOKEN under ANYTOKEN_FIELD (once).
 * Any other term is added under its own field; unless its type is EXACT,
 * every non-null extra token supplied by the components is added to the
 * same field as well.
 *
 * @param tree query tree to extract terms from
 * @return map from field name to the terms collected for that field
 */
protected Map<String, BytesRefHash> collectTerms(QueryTree tree) {
    final Map<String, BytesRefHash> termsByField = new HashMap<>();

    for (final QueryTerm qt : extractor.collectTerms(tree)) {
        if (qt.type.equals(QueryTerm.Type.ANY)) {
            if (termsByField.get(ANYTOKEN_FIELD) == null) {
                final BytesRefHash anyHash = new BytesRefHash();
                anyHash.add(new BytesRef(ANYTOKEN));
                termsByField.put(ANYTOKEN_FIELD, anyHash);
            }
            continue;
        }

        final String fieldName = qt.term.field();
        BytesRefHash bucket = termsByField.get(fieldName);
        if (bucket == null) {
            bucket = new BytesRefHash();
            termsByField.put(fieldName, bucket);
        }

        // the term itself is recorded for every non-ANY type
        bucket.add(qt.term.bytes());
        if (qt.type.equals(QueryTerm.Type.EXACT)) {
            continue;
        }

        for (final PresearcherComponent component : components) {
            final BytesRef extra = component.extraToken(qt);
            if (extra != null) {
                bucket.add(extra);
            }
        }
    }
    return termsByField;
}
From source file:uk.co.flax.luwak.QueryTermFilter.java
License:Apache License
/**
 * Create a QueryTermFilter for an IndexReader.
 * The reader is wrapped into a single {@link LeafReader}; for each of its
 * fields all enumerated terms are stored in a {@link BytesRefHash} that is
 * registered in {@code termsHash} under the field name. A field without
 * terms is registered with an empty hash.
 *
 * @param reader the {@link IndexReader}
 * @throws IOException on error
 */
public QueryTermFilter(IndexReader reader) throws IOException {
    final LeafReader wrapped = SlowCompositeReaderWrapper.wrap(reader);

    for (final String fieldName : wrapped.fields()) {
        final BytesRefHash collected = new BytesRefHash();
        final Terms fieldTerms = wrapped.terms(fieldName);

        if (fieldTerms != null) {
            final TermsEnum cursor = fieldTerms.iterator();
            for (BytesRef current = cursor.next(); current != null; current = cursor.next()) {
                collected.add(current);
            }
        }
        termsHash.put(fieldName, collected);
    }
}