Example usage for org.apache.lucene.util BytesRefHash add

List of usage examples for org.apache.lucene.util BytesRefHash add

Introduction

On this page you can find example usages of org.apache.lucene.util.BytesRefHash.add.

Prototype

public int add(BytesRef bytes) 

Source Link

Document

Adds a new BytesRef

Usage

From source file:de.unihildesheim.iw.lucene.index.FDRIndexDataProvider.java

License:Open Source License

/**
 * Streams the terms found in the term vectors of a single document,
 * restricted to the given fields.
 *
 * <p>All terms of the matching fields are collected into one
 * {@link BytesRefHash}, which is then streamed via {@code StreamUtils}.
 *
 * @param docId id of the document whose term vectors are read
 * @param field names of the fields to include; the passed array is not modified
 * @return stream of the collected terms, or an empty stream if the document
 * has no term vectors
 * @throws UncheckedIOException if reading from the index fails
 */
@SuppressFBWarnings({ "EXS_EXCEPTION_SOFTENING_NO_CONSTRAINTS", "EXS_EXCEPTION_SOFTENING_NO_CHECKED" })
@Override
public Stream<BytesRef> getDocumentTerms(final int docId, @NotNull final String... field) {
    // Arrays.binarySearch requires sorted input. Sort a private copy so the
    // caller's array is not reordered as a hidden side effect.
    final String[] sortedFields = field.clone();
    Arrays.sort(sortedFields);

    final Fields fields;
    try {
        fields = this.index.reader.getTermVectors(docId);
    } catch (final IOException e) {
        throw new UncheckedIOException(e);
    }

    if (fields == null) {
        return Stream.empty();
    }

    final BytesRefHash terms = new BytesRefHash();
    StreamSupport.stream(fields.spliterator(), false)
            // filter for required fields
            .filter(fn -> Arrays.binarySearch(sortedFields, fn) >= 0).map(fn -> {
                try {
                    return fields.terms(fn);
                } catch (final IOException e) {
                    throw new UncheckedIOException(e);
                }
            }).filter(t -> t != null).forEach(t -> {
                try {
                    final TermsEnum te = t.iterator(null);
                    BytesRef term;
                    while ((term = te.next()) != null) {
                        terms.add(term);
                    }
                } catch (final IOException e) {
                    throw new UncheckedIOException(e);
                }
            });

    return StreamUtils.stream(terms);
}

From source file:de.unihildesheim.iw.lucene.index.FDRIndexDataProvider.java

License:Open Source License

/**
 * Streams the terms found in the term vectors of all documents in the
 * given set.
 *
 * <p>For every document that has term vectors, the terms of all its fields
 * are gathered into a per-document {@link BytesRefHash}; these are then
 * merged into a single {@code MergingBytesRefHash} whose content is streamed.
 *
 * @param docIds ids of the documents to read
 * @return stream over the merged terms
 * @throws UncheckedIOException if reading from the index fails
 */
@SuppressFBWarnings("EXS_EXCEPTION_SOFTENING_NO_CONSTRAINTS")
@Override
@NotNull
public Stream<BytesRef> getDocumentsTerms(@NotNull final DocIdSet docIds) {
    try {
        return StreamUtils.stream(docIds)
                // load the term vectors of each document
                .mapToObj(id -> {
                    try {
                        return this.index.reader.getTermVectors(id);
                    } catch (final IOException ex) {
                        throw new UncheckedIOException(ex);
                    }
                })
                // documents without term vectors contribute nothing
                .filter(vectors -> vectors != null)
                .map(vectors -> {
                    final BytesRefHash docTerms = new BytesRefHash();
                    StreamSupport.stream(vectors.spliterator(), false)
                            .map(fieldName -> {
                                try {
                                    return vectors.terms(fieldName);
                                } catch (final IOException ex) {
                                    throw new UncheckedIOException(ex);
                                }
                            })
                            .filter(fieldTerms -> fieldTerms != null)
                            .forEach(fieldTerms -> {
                                try {
                                    final TermsEnum cursor = fieldTerms.iterator(null);
                                    for (BytesRef current = cursor.next(); current != null; current = cursor.next()) {
                                        docTerms.add(current);
                                    }
                                } catch (final IOException ex) {
                                    throw new UncheckedIOException(ex);
                                }
                            });
                    return docTerms;
                })
                .collect(MergingBytesRefHash::new, MergingBytesRefHash::addAll, MergingBytesRefHash::addAll)
                .stream();
    } catch (final IOException ex) {
        throw new UncheckedIOException(ex);
    }
}

From source file:de.unihildesheim.iw.lucene.util.StreamUtilsTest.java

License:Open Source License

/**
 * Checks that streaming a {@link BytesRefHash} yields each added term
 * exactly once and nothing else.
 */
@Test
public void testStream_bytesRefHash() throws Exception {
    final String[] expected = { "foo", "bar", "baz" };
    final BytesRefHash hash = new BytesRefHash();
    for (final String value : expected) {
        hash.add(new BytesRef(value));
    }

    final long streamed = StreamUtils.stream(hash).count();
    Assert.assertEquals("Not all terms streamed.", 3L, streamed);

    // every added term must show up exactly once
    for (final String value : expected) {
        final long matches = StreamUtils.stream(hash)
                .filter(ref -> ref.bytesEquals(new BytesRef(value)))
                .count();
        Assert.assertEquals("Term not found.", 1L, matches);
    }

    // nothing besides the added terms may be streamed
    final long unknown = StreamUtils.stream(hash)
            .filter(ref -> !(ref.bytesEquals(new BytesRef("foo"))
                    || ref.bytesEquals(new BytesRef("bar"))
                    || ref.bytesEquals(new BytesRef("baz"))))
            .count();
    Assert.assertEquals("Unknown term found.", 0L, unknown);
}

From source file:org.elasticsearch.common.util.BytesRefHashTests.java

License:Apache License

/**
 * Adds every given string to the hash again and verifies the outcome of
 * each add: the returned key, decoded as {@code -key - 1}, must resolve to
 * the same string, the hash size must be unchanged, and the key must be
 * smaller than the size recorded before the loop.
 *
 * @param strings strings to add again
 * @param hash hash under test
 */
private void assertAllIn(Set<String> strings, BytesRefHash hash) {
    final BytesRef probe = new BytesRef();
    final BytesRef spare = new BytesRef();
    final long sizeBefore = hash.size();
    for (String string : strings) {
        probe.copyChars(string);
        // add again to check duplicates
        final long key = hash.add(probe);
        final long decodedId = -key - 1;
        final String stored = hash.get(decodedId, spare).utf8ToString();
        assertEquals(string, stored);
        assertEquals(sizeBefore, hash.size());
        final String details = "key: " + key + " count: " + sizeBefore + " string: " + string;
        assertTrue(details, key < sizeBefore);
    }
}

From source file:uk.co.flax.luwak.presearcher.TermFilteredPresearcher.java

License:Apache License

/**
 * Collects all terms of one field of the reader into a {@link BytesRefHash}.
 *
 * @param field name of the field to read
 * @param reader reader to take the terms from
 * @return hash holding the field's terms; empty if the reader returns no
 * terms for the field
 * @throws IOException if reading the terms fails
 */
protected BytesRefHash buildTermsHash(String field, LeafReader reader) throws IOException {
    final BytesRefHash collected = new BytesRefHash();
    final Terms fieldTerms = reader.terms(field);
    if (fieldTerms != null) {
        final TermsEnum cursor = fieldTerms.iterator();
        for (BytesRef current = cursor.next(); current != null; current = cursor.next()) {
            collected.add(current);
        }
    }
    return collected;
}

From source file:uk.co.flax.luwak.presearcher.TermFilteredPresearcher.java

License:Apache License

/**
 * Groups the terms extracted from a query tree by field name.
 *
 * <p>Terms of type {@code ANY} are represented by a single {@code ANYTOKEN}
 * entry under {@code ANYTOKEN_FIELD}. For every other term its bytes are
 * added under the term's field; unless the term is {@code EXACT}, each
 * presearcher component may additionally contribute an extra token.
 *
 * @param tree query tree to extract terms from
 * @return map from field name to the hash of terms collected for that field
 */
protected Map<String, BytesRefHash> collectTerms(QueryTree tree) {

    final Map<String, BytesRefHash> termsByField = new HashMap<>();

    for (QueryTerm extracted : extractor.collectTerms(tree)) {
        if (extracted.type.equals(QueryTerm.Type.ANY)) {
            // the wildcard token only needs to be registered once
            if (!termsByField.containsKey(ANYTOKEN_FIELD)) {
                final BytesRefHash anyHash = new BytesRefHash();
                anyHash.add(new BytesRef(ANYTOKEN));
                termsByField.put(ANYTOKEN_FIELD, anyHash);
            }
            continue;
        }

        // look up the hash for this field, creating it on first use
        BytesRefHash bucket = termsByField.get(extracted.term.field());
        if (bucket == null) {
            bucket = new BytesRefHash();
            termsByField.put(extracted.term.field(), bucket);
        }

        bucket.add(extracted.term.bytes());
        if (!extracted.type.equals(QueryTerm.Type.EXACT)) {
            for (PresearcherComponent component : components) {
                final BytesRef extra = component.extraToken(extracted);
                if (extra != null) {
                    bucket.add(extra);
                }
            }
        }
    }

    return termsByField;
}

From source file:uk.co.flax.luwak.QueryTermFilter.java

License:Apache License

/**
 * Create a QueryTermFilter for an IndexReader.
 *
 * <p>For every field of the wrapped reader, all of the field's terms are
 * collected into a {@link BytesRefHash} that is stored in {@code termsHash}
 * under the field name. A field without terms gets an empty hash.
 *
 * @param reader the {@link IndexReader}
 * @throws IOException on error
 */
public QueryTermFilter(IndexReader reader) throws IOException {
    final LeafReader wrapped = SlowCompositeReaderWrapper.wrap(reader);
    for (String fieldName : wrapped.fields()) {
        final BytesRefHash fieldHash = new BytesRefHash();
        final Terms fieldTerms = wrapped.terms(fieldName);
        if (fieldTerms != null) {
            final TermsEnum cursor = fieldTerms.iterator();
            for (BytesRef current = cursor.next(); current != null; current = cursor.next()) {
                fieldHash.add(current);
            }
        }
        termsHash.put(fieldName, fieldHash);
    }
}