Example usage for the org.apache.lucene.util.BytesRef constructor BytesRef(CharSequence)

List of usage examples for the org.apache.lucene.util.BytesRef constructor BytesRef(CharSequence)

Introduction

On this page you can find example usages of the org.apache.lucene.util.BytesRef constructor BytesRef(CharSequence).

Prototype

public BytesRef(CharSequence text) 

Source Link

Document

Initialize the byte[] from the UTF8 bytes for the provided String.

Usage

From source file:de.unihildesheim.iw.lucene.document.DocumentModelTest.java

License:Open Source License

/**
 * Serializes a small {@link DocumentModel} through an
 * {@link ObjectOutputStream}. The test passes when serialization completes
 * without throwing.
 *
 * @throws Exception Any exception thrown indicates an error
 */
@Test
public void testWriteObject() throws Exception {
    final Builder dmb = new Builder(1);
    dmb.setTermFrequency(new BytesRef("foo"), 12L);
    dmb.setTermFrequency(new BytesRef("bar"), 4L);
    dmb.setTermFrequency(new BytesRef("baz"), 32L);
    final DocumentModel dm = dmb.build();

    // try-with-resources closes the stream even if writeObject throws
    try (ObjectOutputStream out = new ObjectOutputStream(new ByteArrayOutputStream())) {
        out.writeObject(dm);
    }
}

From source file:de.unihildesheim.iw.lucene.document.DocumentModelTest.java

License:Open Source License

/**
 * Round-trips a {@link DocumentModel} through Java serialization and checks
 * that the deserialized copy reports the same id, term count and term
 * frequencies as the values it was built from, and that it equals the
 * original instance.
 *
 * @throws Exception Any exception thrown indicates an error
 */
@SuppressWarnings("ImplicitNumericConversion")
@Test
public void testReadObject() throws Exception {
    // document id
    final int dom_id = 1;
    // number of unique terms
    final int dom_termCount = 3;
    // total term frequency (12 + 4 + 32)
    final long dom_tf = 48L;

    final Builder dmb = new Builder(dom_id);
    dmb.setTermFrequency(new BytesRef("foo"), 12L);
    dmb.setTermFrequency(new BytesRef("bar"), 4L);
    dmb.setTermFrequency(new BytesRef("baz"), 32L);
    final DocumentModel ref = dmb.build();

    final ByteArrayOutputStream bouts = new ByteArrayOutputStream();
    try (ObjectOutputStream out = new ObjectOutputStream(bouts)) {
        out.writeObject(ref);
    }
    // snapshot the bytes only after the object stream was closed, so nothing
    // can still be sitting in its internal buffer
    final byte[] bytes = bouts.toByteArray();

    final DocumentModel dom;
    try (ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
        dom = (DocumentModel) in.readObject();
    }

    Assert.assertEquals("Document id differs.", dom_id, dom.id);
    Assert.assertEquals("Unique term count differs.", dom_termCount, dom.termCount());

    Assert.assertEquals("Term frequency value differs.", 12L, dom.tf(new BytesRef("foo")));
    Assert.assertEquals("Term frequency value differs.", 4L, dom.tf(new BytesRef("bar")));
    Assert.assertEquals("Term frequency value differs.", 32L, dom.tf(new BytesRef("baz")));

    Assert.assertEquals("Total term frequency differs.", dom_tf, dom.tf());

    // relative frequency = term frequency / total term frequency
    Assert.assertEquals("Relative term frequency value differs.", (double) 12L / (double) dom_tf,
            dom.relTf(new BytesRef("foo")), 0d);
    Assert.assertEquals("Relative term frequency value differs.", (double) 4L / (double) dom_tf,
            dom.relTf(new BytesRef("bar")), 0d);
    Assert.assertEquals("Relative term frequency value differs.", (double) 32L / (double) dom_tf,
            dom.relTf(new BytesRef("baz")), 0d);

    Assert.assertEquals("Equals returns false.", ref, dom);
}

From source file:de.unihildesheim.iw.lucene.index.AcceptAllTermFilterTest.java

License:Open Source License

/**
 * Verifies that {@link AcceptAll} reports each of the sample terms
 * (including the empty term) as accepted when no {@code TermsEnum} is given.
 *
 * @throws Exception Any exception thrown indicates an error
 */
@Test
public void testIsAccepted() throws Exception {
    final AcceptAll filter = new AcceptAll();
    final String[] samples = { "foo", "bar", "1", "" };

    for (final String sample : samples) {
        Assert.assertTrue("Term not accepted.", filter.isAccepted(null, new BytesRef(sample)));
    }
}

From source file:de.unihildesheim.iw.lucene.index.CommonTermsTermFilterTest.java

License:Open Source License

/**
 * Exercises {@link CommonTerms} with three different constructor arguments
 * against the test index and checks which of the terms "value" and
 * "document1" are accepted.
 *
 * @throws Exception Any exception thrown indicates an error
 */
@Test
public void testIsAccepted() throws Exception {
    try (TestIndex idx = new TestIndex()) {
        final IndexReader reader = idx.getReader();

        // filter configured with 0.99
        final CommonTerms ctfHigh = new CommonTerms(0.99);
        ctfHigh.setTopReader(reader);
        Assert.assertFalse("Term was accepted.", ctfHigh.isAccepted(null, new BytesRef("value")));
        Assert.assertTrue("Term was not accepted.", ctfHigh.isAccepted(null, new BytesRef("document1")));

        // filter configured with 0.34
        final CommonTerms ctfLow = new CommonTerms(0.34);
        ctfLow.setTopReader(reader);
        Assert.assertFalse("Term was accepted.", ctfLow.isAccepted(null, new BytesRef("value")));
        Assert.assertTrue("Term was not accepted.", ctfLow.isAccepted(null, new BytesRef("document1")));

        // filter configured with 1.0; only "document1" is checked here
        final CommonTerms ctfFull = new CommonTerms(1d);
        ctfFull.setTopReader(reader);
        Assert.assertTrue("Term was not accepted.", ctfFull.isAccepted(null, new BytesRef("document1")));
    }
}

From source file:de.unihildesheim.iw.lucene.index.FDRIndexDataProvider.java

License:Open Source License

/**
 * Create instance by using {@link Builder}.
 *
 * @param builder Builder instance//from  w ww.  j a  v  a2 s  . c  o m
 * @throws IOException Thrown on low-level I/O-errors
 */
@SuppressWarnings("WeakerAccess")
FDRIndexDataProvider(final Builder builder) throws IOException {
    // first initialize the Lucene index
    assert builder.idxReader != null;

    LOG.info("Initializing index & gathering base data..");
    this.index = new LuceneIndex(builder.idxReader);

    if (LOG.isDebugEnabled()) {
        LOG.debug("index.TTF {} index.UT {}", this.index.ttf, this.index.uniqueTerms);
        LOG.debug("TTF (abwasserreinigungsstuf): {}", getTermFrequency(new BytesRef("abwasserreinigungsstuf")));
    }
}

From source file:de.unihildesheim.iw.lucene.index.FDRIndexDataProviderTest.java

License:Open Source License

/**
 * Checks the term frequencies reported by
 * {@code IndexDataProvider.getTermFrequency(BytesRef)} for the terms of the
 * in-memory test index.
 *
 * @throws Exception Any exception thrown indicates an error
 */
@Test
public void testGetTermFrequency_term() throws Exception {
    // the raw reader is a resource of the try block so it is closed even when
    // an assertion fails (it was never closed before)
    try (TestMemIndex idx = new TestMemIndex(Index.TVECTORS);
            DirectoryReader reader = DirectoryReader.open(idx.dir)) {
        final FilteredDirectoryReader idxReader = new FilteredDirectoryReader.Builder(reader).build();
        final IndexDataProvider idp = new FDRIndexDataProvider.Builder().indexReader(idxReader).build();

        Assert.assertEquals("Term frequency mismatch.", 9L, idp.getTermFrequency(new BytesRef("value")));
        Assert.assertEquals("Term frequency mismatch.", 3L, idp.getTermFrequency(new BytesRef("document1")));
        Assert.assertEquals("Term frequency mismatch.", 3L, idp.getTermFrequency(new BytesRef("document2")));
        Assert.assertEquals("Term frequency mismatch.", 3L, idp.getTermFrequency(new BytesRef("document3")));
    }
}

From source file:de.unihildesheim.iw.lucene.index.FDRIndexDataProviderTest.java

License:Open Source License

/**
 * Checks the document frequencies reported by
 * {@code IndexDataProvider.getDocumentFrequency(BytesRef)} for the terms of
 * the in-memory test index.
 *
 * @throws Exception Any exception thrown indicates an error
 */
@SuppressWarnings("ImplicitNumericConversion")
@Test
public void testGetDocumentFrequency() throws Exception {
    // the raw reader is a resource of the try block so it is closed even when
    // an assertion fails (it was never closed before)
    try (TestMemIndex idx = new TestMemIndex(Index.TVECTORS);
            DirectoryReader reader = DirectoryReader.open(idx.dir)) {
        final FilteredDirectoryReader idxReader = new FilteredDirectoryReader.Builder(reader).build();
        final IndexDataProvider idp = new FDRIndexDataProvider.Builder().indexReader(idxReader).build();

        Assert.assertEquals("Document frequency mismatch.", 3L,
                idp.getDocumentFrequency(new BytesRef("value")));
        Assert.assertEquals("Document frequency mismatch.", 1L,
                idp.getDocumentFrequency(new BytesRef("document1")));
        Assert.assertEquals("Document frequency mismatch.", 1L,
                idp.getDocumentFrequency(new BytesRef("document2")));
        Assert.assertEquals("Document frequency mismatch.", 1L,
                idp.getDocumentFrequency(new BytesRef("document3")));
    }
}

From source file:de.unihildesheim.iw.lucene.index.FDRIndexDataProviderTest.java

License:Open Source License

/**
 * Retrieves the {@link DocumentModel} for document id 0 and checks its id,
 * term count and the (relative) frequency of the term "document1".
 *
 * @throws Exception Any exception thrown indicates an error
 */
@SuppressWarnings("ImplicitNumericConversion")
@Test
public void testGetDocumentModel() throws Exception {
    // the raw reader is a resource of the try block so it is closed even when
    // an assertion fails (it was never closed before)
    try (TestMemIndex idx = new TestMemIndex(Index.TVECTORS);
            DirectoryReader reader = DirectoryReader.open(idx.dir)) {
        final FilteredDirectoryReader idxReader = new FilteredDirectoryReader.Builder(reader).build();
        final IndexDataProvider idp = new FDRIndexDataProvider.Builder().indexReader(idxReader).build();

        final DocumentModel dm1 = idp.getDocumentModel(0);

        Assert.assertEquals("DocumentModel tf value differs.", 3L, dm1.tf(new BytesRef("document1")));
        Assert.assertEquals("DocumentModel rel-tf value differs.", 3d / 18d,
                dm1.relTf(new BytesRef("document1")), 0d);
        Assert.assertEquals("DocumentModel id differs.", 0L, dm1.id);
        Assert.assertEquals("DocumentModel term count differs.", 12L, dm1.termCount());
    }
}

From source file:de.unihildesheim.iw.lucene.index.FilteredDirectoryReaderTest.java

License:Open Source License

/**
 * Test basic {@link TermFilter} usage./*from  www .  jav a 2s  . co  m*/
 *
 * @throws Exception
 */
@SuppressWarnings({ "AnonymousInnerClassMayBeStatic", "ImplicitNumericConversion" })
@Test
public void testBuilder_termFilter() throws Exception {
    try (TestMemIndex idx = new TestMemIndex(Index.PLAIN)) {
        final String skipTerm = "first";
        final DirectoryReader reader = DirectoryReader.open(idx.dir);
        final FilteredDirectoryReader fReader = new Builder(reader).termFilter(new TermFilter() {
            @Override
            public boolean isAccepted(@Nullable final TermsEnum termsEnum, @NotNull final BytesRef term) {
                return !skipTerm.equals(term.utf8ToString());
            }
        }).build();

        new LeafReaderInstanceTest() {

            @Override
            void testHasDeletions() throws Exception {
                Assert.assertFalse("Reader has deletions.", fReader.hasDeletions());
            }

            @Override
            void testFieldCount() throws Exception {
                Assert.assertEquals("Field count mismatch.", idx.flds.size(), fReader.getFields().size());
            }

            @Override
            void testFieldNames() throws Exception {
                Assert.assertTrue("Visible field not found.", fReader.getFields().containsAll(idx.flds));
            }

            @Override
            void testTotalTermFreq() throws Exception {
                Assert.assertEquals("TotalTermFreq mismatch for visible term.", idx.docs,
                        fReader.totalTermFreq(new Term("f1", "field")));
                Assert.assertEquals("TotalTermFreq mismatch for missing term.", 0L,
                        fReader.totalTermFreq(new Term("f1", "foo")));
                Assert.assertEquals("TotalTermFreq mismatch for hidden term.", 0L,
                        fReader.totalTermFreq(new Term("f1", "first")));
            }

            @Override
            void testSumTotalTermFreq() throws Exception {
                Assert.assertEquals("SumTotalTermFreq mismatch for visible term.", 14L,
                        fReader.getSumTotalTermFreq("f1"));
            }

            @Override
            void testDocCount() throws Exception {
                Assert.assertEquals("Doc count mismatch.", idx.docs, fReader.getDocCount("f1"));
            }

            @SuppressWarnings("ObjectAllocationInLoop")
            @Override
            void testDocFreq() throws Exception {
                for (final String f : idx.flds) {
                    Assert.assertEquals("Missing term from all documents.", idx.docs,
                            fReader.docFreq(new Term(f, "value")));
                    Assert.assertEquals("Found hidden term.", 0L, fReader.docFreq(new Term(f, "first")));
                }
            }

            @Override
            void testSumDocFreq() throws Exception {
                Assert.assertEquals("SumDocFreq mismatch for visible term.", 14L, fReader.getSumDocFreq("f1"));
            }

            @Override
            void testTermVectors() throws Exception {
                final BytesRef term = new BytesRef("first");
                for (int i = 0; i < idx.docs - 1; i++) {
                    final Fields f = fReader.getTermVectors(i);
                    Assert.assertEquals("Too much fields retrieved from TermVector.", 1L, f.size());
                    final TermsEnum te = f.terms("f1").iterator(null);
                    Assert.assertFalse("Hidden term found.", te.seekExact(term));
                }
            }

            @Override
            void testNumDocs() throws Exception {
                Assert.assertEquals("NumDocs mismatch.", idx.docs, fReader.numDocs());
            }

            @Override
            void testMaxDoc() throws Exception {
                Assert.assertEquals("MaxDoc mismatch.", idx.docs, fReader.maxDoc());
            }
        };
    }
}

From source file:de.unihildesheim.iw.lucene.index.FilteredDirectoryReaderTest.java

License:Open Source License

/**
 * Test {@link Filter} usage in combination with {@link TermFilter}
 * restriction. The query filter selects documents containing
 * "document2field1" in field "f1", and the term filter hides the term
 * "document2field3".
 *
 * @throws Exception Any exception thrown indicates an error
 */
@SuppressWarnings({ "AnonymousInnerClassMayBeStatic", "ImplicitNumericConversion" })
@Test
public void testBuilder_filter_and_termFilter() throws Exception {
    // the raw reader is a resource of the try block so it is closed even when
    // an assertion fails (it was never closed before)
    try (TestMemIndex idx = new TestMemIndex(Index.ALL_FIELDS);
            DirectoryReader reader = DirectoryReader.open(idx.dir)) {
        final String skipTerm = "document2field3";
        final Query q = new TermQuery(new Term("f1", "document2field1"));
        final Filter f = new QueryWrapperFilter(q);
        final FilteredDirectoryReader fReader = new Builder(reader).queryFilter(f).termFilter(new TermFilter() {
            @Override
            public boolean isAccepted(@Nullable final TermsEnum termsEnum, @NotNull final BytesRef term) {
                // accept everything except the term to hide
                return !skipTerm.equals(term.utf8ToString());
            }
        }).build();

        new LeafReaderInstanceTest() {

            @Override
            void testHasDeletions() throws Exception {
                Assert.assertFalse("Reader has deletions.", fReader.hasDeletions());
            }

            @Override
            void testFieldCount() throws Exception {
                Assert.assertEquals("Field count mismatch.", 3L, fReader.getFields().size());
            }

            @Override
            void testFieldNames() throws Exception {
                for (final String fld : idx.flds) {
                    Assert.assertTrue("Visible field not found.", fReader.getFields().contains(fld));
                }
            }

            @Override
            void testTotalTermFreq() throws Exception {
                Assert.assertEquals("TotalTermFreq mismatch for visible term.", 1L,
                        fReader.totalTermFreq(new Term("f1", "field1")));
                Assert.assertEquals("TotalTermFreq mismatch for visible term.", 1L,
                        fReader.totalTermFreq(new Term("f2", "field2")));
                Assert.assertEquals("TotalTermFreq mismatch for visible term.", 1L,
                        fReader.totalTermFreq(new Term("f3", "field3")));
                Assert.assertEquals("TotalTermFreq mismatch for hidden term.", 0L,
                        fReader.totalTermFreq(new Term("f3", "document2field3")));
            }

            @Override
            void testSumTotalTermFreq() throws Exception {
                Assert.assertEquals("SumTotalTermFreq mismatch for visible terms.", 6L,
                        fReader.getSumTotalTermFreq("f2"));
            }

            @Override
            void testDocCount() throws Exception {
                for (final String fld : idx.flds) {
                    Assert.assertEquals("Doc count mismatch.", 1L, fReader.getDocCount(fld));
                }
            }

            @SuppressWarnings("ObjectAllocationInLoop")
            @Override
            void testDocFreq() throws Exception {
                Assert.assertEquals("Missing term from visible document.", 1L,
                        fReader.docFreq(new Term("f2", "value")));
                Assert.assertEquals("Hidden term found.", 0L,
                        fReader.docFreq(new Term("f1", "document1field1")));
                Assert.assertEquals("Hidden term found.", 0L,
                        fReader.docFreq(new Term("f3", "document2field3")));
            }

            @Override
            void testSumDocFreq() throws Exception {
                Assert.assertEquals("SumDocFreq mismatch for visible term.", 6L, fReader.getSumDocFreq("f2"));
                Assert.assertEquals("SumDocFreq mismatch for visible term.", 5L, fReader.getSumDocFreq("f3"));
            }

            @Override
            void testTermVectors() throws Exception {
                // set once any document returned term vectors
                boolean match = false;
                final BytesRef term = new BytesRef(skipTerm);
                for (int i = 0; i < fReader.maxDoc(); i++) {
                    final Fields fld = fReader.getTermVectors(i);
                    if (fld != null) {
                        match = true;
                        Assert.assertEquals("Number of fields retrieved from TermVector do not match.", 3L,
                                fld.size());
                        final Terms t = fld.terms("f3");
                        if (t != null) {
                            final TermsEnum te = t.iterator(null);
                            Assert.assertFalse("Hidden term found.", te.seekExact(term));
                        }
                    }
                }
                Assert.assertTrue("Fields not found.", match);
            }

            @Override
            void testNumDocs() throws Exception {
                Assert.assertEquals("NumDocs mismatch.", 1L, fReader.numDocs());
            }

            @Override
            void testMaxDoc() throws Exception {
                Assert.assertEquals("MaxDoc mismatch.", 2L, fReader.maxDoc());
            }
        };
    }
}