List of usage examples for the org.apache.lucene.util.BytesRef constructor
public BytesRef(CharSequence text)
From source file:de.unihildesheim.iw.lucene.document.DocumentModelTest.java
License:Open Source License
/**
 * Writes a {@link DocumentModel} holding three term frequencies to an
 * {@link ObjectOutputStream} backed by an in-memory buffer. The test
 * contains no assertions; it passes when writing completes without throwing.
 *
 * @throws Exception Any exception thrown indicates an error
 */
@Test
public void testWriteObject() throws Exception {
  final Builder dmb = new Builder(1);
  dmb.setTermFrequency(new BytesRef("foo"), 12L);
  dmb.setTermFrequency(new BytesRef("bar"), 4L);
  dmb.setTermFrequency(new BytesRef("baz"), 32L);
  final DocumentModel dm = dmb.build();

  // try-with-resources closes the stream even when writeObject throws
  try (ObjectOutputStream out =
      new ObjectOutputStream(new ByteArrayOutputStream())) {
    out.writeObject(dm);
  }
}
From source file:de.unihildesheim.iw.lucene.document.DocumentModelTest.java
License:Open Source License
@SuppressWarnings("ImplicitNumericConversion") @Test/*from ww w.j a va 2 s.com*/ public void testReadObject() throws Exception { final ByteArrayOutputStream bouts = new ByteArrayOutputStream(); @SuppressWarnings({ "resource", "TypeMayBeWeakened" }) final ObjectOutputStream out = new ObjectOutputStream(bouts); // document id final int dom_id = 1; // number of unique terms final int dom_termCount = 3; // total term frequency final long dom_tf = 48L; final Builder dmb = new Builder(dom_id); dmb.setTermFrequency(new BytesRef("foo"), 12L); dmb.setTermFrequency(new BytesRef("bar"), 4L); dmb.setTermFrequency(new BytesRef("baz"), 32L); final DocumentModel ref = dmb.build(); out.writeObject(ref); final byte[] bytes = bouts.toByteArray(); out.close(); final ByteArrayInputStream bins = new ByteArrayInputStream(bytes); @SuppressWarnings({ "TypeMayBeWeakened", "resource" }) final ObjectInputStream in = new ObjectInputStream(bins); final DocumentModel dom = (DocumentModel) in.readObject(); in.close(); Assert.assertEquals("Document id differs.", dom_id, dom.id); Assert.assertEquals("Unique term count differs.", dom_termCount, dom.termCount()); Assert.assertEquals("Term frequency value differs.", 12L, dom.tf(new BytesRef("foo"))); Assert.assertEquals("Term frequency value differs.", 4L, dom.tf(new BytesRef("bar"))); Assert.assertEquals("Term frequency value differs.", 32L, dom.tf(new BytesRef("baz"))); Assert.assertEquals("Total term frequency differs.", dom_tf, dom.tf()); Assert.assertEquals("Relative term frequency value differs.", (double) 12L / (double) dom_tf, dom.relTf(new BytesRef("foo")), 0d); Assert.assertEquals("Relative term frequency value differs.", (double) 4L / (double) dom_tf, dom.relTf(new BytesRef("bar")), 0d); Assert.assertEquals("Relative term frequency value differs.", (double) 32L / (double) dom_tf, dom.relTf(new BytesRef("baz")), 0d); Assert.assertEquals("Equals returns false.", ref, dom); }
From source file:de.unihildesheim.iw.lucene.index.AcceptAllTermFilterTest.java
License:Open Source License
/**
 * Asserts that an {@link AcceptAll} filter reports each of the sample terms
 * (including the empty term) as accepted when no TermsEnum is supplied.
 *
 * @throws Exception Any exception thrown indicates an error
 */
@Test
public void testIsAccepted() throws Exception {
  final AcceptAll filter = new AcceptAll();
  final String[] samples = {"foo", "bar", "1", ""};
  for (final String sample : samples) {
    Assert.assertTrue("Term not accepted.",
        filter.isAccepted(null, new BytesRef(sample)));
  }
}
From source file:de.unihildesheim.iw.lucene.index.CommonTermsTermFilterTest.java
License:Open Source License
@Test public void testIsAccepted() throws Exception { try (TestIndex idx = new TestIndex()) { final IndexReader r = idx.getReader(); CommonTerms ctf;//from w ww .j a v a 2 s. co m ctf = new CommonTerms(0.99); ctf.setTopReader(r); Assert.assertFalse("Term was accepted.", ctf.isAccepted(null, new BytesRef("value"))); Assert.assertTrue("Term was not accepted.", ctf.isAccepted(null, new BytesRef("document1"))); ctf = new CommonTerms(0.34); ctf.setTopReader(r); Assert.assertFalse("Term was accepted.", ctf.isAccepted(null, new BytesRef("value"))); Assert.assertTrue("Term was not accepted.", ctf.isAccepted(null, new BytesRef("document1"))); ctf = new CommonTerms(1d); ctf.setTopReader(r); Assert.assertTrue("Term was not accepted.", ctf.isAccepted(null, new BytesRef("document1"))); } }
From source file:de.unihildesheim.iw.lucene.index.FDRIndexDataProvider.java
License:Open Source License
/** * Create instance by using {@link Builder}. * * @param builder Builder instance//from w ww. j a v a2 s . c o m * @throws IOException Thrown on low-level I/O-errors */ @SuppressWarnings("WeakerAccess") FDRIndexDataProvider(final Builder builder) throws IOException { // first initialize the Lucene index assert builder.idxReader != null; LOG.info("Initializing index & gathering base data.."); this.index = new LuceneIndex(builder.idxReader); if (LOG.isDebugEnabled()) { LOG.debug("index.TTF {} index.UT {}", this.index.ttf, this.index.uniqueTerms); LOG.debug("TTF (abwasserreinigungsstuf): {}", getTermFrequency(new BytesRef("abwasserreinigungsstuf"))); } }
From source file:de.unihildesheim.iw.lucene.index.FDRIndexDataProviderTest.java
License:Open Source License
@Test public void testGetTermFrequency_term() throws Exception { try (TestMemIndex idx = new TestMemIndex(Index.TVECTORS)) { final DirectoryReader reader = DirectoryReader.open(idx.dir); final FilteredDirectoryReader idxReader = new FilteredDirectoryReader.Builder(reader).build(); final IndexDataProvider idp = new FDRIndexDataProvider.Builder().indexReader(idxReader).build(); Assert.assertEquals("Term frequency mismatch.", 9L, idp.getTermFrequency(new BytesRef("value"))); Assert.assertEquals("Term frequency mismatch.", 3L, idp.getTermFrequency(new BytesRef("document1"))); Assert.assertEquals("Term frequency mismatch.", 3L, idp.getTermFrequency(new BytesRef("document2"))); Assert.assertEquals("Term frequency mismatch.", 3L, idp.getTermFrequency(new BytesRef("document3"))); }//from ww w .j av a 2 s .co m }
From source file:de.unihildesheim.iw.lucene.index.FDRIndexDataProviderTest.java
License:Open Source License
@SuppressWarnings("ImplicitNumericConversion") @Test//from www .j ava 2 s . c om public void testGetDocumentFrequency() throws Exception { try (TestMemIndex idx = new TestMemIndex(Index.TVECTORS)) { final DirectoryReader reader = DirectoryReader.open(idx.dir); final FilteredDirectoryReader idxReader = new FilteredDirectoryReader.Builder(reader).build(); final IndexDataProvider idp = new FDRIndexDataProvider.Builder().indexReader(idxReader).build(); Assert.assertEquals("Document frequency mismatch.", 3L, idp.getDocumentFrequency(new BytesRef("value"))); Assert.assertEquals("Document frequency mismatch.", 1L, idp.getDocumentFrequency(new BytesRef("document1"))); Assert.assertEquals("Document frequency mismatch.", 1L, idp.getDocumentFrequency(new BytesRef("document2"))); Assert.assertEquals("Document frequency mismatch.", 1L, idp.getDocumentFrequency(new BytesRef("document3"))); } }
From source file:de.unihildesheim.iw.lucene.index.FDRIndexDataProviderTest.java
License:Open Source License
/**
 * Fetches the {@link DocumentModel} for document id 0 from an
 * {@link FDRIndexDataProvider} and checks its id, its term count and the
 * absolute and relative term frequency of the term "document1".
 *
 * @throws Exception Any exception thrown indicates an error
 */
@SuppressWarnings("ImplicitNumericConversion")
@Test
public void testGetDocumentModel() throws Exception {
  // the reader is a resource as well, so it gets closed before the index
  try (TestMemIndex idx = new TestMemIndex(Index.TVECTORS);
      DirectoryReader reader = DirectoryReader.open(idx.dir)) {
    final FilteredDirectoryReader idxReader =
        new FilteredDirectoryReader.Builder(reader).build();
    final IndexDataProvider idp =
        new FDRIndexDataProvider.Builder().indexReader(idxReader).build();

    final DocumentModel dm1 = idp.getDocumentModel(0);

    Assert.assertEquals("DocumentModel tf value differs.",
        3L, dm1.tf(new BytesRef("document1")));
    Assert.assertEquals("DocumentModel rel-tf value differs.",
        3d / 18d, dm1.relTf(new BytesRef("document1")), 0d);
    Assert.assertEquals("DocumentModel id differs.", 0L, dm1.id);
    Assert.assertEquals("DocumentModel term count differs.",
        12L, dm1.termCount());
  }
}
From source file:de.unihildesheim.iw.lucene.index.FilteredDirectoryReaderTest.java
License:Open Source License
/** * Test basic {@link TermFilter} usage./*from www . jav a 2s . co m*/ * * @throws Exception */ @SuppressWarnings({ "AnonymousInnerClassMayBeStatic", "ImplicitNumericConversion" }) @Test public void testBuilder_termFilter() throws Exception { try (TestMemIndex idx = new TestMemIndex(Index.PLAIN)) { final String skipTerm = "first"; final DirectoryReader reader = DirectoryReader.open(idx.dir); final FilteredDirectoryReader fReader = new Builder(reader).termFilter(new TermFilter() { @Override public boolean isAccepted(@Nullable final TermsEnum termsEnum, @NotNull final BytesRef term) { return !skipTerm.equals(term.utf8ToString()); } }).build(); new LeafReaderInstanceTest() { @Override void testHasDeletions() throws Exception { Assert.assertFalse("Reader has deletions.", fReader.hasDeletions()); } @Override void testFieldCount() throws Exception { Assert.assertEquals("Field count mismatch.", idx.flds.size(), fReader.getFields().size()); } @Override void testFieldNames() throws Exception { Assert.assertTrue("Visible field not found.", fReader.getFields().containsAll(idx.flds)); } @Override void testTotalTermFreq() throws Exception { Assert.assertEquals("TotalTermFreq mismatch for visible term.", idx.docs, fReader.totalTermFreq(new Term("f1", "field"))); Assert.assertEquals("TotalTermFreq mismatch for missing term.", 0L, fReader.totalTermFreq(new Term("f1", "foo"))); Assert.assertEquals("TotalTermFreq mismatch for hidden term.", 0L, fReader.totalTermFreq(new Term("f1", "first"))); } @Override void testSumTotalTermFreq() throws Exception { Assert.assertEquals("SumTotalTermFreq mismatch for visible term.", 14L, fReader.getSumTotalTermFreq("f1")); } @Override void testDocCount() throws Exception { Assert.assertEquals("Doc count mismatch.", idx.docs, fReader.getDocCount("f1")); } @SuppressWarnings("ObjectAllocationInLoop") @Override void testDocFreq() throws Exception { for (final String f : idx.flds) { Assert.assertEquals("Missing term from all documents.", idx.docs, 
fReader.docFreq(new Term(f, "value"))); Assert.assertEquals("Found hidden term.", 0L, fReader.docFreq(new Term(f, "first"))); } } @Override void testSumDocFreq() throws Exception { Assert.assertEquals("SumDocFreq mismatch for visible term.", 14L, fReader.getSumDocFreq("f1")); } @Override void testTermVectors() throws Exception { final BytesRef term = new BytesRef("first"); for (int i = 0; i < idx.docs - 1; i++) { final Fields f = fReader.getTermVectors(i); Assert.assertEquals("Too much fields retrieved from TermVector.", 1L, f.size()); final TermsEnum te = f.terms("f1").iterator(null); Assert.assertFalse("Hidden term found.", te.seekExact(term)); } } @Override void testNumDocs() throws Exception { Assert.assertEquals("NumDocs mismatch.", idx.docs, fReader.numDocs()); } @Override void testMaxDoc() throws Exception { Assert.assertEquals("MaxDoc mismatch.", idx.docs, fReader.maxDoc()); } }; } }
From source file:de.unihildesheim.iw.lucene.index.FilteredDirectoryReaderTest.java
License:Open Source License
/**
 * Test {@link Filter} usage in combination with {@link TermFilter}
 * restriction.
 * <p>
 * Builds a {@link FilteredDirectoryReader} with a query filter on the term
 * "document2field1" in field "f1" and a term filter rejecting
 * "document2field3", and supplies the expectations for that reader through
 * an anonymous {@link LeafReaderInstanceTest}.
 *
 * @throws Exception Any exception thrown indicates an error
 */
@SuppressWarnings({ "AnonymousInnerClassMayBeStatic", "ImplicitNumericConversion" })
@Test
public void testBuilder_filter_and_termFilter() throws Exception {
  // the reader is a resource as well, so it gets closed before the index
  try (TestMemIndex idx = new TestMemIndex(Index.ALL_FIELDS);
      DirectoryReader reader = DirectoryReader.open(idx.dir)) {
    final String skipTerm = "document2field3";
    final Query q = new TermQuery(new Term("f1", "document2field1"));
    final Filter f = new QueryWrapperFilter(q);
    final FilteredDirectoryReader fReader = new Builder(reader)
        .queryFilter(f)
        .termFilter(new TermFilter() {
          @Override
          public boolean isAccepted(@Nullable final TermsEnum termsEnum,
              @NotNull final BytesRef term) {
            return !skipTerm.equals(term.utf8ToString());
          }
        }).build();

    // NOTE(review): the instance is discarded, so the overridden checks are
    // presumably invoked by the LeafReaderInstanceTest constructor - confirm.
    new LeafReaderInstanceTest() {
      @Override
      void testHasDeletions() throws Exception {
        Assert.assertFalse("Reader has deletions.", fReader.hasDeletions());
      }

      @Override
      void testFieldCount() throws Exception {
        Assert.assertEquals("Field count mismatch.",
            3L, fReader.getFields().size());
      }

      @Override
      void testFieldNames() throws Exception {
        for (final String fld : idx.flds) {
          Assert.assertTrue("Visible field not found.",
              fReader.getFields().contains(fld));
        }
      }

      @Override
      void testTotalTermFreq() throws Exception {
        Assert.assertEquals("TotalTermFreq mismatch for visible term.",
            1L, fReader.totalTermFreq(new Term("f1", "field1")));
        Assert.assertEquals("TotalTermFreq mismatch for visible term.",
            1L, fReader.totalTermFreq(new Term("f2", "field2")));
        Assert.assertEquals("TotalTermFreq mismatch for visible term.",
            1L, fReader.totalTermFreq(new Term("f3", "field3")));
        Assert.assertEquals("TotalTermFreq mismatch for hidden term.",
            0L, fReader.totalTermFreq(new Term("f3", skipTerm)));
      }

      @Override
      void testSumTotalTermFreq() throws Exception {
        Assert.assertEquals("SumTotalTermFreq mismatch for visible terms.",
            6L, fReader.getSumTotalTermFreq("f2"));
      }

      @Override
      void testDocCount() throws Exception {
        for (final String fld : idx.flds) {
          Assert.assertEquals("Doc count mismatch.",
              1L, fReader.getDocCount(fld));
        }
      }

      @SuppressWarnings("ObjectAllocationInLoop")
      @Override
      void testDocFreq() throws Exception {
        Assert.assertEquals("Missing term from visible document.",
            1L, fReader.docFreq(new Term("f2", "value")));
        Assert.assertEquals("Hidden term found.",
            0L, fReader.docFreq(new Term("f1", "document1field1")));
        Assert.assertEquals("Hidden term found.",
            0L, fReader.docFreq(new Term("f3", skipTerm)));
      }

      @Override
      void testSumDocFreq() throws Exception {
        Assert.assertEquals("SumDocFreq mismatch for visible term.",
            6L, fReader.getSumDocFreq("f2"));
        Assert.assertEquals("SumDocFreq mismatch for visible term.",
            5L, fReader.getSumDocFreq("f3"));
      }

      @Override
      void testTermVectors() throws Exception {
        boolean match = false;
        final BytesRef term = new BytesRef(skipTerm);
        for (int i = 0; i < fReader.maxDoc(); i++) {
          final Fields fld = fReader.getTermVectors(i);
          // a document without term vectors yields null and is skipped
          if (fld != null) {
            match = true;
            Assert.assertEquals(
                "Number of fields retrieved from TermVector do not match.",
                3L, fld.size());
            final Terms t = fld.terms("f3");
            if (t != null) {
              final TermsEnum te = t.iterator(null);
              Assert.assertFalse("Hidden term found.", te.seekExact(term));
            }
          }
        }
        // at least one document must have delivered term vectors
        Assert.assertTrue("Fields not found.", match);
      }

      @Override
      void testNumDocs() throws Exception {
        Assert.assertEquals("NumDocs mismatch.", 1L, fReader.numDocs());
      }

      @Override
      void testMaxDoc() throws Exception {
        Assert.assertEquals("MaxDoc mismatch.", 2L, fReader.maxDoc());
      }
    };
  }
}