List of usage examples for the org.apache.lucene.util.BytesRef constructor BytesRef
public BytesRef(CharSequence text)
From source file:com.sindicetech.siren.index.codecs.block.TestAForCodec.java
License:Open Source License
@Test public void testIncompleteFrame() throws IOException { final BlockCompressor compressor = new AForBlockCompressor(); final IntsRef input = new IntsRef(64); final BytesRef output = new BytesRef(compressor.maxCompressedSize(64)); // fill first part with 1 for (int i = 0; i < 33; i++) { input.ints[i] = 1;//from ww w . ja v a2 s. c om } // fill the rest with random numbers for (int i = 33; i < 64; i++) { input.ints[i] = (int) this.nextLong(64, Short.MAX_VALUE); } input.offset = 0; input.length = 33; // the random numbers after the end of the input array should not impact // compression compressor.compress(input, output); // should be frame code 1 : 32 ints encoded with 1 bits assertEquals(1, output.bytes[0]); // followed by 4 bytes at 255 assertEquals(0xFF, output.bytes[1] & 0xFF); assertEquals(0xFF, output.bytes[2] & 0xFF); assertEquals(0xFF, output.bytes[3] & 0xFF); assertEquals(0xFF, output.bytes[4] & 0xFF); // then frame code 34 : 16 ints encoded with 1 bits assertEquals(34, output.bytes[5]); // followed by 1 byte with at least 128 and a second byte with 0 assertEquals(128, output.bytes[6] & 0x80); assertEquals(0, output.bytes[7] & 0xFF); // followed by frame code 33: 16 ints encoded with 0 bits assertEquals(33, output.bytes[8]); }
From source file:com.sindicetech.siren.index.codecs.CodecTestCase.java
License:Open Source License
private void doTest(final int[] values, final int blockSize, final BlockCompressor compressor, final BlockDecompressor decompressor) throws Exception { final BytesRef compressedData = new BytesRef(compressor.maxCompressedSize(blockSize)); final IntsRef input = new IntsRef(blockSize); final IntsRef output = new IntsRef(blockSize); for (int i = 0; i < values.length; i += blockSize) { int offset = 0; // copy first block into the uncompressed data buffer for (int j = i; offset < blockSize && j < values.length; j++, offset++) { input.ints[offset] = values[j]; }/*from ww w .j av a 2 s . co m*/ input.offset = 0; input.length = offset; // compress compressor.compress(input, compressedData); // decompress decompressor.decompress(compressedData, output); // check if they are equals for (int j = 0; j < input.length; j++) { assertEquals(input.ints[j], output.ints[j]); } } }
From source file:com.sindicetech.siren.index.codecs.siren10.TestSiren10PostingsFormat.java
License:Open Source License
@Test public void testSkipDoc() throws IOException { final MockSirenDocument[] docs = new MockSirenDocument[2048]; for (int i = 0; i < 2048; i += 4) { docs[i] = doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2))); docs[i + 1] = doc(token("aaa", node(1, 0)), token("bbb", node(1, 0, 1, 0))); docs[i + 2] = doc(token("aaa", node(5, 3, 6, 3)), token("bbb", node(5, 3, 6, 3, 7))); docs[i + 3] = doc(token("bbb", node(2, 0)), token("aaa", node(5, 3, 6))); }/* w ww. j av a2s. co m*/ this.addDocuments(docs); final AtomicReader aReader = SlowCompositeReaderWrapper.wrap(reader); final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); final Siren10DocsNodesAndPositionsEnum e = ((Siren10DocsEnum) docsEnum).getDocsNodesAndPositionsEnum(); // first skip in skiplist is at 512 assertTrue(e.skipTo(502)); assertEquals(502, e.doc()); assertEquals(1, e.nodeFreqInDoc()); // must have used the second skip assertTrue(e.skipTo(1624)); assertEquals(1624, e.doc()); assertEquals(2, e.nodeFreqInDoc()); // no other skip, must have used the linear scan assertTrue(e.skipTo(2000)); assertEquals(2000, e.doc()); assertEquals(2, e.nodeFreqInDoc()); assertFalse(e.skipTo(256323)); }
From source file:com.sindicetech.siren.index.codecs.siren10.TestSiren10PostingsFormat.java
License:Open Source License
/**
 * Indexes three documents and walks the "aaa" postings document by document,
 * checking the node frequency and every node returned by {@code nextNode},
 * including the sentinel values reported once nodes and documents are
 * exhausted.
 */
@Test
public void testSimpleNextNode() throws IOException {
  final MockSirenDocument[] docs = {
    doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2))),
    doc(token("aaa", node(1, 0)), token("bbb", node(1, 0, 1, 0))),
    doc(token("aaa", node(5, 3, 6, 3)), token("bbb", node(5, 3, 6, 3, 7)))
  };
  this.addDocuments(docs);

  final AtomicReader atomicReader = SlowCompositeReaderWrapper.wrap(reader);
  final Term term = new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"));
  final DocsEnum termDocs = atomicReader.termDocsEnum(term);
  assertTrue(termDocs instanceof Siren10DocsEnum);
  final Siren10DocsNodesAndPositionsEnum postings =
      ((Siren10DocsEnum) termDocs).getDocsNodesAndPositionsEnum();

  // state before any document has been read
  assertEquals(-1, postings.doc());
  assertEquals(0, postings.nodeFreqInDoc());
  assertEquals(node(-1), postings.node());

  // document 0 : nodes [1] and [2]
  assertTrue(postings.nextDocument());
  assertEquals(0, postings.doc());
  assertEquals(2, postings.nodeFreqInDoc());
  assertTrue(postings.nextNode());
  assertEquals(node(1), postings.node());
  assertTrue(postings.nextNode());
  assertEquals(node(2), postings.node());
  assertFalse(postings.nextNode());
  assertEquals(DocsAndNodesIterator.NO_MORE_NOD, postings.node());

  // document 1 : node [1, 0]
  assertTrue(postings.nextDocument());
  assertEquals(1, postings.doc());
  assertEquals(1, postings.nodeFreqInDoc());
  assertTrue(postings.nextNode());
  assertEquals(node(1, 0), postings.node());
  assertFalse(postings.nextNode());
  assertEquals(DocsAndNodesIterator.NO_MORE_NOD, postings.node());

  // document 2 : node [5, 3, 6, 3]
  assertTrue(postings.nextDocument());
  assertEquals(2, postings.doc());
  assertEquals(1, postings.nodeFreqInDoc());
  assertTrue(postings.nextNode());
  assertEquals(node(5, 3, 6, 3), postings.node());
  assertFalse(postings.nextNode());
  assertEquals(DocsAndNodesIterator.NO_MORE_NOD, postings.node());

  // no document left
  assertFalse(postings.nextDocument());
  assertEquals(DocsAndNodesIterator.NO_MORE_DOC, postings.doc());
}
From source file:com.sindicetech.siren.index.codecs.siren10.TestSiren10PostingsFormat.java
License:Open Source License
@Test public void testSimpleSkipNode() throws IOException { this.addDocuments(doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2))), doc(token("aaa", node(1, 0)), token("bbb", node(1, 0, 1, 0))), doc(token("aaa", node(5, 3, 6, 3)), token("bbb", node(5, 3, 6, 3, 7)))); final AtomicReader aReader = SlowCompositeReaderWrapper.wrap(reader); final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); final Siren10DocsNodesAndPositionsEnum e = ((Siren10DocsEnum) docsEnum).getDocsNodesAndPositionsEnum(); assertEquals(-1, e.doc());/*from www . j a v a 2 s.c om*/ assertEquals(0, e.nodeFreqInDoc()); // skip to 2 using linear scan. Node should be also be skipped. assertTrue(e.skipTo(2)); assertEquals(2, e.doc()); assertEquals(1, e.nodeFreqInDoc()); assertTrue(e.nextNode()); assertEquals(node(5, 3, 6, 3), e.node()); assertFalse(e.nextNode()); assertFalse(e.nextDocument()); }
From source file:com.sindicetech.siren.index.codecs.siren10.TestSiren10PostingsFormat.java
License:Open Source License
@Test public void testSkipNode() throws IOException { final MockSirenDocument[] docs = new MockSirenDocument[2048]; for (int i = 0; i < 2048; i += 4) { docs[i] = doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2))); docs[i + 1] = doc(token("aaa", node(1, 0)), token("bbb", node(1, 0, 1, 0))); docs[i + 2] = doc(token("aaa", node(5, 3, 6, 3)), token("bbb", node(5, 3, 6, 3, 7))); docs[i + 3] = doc(token("bbb", node(2, 0)), token("aaa", node(5, 3, 6))); }/*w w w. j a v a 2 s.co m*/ this.addDocuments(docs); final AtomicReader aReader = SlowCompositeReaderWrapper.wrap(reader); final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); final Siren10DocsNodesAndPositionsEnum e = ((Siren10DocsEnum) docsEnum).getDocsNodesAndPositionsEnum(); // first skip in skiplist is at 512 assertTrue(e.skipTo(502)); assertEquals(502, e.doc()); assertEquals(1, e.nodeFreqInDoc()); assertTrue(e.nextNode()); assertEquals(node(5, 3, 6, 3), e.node()); assertFalse(e.nextNode()); // skip to 504 and scan partially nodes assertTrue(e.nextDocument()); assertTrue(e.nextDocument()); assertEquals(504, e.doc()); assertEquals(2, e.nodeFreqInDoc()); assertTrue(e.nextNode()); assertEquals(node(1), e.node()); // must have used the second skip assertTrue(e.skipTo(1624)); assertEquals(1624, e.doc()); assertEquals(2, e.nodeFreqInDoc()); assertTrue(e.nextNode()); assertEquals(node(1), e.node()); assertTrue(e.nextNode()); assertEquals(node(2), e.node()); assertFalse(e.nextNode()); // no other skip, must have used the linear scan assertTrue(e.skipTo(2000)); assertEquals(2000, e.doc()); assertEquals(2, e.nodeFreqInDoc()); assertTrue(e.nextNode()); assertEquals(node(1), e.node()); assertTrue(e.nextNode()); assertEquals(node(2), e.node()); assertFalse(e.nextNode()); assertFalse(e.skipTo(256323)); }
From source file:com.sindicetech.siren.index.codecs.siren10.TestSiren10PostingsFormat.java
License:Open Source License
@Test public void testSimpleNextPosition() throws IOException { this.addDocuments(doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2))), doc(token("bbb", node(1, 0)), token("bbb", node(1, 0, 1, 0))), doc(token("bbb", node(5, 3, 6)), token("aaa", node(5, 3, 6, 3)), token("aaa", node(5, 3, 6, 3)))); final AtomicReader aReader = SlowCompositeReaderWrapper.wrap(reader); final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); final Siren10DocsNodesAndPositionsEnum e = ((Siren10DocsEnum) docsEnum).getDocsNodesAndPositionsEnum(); assertEquals(-1, e.doc());// w w w . jav a2 s . c o m assertEquals(0, e.nodeFreqInDoc()); assertEquals(node(-1), e.node()); assertEquals(-1, e.pos()); assertTrue(e.nextDocument()); assertEquals(0, e.doc()); assertEquals(2, e.nodeFreqInDoc()); assertTrue(e.nextNode()); assertEquals(node(1), e.node()); assertEquals(1, e.termFreqInNode()); assertTrue(e.nextPosition()); assertEquals(0, e.pos()); assertFalse(e.nextPosition()); assertTrue(e.nextNode()); assertEquals(node(2), e.node()); assertEquals(1, e.termFreqInNode()); assertTrue(e.nextPosition()); assertEquals(0, e.pos()); assertFalse(e.nextPosition()); assertFalse(e.nextNode()); assertTrue(e.nextDocument()); assertEquals(2, e.doc()); assertEquals(1, e.nodeFreqInDoc()); assertTrue(e.nextNode()); assertEquals(node(5, 3, 6, 3), e.node()); assertEquals(2, e.termFreqInNode()); assertTrue(e.nextPosition()); assertEquals(0, e.pos()); assertTrue(e.nextPosition()); assertEquals(1, e.pos()); assertFalse(e.nextPosition()); assertFalse(e.nextNode()); assertFalse(e.nextDocument()); }
From source file:com.sindicetech.siren.index.codecs.siren10.TestSiren10PostingsFormat.java
License:Open Source License
@Test public void testSimpleFrequencies() throws IOException { this.addDocuments(doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2))), doc(token("aaa", node(1)), token("aaa", node(1)), token("aaa", node(2)))); final AtomicReader aReader = SlowCompositeReaderWrapper.wrap(reader); final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); final Siren10DocsNodesAndPositionsEnum e = ((Siren10DocsEnum) docsEnum).getDocsNodesAndPositionsEnum(); assertEquals(-1, e.doc());/*from www. ja v a 2 s . c o m*/ // freqs should be set to 0 at the beginning assertEquals(0, e.nodeFreqInDoc()); assertEquals(0, e.termFreqInNode()); // nodeFreqInDoc should be set after calling nextDocument assertTrue(e.nextDocument()); assertEquals(2, e.nodeFreqInDoc()); // termFreqInNode should be set to 0 assertEquals(0, e.termFreqInNode()); // calling termFreqInNode should not change the freq settings assertEquals(2, e.nodeFreqInDoc()); // termFreqInNode should be set after calling nextNode assertTrue(e.nextNode()); // nodeFreqInDoc and nodeFreqInDoc should not have changed of settings assertEquals(2, e.nodeFreqInDoc()); // termFreqInNode should be set to 1 assertEquals(1, e.termFreqInNode()); // calling termFreqInNode should not change the freqs settings assertEquals(2, e.nodeFreqInDoc()); // calling nextPosition should not change freqs settings assertTrue(e.nextPosition()); assertEquals(2, e.nodeFreqInDoc()); assertEquals(1, e.termFreqInNode()); // partially scanned position should not have consequences on nodeFreqInDoc // settings assertTrue(e.nextDocument()); assertEquals(2, e.nodeFreqInDoc()); assertTrue(e.nextNode()); assertEquals(2, e.termFreqInNode()); assertTrue(e.nextPosition()); assertEquals(2, e.termFreqInNode()); assertTrue(e.nextNode()); assertEquals(1, e.termFreqInNode()); }
From source file:com.sindicetech.siren.index.codecs.siren10.TestSiren10PostingsFormat.java
License:Open Source License
@Test public void testSimpleMerge() throws IOException { this.addDocuments(doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2)))); this.addDocuments(doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2)))); this.forceMerge(); final AtomicReader aReader = SlowCompositeReaderWrapper.wrap(reader); final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); final Siren10DocsNodesAndPositionsEnum e = ((Siren10DocsEnum) docsEnum).getDocsNodesAndPositionsEnum(); assertTrue(e.nextDocument());// ww w . j a va 2 s .c o m assertEquals(0, e.doc()); assertEquals(2, e.nodeFreqInDoc()); assertTrue(e.nextNode()); assertEquals(1, e.termFreqInNode()); assertTrue(e.nextPosition()); assertEquals(0, e.pos()); assertTrue(e.nextNode()); assertEquals(1, e.termFreqInNode()); assertTrue(e.nextPosition()); assertEquals(0, e.pos()); assertTrue(e.nextDocument()); assertEquals(1, e.doc()); assertEquals(2, e.nodeFreqInDoc()); assertTrue(e.nextNode()); assertEquals(1, e.termFreqInNode()); assertTrue(e.nextPosition()); assertEquals(0, e.pos()); assertTrue(e.nextNode()); assertEquals(1, e.termFreqInNode()); assertTrue(e.nextPosition()); assertEquals(0, e.pos()); }
From source file:com.sindicetech.siren.index.codecs.siren10.TestSiren10PostingsFormat.java
License:Open Source License
@Test public void testStressMerge() throws IOException { this.addDocuments(doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2)))); while (this.reader.numDocs() < 10000) { final int batchSize = LuceneTestCase.random().nextInt(20); final MockSirenDocument[] docs = new MockSirenDocument[batchSize]; for (int i = 0; i < batchSize; i++) { docs[i] = doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2))); }//from w w w.jav a2s. co m this.addDocuments(docs); this.forceMerge(); } final AtomicReader aReader = SlowCompositeReaderWrapper.wrap(reader); final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); final Siren10DocsNodesAndPositionsEnum e = ((Siren10DocsEnum) docsEnum).getDocsNodesAndPositionsEnum(); for (int i = 0; i < reader.numDocs(); i++) { assertTrue(e.nextDocument()); assertEquals(i, e.doc()); assertEquals(2, e.nodeFreqInDoc()); assertTrue(e.nextNode()); assertEquals(node(1), e.node()); assertEquals(1, e.termFreqInNode()); assertTrue(e.nextPosition()); assertEquals(0, e.pos()); assertTrue(e.nextNode()); assertEquals(node(2), e.node()); assertEquals(1, e.termFreqInNode()); assertTrue(e.nextPosition()); assertEquals(0, e.pos()); assertFalse(e.nextNode()); } }