Example usage for org.apache.lucene.util BytesRef BytesRef

List of usage examples for org.apache.lucene.util BytesRef BytesRef

Introduction

On this page you can find example usages of the BytesRef constructor of org.apache.lucene.util.BytesRef.

Prototype

public BytesRef(CharSequence text) 

Source Link

Document

Initializes the byte[] with the UTF-8 encoded bytes of the provided text.

Usage

From source file:com.sindicetech.siren.index.codecs.block.TestAForCodec.java

License:Open Source License

/**
 * Compresses a 64-slot buffer of which only the first 33 entries are declared
 * as input, then checks the leading bytes written by the compressor.
 * The slots past the declared length hold random values, which are expected
 * to have no influence on the compressed output.
 */
@Test
public void testIncompleteFrame() throws IOException {
    final BlockCompressor aforCompressor = new AForBlockCompressor();

    final IntsRef source = new IntsRef(64);
    final BytesRef encoded = new BytesRef(aforCompressor.maxCompressedSize(64));

    // slots 0..32 hold the constant 1, slots 33..63 hold random numbers
    for (int slot = 0; slot < 64; slot++) {
        if (slot < 33) {
            source.ints[slot] = 1;
        } else {
            source.ints[slot] = (int) this.nextLong(64, Short.MAX_VALUE);
        }
    }

    // only the 33 leading values are declared as input
    source.offset = 0;
    source.length = 33;

    // the random numbers stored after the declared length should not impact
    // compression
    aforCompressor.compress(source, encoded);

    final byte[] out = encoded.bytes;

    // expected first: frame code 1 (32 ints encoded with 1 bit each)
    assertEquals(1, out[0]);
    // its payload is 4 bytes, each equal to 255
    for (int pos = 1; pos <= 4; pos++) {
        assertEquals(0xFF, out[pos] & 0xFF);
    }
    // expected next: frame code 34 (16 ints encoded with 1 bit each)
    assertEquals(34, out[5]);
    // its payload is one byte with the high bit set, then one byte equal to 0
    assertEquals(128, out[6] & 0x80);
    assertEquals(0, out[7] & 0xFF);
    // expected last: frame code 33 (16 ints encoded with 0 bits)
    assertEquals(33, out[8]);
}

From source file:com.sindicetech.siren.index.codecs.CodecTestCase.java

License:Open Source License

/**
 * Round-trips {@code values} through the given compressor and decompressor,
 * one block of at most {@code blockSize} integers at a time, and asserts that
 * every decoded integer equals the integer that was encoded.
 *
 * @param values       integers to encode, consumed in consecutive blocks
 * @param blockSize    maximum number of integers per block; also used to size the buffers
 * @param compressor   encoding side of the codec under test
 * @param decompressor decoding side of the codec under test
 */
private void doTest(final int[] values, final int blockSize, final BlockCompressor compressor,
        final BlockDecompressor decompressor) throws Exception {
    final BytesRef encoded = new BytesRef(compressor.maxCompressedSize(blockSize));
    final IntsRef plain = new IntsRef(blockSize);
    final IntsRef decoded = new IntsRef(blockSize);

    int blockStart = 0;
    while (blockStart < values.length) {
        // the final block may hold fewer than blockSize values
        final int count = Math.min(blockSize, values.length - blockStart);

        // stage the current block at the start of the uncompressed buffer
        System.arraycopy(values, blockStart, plain.ints, 0, count);
        plain.offset = 0;
        plain.length = count;

        // encode, then decode again
        compressor.compress(plain, encoded);
        decompressor.decompress(encoded, decoded);

        // the decoded block must match the staged block value by value
        for (int k = 0; k < plain.length; k++) {
            assertEquals(plain.ints[k], decoded.ints[k]);
        }

        blockStart += blockSize;
    }
}

From source file:com.sindicetech.siren.index.codecs.siren10.TestSiren10PostingsFormat.java

License:Open Source License

/**
 * Indexes 2048 documents built from a repeating pattern of four documents and
 * checks that {@code skipTo} lands on the requested document with the
 * expected node frequency for the term "aaa".
 */
@Test
public void testSkipDoc() throws IOException {
    final MockSirenDocument[] corpus = new MockSirenDocument[2048];
    int slot = 0;
    while (slot < corpus.length) {
        // four documents per round, always created in this order
        corpus[slot++] = doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2)));
        corpus[slot++] = doc(token("aaa", node(1, 0)), token("bbb", node(1, 0, 1, 0)));
        corpus[slot++] = doc(token("aaa", node(5, 3, 6, 3)), token("bbb", node(5, 3, 6, 3, 7)));
        corpus[slot++] = doc(token("bbb", node(2, 0)), token("aaa", node(5, 3, 6)));
    }
    this.addDocuments(corpus);

    final AtomicReader atomicReader = SlowCompositeReaderWrapper.wrap(reader);
    final Term aaaTerm = new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"));
    final DocsEnum termDocs = atomicReader.termDocsEnum(aaaTerm);
    assertTrue(termDocs instanceof Siren10DocsEnum);
    final Siren10DocsEnum sirenDocs = (Siren10DocsEnum) termDocs;
    final Siren10DocsNodesAndPositionsEnum postings = sirenDocs.getDocsNodesAndPositionsEnum();

    // 502  : the first skip in the skip list is at 512
    // 1624 : must have used the second skip
    // 2000 : no other skip, must have used the linear scan
    final int[] targets = { 502, 1624, 2000 };
    final int[] expectedNodeFreqs = { 1, 2, 2 };
    for (int k = 0; k < targets.length; k++) {
        assertTrue(postings.skipTo(targets[k]));
        assertEquals(targets[k], postings.doc());
        assertEquals(expectedNodeFreqs[k], postings.nodeFreqInDoc());
    }

    // a target past the last document cannot be reached
    assertFalse(postings.skipTo(256323));

}

From source file:com.sindicetech.siren.index.codecs.siren10.TestSiren10PostingsFormat.java

License:Open Source License

/**
 * Indexes three small documents and walks the postings of the term "aaa"
 * document by document and node by node, checking the reported document id,
 * node frequency and node label at each step, including the sentinels
 * reported once the nodes and the documents are exhausted.
 */
@Test
public void testSimpleNextNode() throws IOException {
    this.addDocuments(
            doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2))),
            doc(token("aaa", node(1, 0)), token("bbb", node(1, 0, 1, 0))),
            doc(token("aaa", node(5, 3, 6, 3)), token("bbb", node(5, 3, 6, 3, 7))));

    final AtomicReader atomicReader = SlowCompositeReaderWrapper.wrap(reader);
    final Term aaaTerm = new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"));
    final DocsEnum termDocs = atomicReader.termDocsEnum(aaaTerm);
    assertTrue(termDocs instanceof Siren10DocsEnum);
    final Siren10DocsEnum sirenDocs = (Siren10DocsEnum) termDocs;
    final Siren10DocsNodesAndPositionsEnum postings = sirenDocs.getDocsNodesAndPositionsEnum();

    // state before the first call to nextDocument
    assertEquals(-1, postings.doc());
    assertEquals(0, postings.nodeFreqInDoc());
    assertEquals(node(-1), postings.node());

    // document 0: nodes [1] and [2]
    assertTrue(postings.nextDocument());
    assertEquals(0, postings.doc());
    assertEquals(2, postings.nodeFreqInDoc());
    assertTrue(postings.nextNode());
    assertEquals(node(1), postings.node());
    assertTrue(postings.nextNode());
    assertEquals(node(2), postings.node());
    assertFalse(postings.nextNode());
    assertEquals(DocsAndNodesIterator.NO_MORE_NOD, postings.node());

    // document 1: single node [1, 0]
    assertTrue(postings.nextDocument());
    assertEquals(1, postings.doc());
    assertEquals(1, postings.nodeFreqInDoc());
    assertTrue(postings.nextNode());
    assertEquals(node(1, 0), postings.node());
    assertFalse(postings.nextNode());
    assertEquals(DocsAndNodesIterator.NO_MORE_NOD, postings.node());

    // document 2: single node [5, 3, 6, 3]
    assertTrue(postings.nextDocument());
    assertEquals(2, postings.doc());
    assertEquals(1, postings.nodeFreqInDoc());
    assertTrue(postings.nextNode());
    assertEquals(node(5, 3, 6, 3), postings.node());
    assertFalse(postings.nextNode());
    assertEquals(DocsAndNodesIterator.NO_MORE_NOD, postings.node());

    // no document left
    assertFalse(postings.nextDocument());
    assertEquals(DocsAndNodesIterator.NO_MORE_DOC, postings.doc());
}

From source file:com.sindicetech.siren.index.codecs.siren10.TestSiren10PostingsFormat.java

License:Open Source License

/**
 * Indexes three small documents, skips straight to document 2 in the postings
 * of the term "aaa" and checks that only the node of that document is
 * reported afterwards.
 */
@Test
public void testSimpleSkipNode() throws IOException {
    this.addDocuments(
            doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2))),
            doc(token("aaa", node(1, 0)), token("bbb", node(1, 0, 1, 0))),
            doc(token("aaa", node(5, 3, 6, 3)), token("bbb", node(5, 3, 6, 3, 7))));

    final AtomicReader atomicReader = SlowCompositeReaderWrapper.wrap(reader);
    final Term aaaTerm = new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"));
    final DocsEnum termDocs = atomicReader.termDocsEnum(aaaTerm);
    assertTrue(termDocs instanceof Siren10DocsEnum);
    final Siren10DocsEnum sirenDocs = (Siren10DocsEnum) termDocs;
    final Siren10DocsNodesAndPositionsEnum postings = sirenDocs.getDocsNodesAndPositionsEnum();

    // state before any document has been visited
    assertEquals(-1, postings.doc());
    assertEquals(0, postings.nodeFreqInDoc());

    // reach document 2 with a linear scan; the nodes of the documents
    // in between should be skipped as well
    assertTrue(postings.skipTo(2));
    assertEquals(2, postings.doc());
    assertEquals(1, postings.nodeFreqInDoc());
    assertTrue(postings.nextNode());
    assertEquals(node(5, 3, 6, 3), postings.node());
    assertFalse(postings.nextNode());

    // document 2 was the last one
    assertFalse(postings.nextDocument());
}

From source file:com.sindicetech.siren.index.codecs.siren10.TestSiren10PostingsFormat.java

License:Open Source License

/**
 * Indexes 2048 documents built from a repeating pattern of four documents,
 * then mixes {@code skipTo}, {@code nextDocument} and {@code nextNode} calls
 * on the postings of the term "aaa" and checks the nodes reported after each
 * move.
 */
@Test
public void testSkipNode() throws IOException {
    final MockSirenDocument[] corpus = new MockSirenDocument[2048];
    int slot = 0;
    while (slot < corpus.length) {
        // four documents per round, always created in this order
        corpus[slot++] = doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2)));
        corpus[slot++] = doc(token("aaa", node(1, 0)), token("bbb", node(1, 0, 1, 0)));
        corpus[slot++] = doc(token("aaa", node(5, 3, 6, 3)), token("bbb", node(5, 3, 6, 3, 7)));
        corpus[slot++] = doc(token("bbb", node(2, 0)), token("aaa", node(5, 3, 6)));
    }
    this.addDocuments(corpus);

    final AtomicReader atomicReader = SlowCompositeReaderWrapper.wrap(reader);
    final Term aaaTerm = new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"));
    final DocsEnum termDocs = atomicReader.termDocsEnum(aaaTerm);
    assertTrue(termDocs instanceof Siren10DocsEnum);
    final Siren10DocsEnum sirenDocs = (Siren10DocsEnum) termDocs;
    final Siren10DocsNodesAndPositionsEnum postings = sirenDocs.getDocsNodesAndPositionsEnum();

    // target 502: the first skip in the skip list is at 512
    assertTrue(postings.skipTo(502));
    assertEquals(502, postings.doc());
    assertEquals(1, postings.nodeFreqInDoc());
    assertTrue(postings.nextNode());
    assertEquals(node(5, 3, 6, 3), postings.node());
    assertFalse(postings.nextNode());

    // step forward to 504 and consume only the first of its nodes
    assertTrue(postings.nextDocument());
    assertTrue(postings.nextDocument());
    assertEquals(504, postings.doc());
    assertEquals(2, postings.nodeFreqInDoc());
    assertTrue(postings.nextNode());
    assertEquals(node(1), postings.node());

    // target 1624: must have used the second skip
    assertTrue(postings.skipTo(1624));
    assertEquals(1624, postings.doc());
    assertEquals(2, postings.nodeFreqInDoc());
    assertTrue(postings.nextNode());
    assertEquals(node(1), postings.node());
    assertTrue(postings.nextNode());
    assertEquals(node(2), postings.node());
    assertFalse(postings.nextNode());

    // target 2000: no other skip, must have used the linear scan
    assertTrue(postings.skipTo(2000));
    assertEquals(2000, postings.doc());
    assertEquals(2, postings.nodeFreqInDoc());
    assertTrue(postings.nextNode());
    assertEquals(node(1), postings.node());
    assertTrue(postings.nextNode());
    assertEquals(node(2), postings.node());
    assertFalse(postings.nextNode());

    // a target past the last document cannot be reached
    assertFalse(postings.skipTo(256323));

}

From source file:com.sindicetech.siren.index.codecs.siren10.TestSiren10PostingsFormat.java

License:Open Source License

/**
 * Indexes three small documents, of which the second does not contain the
 * term "aaa", and walks the postings of "aaa" down to position level,
 * checking document ids, node labels, frequencies and positions.
 */
@Test
public void testSimpleNextPosition() throws IOException {
    this.addDocuments(
            doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2))),
            doc(token("bbb", node(1, 0)), token("bbb", node(1, 0, 1, 0))),
            doc(token("bbb", node(5, 3, 6)), token("aaa", node(5, 3, 6, 3)), token("aaa", node(5, 3, 6, 3))));

    final AtomicReader atomicReader = SlowCompositeReaderWrapper.wrap(reader);
    final Term aaaTerm = new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"));
    final DocsEnum termDocs = atomicReader.termDocsEnum(aaaTerm);
    assertTrue(termDocs instanceof Siren10DocsEnum);
    final Siren10DocsEnum sirenDocs = (Siren10DocsEnum) termDocs;
    final Siren10DocsNodesAndPositionsEnum postings = sirenDocs.getDocsNodesAndPositionsEnum();

    // state before any document has been visited
    assertEquals(-1, postings.doc());
    assertEquals(0, postings.nodeFreqInDoc());
    assertEquals(node(-1), postings.node());
    assertEquals(-1, postings.pos());

    // document 0 holds "aaa" in two nodes
    assertTrue(postings.nextDocument());
    assertEquals(0, postings.doc());
    assertEquals(2, postings.nodeFreqInDoc());

    // node [1]: a single occurrence at position 0
    assertTrue(postings.nextNode());
    assertEquals(node(1), postings.node());
    assertEquals(1, postings.termFreqInNode());

    assertTrue(postings.nextPosition());
    assertEquals(0, postings.pos());
    assertFalse(postings.nextPosition());

    // node [2]: a single occurrence at position 0
    assertTrue(postings.nextNode());
    assertEquals(node(2), postings.node());
    assertEquals(1, postings.termFreqInNode());

    assertTrue(postings.nextPosition());
    assertEquals(0, postings.pos());
    assertFalse(postings.nextPosition());

    assertFalse(postings.nextNode());

    // the next document reported is 2, holding "aaa" in one node
    assertTrue(postings.nextDocument());
    assertEquals(2, postings.doc());
    assertEquals(1, postings.nodeFreqInDoc());

    // node [5, 3, 6, 3]: two occurrences, at positions 0 and 1
    assertTrue(postings.nextNode());
    assertEquals(node(5, 3, 6, 3), postings.node());
    assertEquals(2, postings.termFreqInNode());

    assertTrue(postings.nextPosition());
    assertEquals(0, postings.pos());
    assertTrue(postings.nextPosition());
    assertEquals(1, postings.pos());
    assertFalse(postings.nextPosition());

    assertFalse(postings.nextNode());

    // no document left
    assertFalse(postings.nextDocument());
}

From source file:com.sindicetech.siren.index.codecs.siren10.TestSiren10PostingsFormat.java

License:Open Source License

/**
 * Indexes two small documents and checks how {@code nodeFreqInDoc} and
 * {@code termFreqInNode} evolve for the term "aaa" as the enumeration moves
 * across documents, nodes and positions.
 */
@Test
public void testSimpleFrequencies() throws IOException {
    this.addDocuments(
            doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2))),
            doc(token("aaa", node(1)), token("aaa", node(1)), token("aaa", node(2))));

    final AtomicReader atomicReader = SlowCompositeReaderWrapper.wrap(reader);
    final Term aaaTerm = new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"));
    final DocsEnum termDocs = atomicReader.termDocsEnum(aaaTerm);
    assertTrue(termDocs instanceof Siren10DocsEnum);
    final Siren10DocsEnum sirenDocs = (Siren10DocsEnum) termDocs;
    final Siren10DocsNodesAndPositionsEnum postings = sirenDocs.getDocsNodesAndPositionsEnum();
    assertEquals(-1, postings.doc());

    // before the first document, both frequencies should be 0
    assertEquals(0, postings.nodeFreqInDoc());
    assertEquals(0, postings.termFreqInNode());

    // nextDocument should set nodeFreqInDoc
    assertTrue(postings.nextDocument());
    assertEquals(2, postings.nodeFreqInDoc());
    // termFreqInNode should still be 0
    assertEquals(0, postings.termFreqInNode());
    // reading termFreqInNode should leave nodeFreqInDoc untouched
    assertEquals(2, postings.nodeFreqInDoc());

    // nextNode should set termFreqInNode
    assertTrue(postings.nextNode());
    // nodeFreqInDoc should not have changed
    assertEquals(2, postings.nodeFreqInDoc());
    // termFreqInNode should now be 1
    assertEquals(1, postings.termFreqInNode());
    // reading termFreqInNode should leave nodeFreqInDoc untouched
    assertEquals(2, postings.nodeFreqInDoc());

    // nextPosition should not change either frequency
    assertTrue(postings.nextPosition());
    assertEquals(2, postings.nodeFreqInDoc());
    assertEquals(1, postings.termFreqInNode());

    // moving on with positions only partially scanned should have no
    // consequence on the frequencies reported for the next document
    assertTrue(postings.nextDocument());
    assertEquals(2, postings.nodeFreqInDoc());
    assertTrue(postings.nextNode());
    assertEquals(2, postings.termFreqInNode());
    assertTrue(postings.nextPosition());
    assertEquals(2, postings.termFreqInNode());
    assertTrue(postings.nextNode());
    assertEquals(1, postings.termFreqInNode());
}

From source file:com.sindicetech.siren.index.codecs.siren10.TestSiren10PostingsFormat.java

License:Open Source License

/**
 * Adds the same document in two separate calls, forces a merge, and checks
 * that the postings of the term "aaa" report both documents, each with two
 * nodes holding one occurrence at position 0.
 */
@Test
public void testSimpleMerge() throws IOException {
    this.addDocuments(doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2))));
    this.addDocuments(doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2))));

    this.forceMerge();

    final AtomicReader atomicReader = SlowCompositeReaderWrapper.wrap(reader);
    final Term aaaTerm = new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"));
    final DocsEnum termDocs = atomicReader.termDocsEnum(aaaTerm);
    assertTrue(termDocs instanceof Siren10DocsEnum);
    final Siren10DocsEnum sirenDocs = (Siren10DocsEnum) termDocs;
    final Siren10DocsNodesAndPositionsEnum postings = sirenDocs.getDocsNodesAndPositionsEnum();

    // both documents are expected to look exactly the same
    for (int expectedDoc = 0; expectedDoc < 2; expectedDoc++) {
        assertTrue(postings.nextDocument());
        assertEquals(expectedDoc, postings.doc());
        assertEquals(2, postings.nodeFreqInDoc());

        // two nodes, each with a single occurrence at position 0
        for (int nodeIndex = 0; nodeIndex < 2; nodeIndex++) {
            assertTrue(postings.nextNode());
            assertEquals(1, postings.termFreqInNode());
            assertTrue(postings.nextPosition());
            assertEquals(0, postings.pos());
        }
    }
}

From source file:com.sindicetech.siren.index.codecs.siren10.TestSiren10PostingsFormat.java

License:Open Source License

/**
 * Keeps adding randomly sized batches of the same document, forcing a merge
 * after each batch, until the reader reports at least 10000 documents. Then
 * checks that the postings of the term "aaa" report every document in order
 * with nodes [1] and [2], each holding one occurrence at position 0.
 */
@Test
public void testStressMerge() throws IOException {
    this.addDocuments(doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2))));

    while (this.reader.numDocs() < 10000) {
        // batch size is drawn from nextInt(20)
        final int batchSize = LuceneTestCase.random().nextInt(20);
        final MockSirenDocument[] batch = new MockSirenDocument[batchSize];
        for (int slot = 0; slot < batch.length; slot++) {
            batch[slot] = doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2)));
        }
        this.addDocuments(batch);
        this.forceMerge();
    }

    final AtomicReader atomicReader = SlowCompositeReaderWrapper.wrap(reader);
    final Term aaaTerm = new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"));
    final DocsEnum termDocs = atomicReader.termDocsEnum(aaaTerm);
    assertTrue(termDocs instanceof Siren10DocsEnum);
    final Siren10DocsEnum sirenDocs = (Siren10DocsEnum) termDocs;
    final Siren10DocsNodesAndPositionsEnum postings = sirenDocs.getDocsNodesAndPositionsEnum();

    for (int expectedDoc = 0; expectedDoc < reader.numDocs(); expectedDoc++) {
        assertTrue(postings.nextDocument());
        assertEquals(expectedDoc, postings.doc());
        assertEquals(2, postings.nodeFreqInDoc());

        // node [1] first, then node [2]
        for (int nodeLabel = 1; nodeLabel <= 2; nodeLabel++) {
            assertTrue(postings.nextNode());
            assertEquals(node(nodeLabel), postings.node());
            assertEquals(1, postings.termFreqInNode());
            assertTrue(postings.nextPosition());
            assertEquals(0, postings.pos());
        }

        assertFalse(postings.nextNode());
    }
}