org.elasticsearch.test.unit.common.compress.CompressIndexInputOutputTests.java Source code

Java tutorial

Introduction

Here is the source code for org.elasticsearch.test.unit.common.compress.CompressIndexInputOutputTests.java

Source

/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.test.unit.common.compress;

import jsr166y.ThreadLocalRandom;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.*;
import org.apache.lucene.store.*;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.RandomStringGenerator;
import org.elasticsearch.common.compress.CompressedDirectory;
import org.elasticsearch.common.compress.CompressedIndexInput;
import org.elasticsearch.common.compress.CompressedIndexOutput;
import org.elasticsearch.common.compress.Compressor;
import org.elasticsearch.common.compress.lzf.LZFCompressor;
import org.elasticsearch.common.compress.snappy.xerial.XerialSnappy;
import org.elasticsearch.common.compress.snappy.xerial.XerialSnappyCompressor;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.unit.SizeValue;
import org.testng.annotations.Test;

import java.io.EOFException;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;

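/**
 * Tests round-tripping data through the compressed index input/output wrappers
 * of each available {@link Compressor} (Xerial Snappy when present, and LZF).
 */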
@Test
public class CompressIndexInputOutputTests {

    /**
     * Runs the full compressor scenario set against the Xerial Snappy compressor.
     * Does nothing when the Snappy implementation is not available.
     */
    @Test
    public void testXerialSnappy() throws Exception {
        if (!XerialSnappy.available) {
            return;
        }
        Compressor snappy = new XerialSnappyCompressor();
        testCompressor(snappy);
    }

    /**
     * Runs the full compressor scenario set against the LZF compressor.
     */
    @Test
    public void testLZF() throws Exception {
        Compressor lzf = new LZFCompressor();
        testCompressor(lzf);
    }

    /**
     * Alternates between the Snappy (when available) and LZF compressors, twice,
     * so that running one compressor is followed by running the other.
     */
    @Test
    public void testSideAffects() throws Exception {
        for (int round = 0; round < 2; round++) {
            if (XerialSnappy.available) {
                testCompressor(new XerialSnappyCompressor());
            }
            testCompressor(new LZFCompressor());
        }
    }

    /**
     * Runs every scenario in this class against the given compressor, in a fixed order.
     *
     * @param compressor the compressor used to wrap index inputs and outputs
     */
    private void testCompressor(Compressor compressor) throws Exception {
        // empty file: reads must hit end-of-file
        empty(compressor);
        // sequential reads and seeks over a small mixed payload
        simple(compressor);
        // seek/skip scenario, once through the compressor and once on plain streams
        seek1Compressed(compressor);
        seek1UnCompressed(compressor);
        // copying one compressed stream into another
        copyBytes(compressor);
        // indexing through a CompressedDirectory with a real IndexWriter
        lucene(compressor);
    }

    /**
     * Writes an empty compressed file and verifies that reading a byte raises
     * {@link EOFException}, both right after opening and after seeking to offset 100.
     *
     * @param compressor the compressor used to wrap the output and the input
     */
    private void empty(Compressor compressor) throws Exception {
        Directory dir = new RAMDirectory();
        IndexOutput out = compressor.indexOutput(dir.createOutput("test", IOContext.DEFAULT));
        out.close();

        IndexInput in = compressor.indexInput(dir.openInput("test", IOContext.DEFAULT));
        try {
            // Record the outcome in a flag and assert on it afterwards: a bare
            // "assert false" is skipped entirely when the JVM runs without -ea.
            boolean eofAtStart = false;
            try {
                in.readByte();
            } catch (EOFException e) {
                eofAtStart = true;
            }
            assertThat(eofAtStart, equalTo(true));

            in.seek(100);
            boolean eofAfterSeek = false;
            try {
                in.readByte();
            } catch (EOFException e) {
                eofAfterSeek = true;
            }
            assertThat(eofAfterSeek, equalTo(true));
        } finally {
            in.close();
        }
    }

    /**
     * Writes an int, a short string, a string longer than 0xFFFF characters, another
     * int and another short string through the compressor, then reads them back
     * sequentially, by seeking to the recorded offsets, and finally as one bulk read.
     *
     * @param compressor the compressor used to wrap the output and the input
     */
    private void simple(Compressor compressor) throws Exception {
        final Directory directory = new RAMDirectory();
        final IndexOutput output = compressor.indexOutput(directory.createOutput("test", IOContext.DEFAULT));

        final long firstIntAt = output.getFilePointer();
        output.writeInt(1);

        final long firstStringAt = output.getFilePointer();
        output.writeString("test1");

        final long bigStringAt = output.getFilePointer();
        final String bigString = RandomStringGenerator.random(0xFFFF + 5);
        output.writeString(bigString);

        final long secondIntAt = output.getFilePointer();
        output.writeInt(2);

        final long secondStringAt = output.getFilePointer();
        output.writeString("test2");
        output.close();

        final IndexInput input = compressor.indexInput(directory.openInput("test", IOContext.DEFAULT));

        // sequential pass, in write order
        assertThat(input.readInt(), equalTo(1));
        assertThat(input.readString(), equalTo("test1"));
        assertThat(input.readString(), equalTo(bigString));
        assertThat(input.readInt(), equalTo(2));
        assertThat(input.readString(), equalTo("test2"));

        // random access, deliberately out of write order
        input.seek(bigStringAt);
        assertThat(input.readString(), equalTo(bigString));
        input.seek(firstStringAt);
        assertThat(input.readString(), equalTo("test1"));
        input.seek(secondStringAt);
        assertThat(input.readString(), equalTo("test2"));
        input.seek(firstIntAt);
        assertThat(input.readInt(), equalTo(1));

        // bulk read of the whole content from the beginning
        input.seek(0);
        final byte[] everything = new byte[(int) input.length()];
        input.readBytes(everything, 0, everything.length);

        input.close();
    }

    /**
     * Runs the seek scenario with the streams wrapped by the compressor.
     */
    private void seek1Compressed(Compressor compressor) throws Exception {
        final boolean compressed = true;
        seek1(compressed, compressor);
    }

    /**
     * Runs the seek scenario on the plain directory streams, without the compressor.
     */
    private void seek1UnCompressed(Compressor compressor) throws Exception {
        final boolean compressed = false;
        seek1(compressed, compressor);
    }

    /**
     * Writes three length-prefixed records followed by a batch of large random strings,
     * then verifies that a record can be skipped by seeking past its payload and that
     * every string can be read back after seeking to its recorded offset.
     *
     * @param compressed whether to wrap the streams with the compressor or use the plain
     *                   directory streams
     * @param compressor the compressor to use when {@code compressed} is true
     */
    private void seek1(boolean compressed, Compressor compressor) throws Exception {
        Directory dir = new RAMDirectory();
        IndexOutput out = compressed ? compressor.indexOutput(dir.createOutput("test", IOContext.DEFAULT))
                : dir.createOutput("test", IOContext.DEFAULT);
        // record 1: length prefix (4) + one int
        out.writeVInt(4);
        out.writeInt(1);
        // record 2: length prefix (8) + two ints; this is the record that gets skipped
        long pos2 = out.getFilePointer();
        out.writeVInt(8);
        long posX = out.getFilePointer();
        out.writeInt(2);
        out.writeInt(3);
        // record 3: length prefix (4) + one int
        long pos3 = out.getFilePointer();
        out.writeVInt(4);
        out.writeInt(4);

        int size = 50;
        long[] positions = new long[size];
        String[] data = new String[size];
        // bound the loop by size so the arrays and the loop cannot drift apart
        for (int i = 0; i < size; i++) {
            positions[i] = out.getFilePointer();
            data[i] = RandomStringGenerator.random(12345);
            out.writeString(data[i]);
        }

        out.close();

        IndexInput in = compressed ? compressor.indexInput(dir.openInput("test", IOContext.DEFAULT))
                : dir.openInput("test", IOContext.DEFAULT);
        try {
            in.seek(pos2);
            // now "skip" record 2 by seeking past its payload
            int numBytes = in.readVInt();
            assertThat(in.getFilePointer(), equalTo(posX));
            in.seek(in.getFilePointer() + numBytes);
            // the skip must land exactly on the start of record 3
            assertThat(in.getFilePointer(), equalTo(pos3));
            assertThat(in.readVInt(), equalTo(4));
            assertThat(in.readInt(), equalTo(4));

            for (int i = 0; i < size; i++) {
                in.seek(positions[i]);
                assertThat(in.getFilePointer(), equalTo(positions[i]));
                assertThat(in.readString(), equalTo(data[i]));
            }
        } finally {
            in.close();
        }
    }

    /**
     * Writes a compressed file, copies its whole content into the middle of a second
     * compressed file via {@code copyBytes}, and verifies that the second file reads back
     * as "mergeStart", the copied content, then "mergeEnd", followed by end-of-file.
     *
     * @param compressor the compressor used to wrap all outputs and inputs
     */
    private void copyBytes(Compressor compressor) throws Exception {
        Directory dir = new RAMDirectory();
        IndexOutput out = compressor.indexOutput(dir.createOutput("test", IOContext.DEFAULT));
        out.writeInt(1);
        // the file pointer counts written bytes: a single int is 4
        assertThat(out.getFilePointer(), equalTo(4L));
        out.writeString("test1");
        String largeString = RandomStringGenerator.random(0xFFFF + 5);
        out.writeString(largeString);
        out.writeInt(2);
        out.writeString("test2");
        assertThat(out.length(), equalTo(out.getFilePointer()));
        long length = out.length();
        out.close();

        CompressedIndexOutput out2 = compressor.indexOutput(dir.createOutput("test2", IOContext.DEFAULT));
        out2.writeString("mergeStart");
        CompressedIndexInput testInput = compressor.indexInput(dir.openInput("test", IOContext.DEFAULT));
        long endMergePos;
        try {
            assertThat(testInput.length(), equalTo(length));
            out2.copyBytes(testInput, testInput.length());
            endMergePos = out2.getFilePointer();
        } finally {
            testInput.close();
        }
        out2.writeString("mergeEnd");
        out2.close();

        IndexInput in = compressor.indexInput(dir.openInput("test2", IOContext.DEFAULT));
        try {
            assertThat(in.readString(), equalTo("mergeStart"));
            assertThat(in.readInt(), equalTo(1));
            assertThat(in.readString(), equalTo("test1"));
            assertThat(in.readString(), equalTo(largeString));
            assertThat(in.readInt(), equalTo(2));
            assertThat(in.readString(), equalTo("test2"));
            assertThat(in.readString(), equalTo("mergeEnd"));

            // back to the very beginning of the merged file
            in.seek(0);
            assertThat(in.readString(), equalTo("mergeStart"));
            in.seek(endMergePos);
            assertThat(in.readString(), equalTo("mergeEnd"));

            // Nothing follows "mergeEnd". Track the outcome in a flag, because a bare
            // "assert false" is skipped when the JVM runs without -ea.
            boolean reachedEnd = false;
            try {
                in.readByte();
            } catch (EOFException e) {
                reachedEnd = true;
            }
            assertThat(reachedEnd, equalTo(true));
        } finally {
            in.close();
        }
    }

    /**
     * Indexes documents through a {@link CompressedDirectory} configured for the "fdt"
     * extension, verifying the index after commits and forced merges, first with
     * compression left as constructed and then after calling {@code setCompress(false)}.
     *
     * @param compressor the compressor handed to the compressed directory
     */
    private void lucene(Compressor compressor) throws Exception {
        final CompressedDirectory directory = new CompressedDirectory(new RAMDirectory(), compressor, false, "fdt");
        final IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER);
        final IndexWriter indexWriter = new IndexWriter(directory, config);

        // first segment
        final int hundredBytes = (int) SizeValue.parseSizeValue("100b").singles();
        indexWriter.addDocument(createDoc(1, hundredBytes));
        final int fiveK = (int) SizeValue.parseSizeValue("5k").singles();
        indexWriter.addDocument(createDoc(2, fiveK));
        indexWriter.commit();

        // second segment
        final int twoK = (int) SizeValue.parseSizeValue("2k").singles();
        indexWriter.addDocument(createDoc(3, twoK));
        final int oneK = (int) SizeValue.parseSizeValue("1k").singles();
        indexWriter.addDocument(createDoc(4, oneK));
        indexWriter.commit();

        verify(indexWriter);
        indexWriter.forceMerge(1);
        indexWriter.waitForMerges();
        verify(indexWriter);

        // switch the directory's compress flag and add more documents
        directory.setCompress(false);
        final int twoKAgain = (int) SizeValue.parseSizeValue("2k").singles();
        indexWriter.addDocument(createDoc(5, twoKAgain));
        final int oneKAgain = (int) SizeValue.parseSizeValue("1k").singles();
        indexWriter.addDocument(createDoc(6, oneKAgain));

        verify(indexWriter);
        indexWriter.forceMerge(1);
        indexWriter.waitForMerges();
        verify(indexWriter);

        indexWriter.close();
    }

    /**
     * Checks that the writer's index is clean according to {@link CheckIndex} and that the
     * stored fields of every live document are consistent, first by scanning all documents
     * in order and then by sampling 100 random document ids.
     *
     * @param writer the writer whose directory and near-real-time reader are inspected
     */
    private void verify(IndexWriter writer) throws Exception {
        CheckIndex checkIndex = new CheckIndex(writer.getDirectory());
        CheckIndex.Status status = checkIndex.checkIndex();
        assertThat(status.clean, equalTo(true));
        IndexReader reader = DirectoryReader.open(writer, true);
        try {
            final Bits liveDocs = MultiFields.getLiveDocs(reader);
            for (int doc = 0; doc < reader.maxDoc(); doc++) {
                if (liveDocs != null && !liveDocs.get(doc)) {
                    continue;
                }
                verifyStoredFields(reader, doc);
            }
            for (int i = 0; i < 100; i++) {
                int doc = ThreadLocalRandom.current().nextInt(reader.maxDoc());
                // test liveness of the sampled document, not of the loop counter
                if (liveDocs != null && !liveDocs.get(doc)) {
                    continue;
                }
                verifyStoredFields(reader, doc);
            }
        } finally {
            // release the reader so it does not leak across verify() calls
            reader.close();
        }
    }

    /**
     * Loads one document both fully and through a stored-field visitor restricted to the
     * "id", "field" and "count" fields, and checks each loaded copy.
     */
    private void verifyStoredFields(IndexReader reader, int doc) throws Exception {
        checkDoc(reader.document(doc));
        DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("id", "field", "count");
        reader.document(doc, visitor);
        checkDoc(visitor.getDocument());
    }

    /**
     * Checks that the "field" value consists of space-terminated tokens that each equal
     * the "id" value, and that the number of such tokens equals the stored "count".
     *
     * @param document the stored document to check
     */
    private void checkDoc(Document document) {
        final String expectedToken = document.get("id");
        final String body = document.get("field");
        int tokens = 0;
        int start = 0;
        // walk from one space to the next; each span in between must be the id
        for (int space = body.indexOf(' ', start); space != -1; space = body.indexOf(' ', start)) {
            tokens++;
            assertThat(body.substring(start, space), equalTo(expectedToken));
            start = space + 1;
        }
        assertThat(tokens, equalTo(Integer.parseInt(document.get("count"))));
    }

    /**
     * Builds a document whose "field" value is the id followed by a space, repeated until
     * the value is at least {@code size} characters long. The number of repetitions is
     * stored in "count", and two random stored-only "skip" fields surround the payload.
     *
     * @param id   the document id, also used as the repeated token
     * @param size the minimum length of the "field" value
     * @return the populated document
     */
    private Document createDoc(int id, int size) {
        final Document result = new Document();
        result.add(new Field("id", Integer.toString(id), Field.Store.YES, Field.Index.NOT_ANALYZED));
        result.add(new Field("size", Integer.toString(size), Field.Store.YES, Field.Index.NOT_ANALYZED));
        result.add(new Field("skip", RandomStringGenerator.random(50), Field.Store.YES, Field.Index.NO));

        final StringBuilder body = new StringBuilder();
        int repetitions = 0;
        // always emit at least one token, then keep going until the target length is reached
        do {
            repetitions++;
            body.append(id).append(" ");
        } while (body.length() < size);

        result.add(new Field("count", Integer.toString(repetitions), Field.Store.YES, Field.Index.NOT_ANALYZED));
        result.add(new Field("field", body.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        result.add(new Field("skip", RandomStringGenerator.random(50), Field.Store.YES, Field.Index.NO));
        return result;
    }
}