org.hibernate.search.test.serialization.SerializationTest.java Source code

Introduction

Here is the source code for org.hibernate.search.test.serialization.SerializationTest.java
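The class round-trips a list of LuceneWork instances through the Avro-based serializer and asserts, field by field and attribute by attribute, that the copies match the originals; a second test benchmarks Avro against plain Java serialization. Reduced to a minimal sketch (all names taken from the test itself, with getSearchFactoryImpl() coming from the SearchTestCase base class), the round-trip under test is:

LuceneWorkSerializer converter = new PluggableSerializationLuceneWorkSerializer(
        new AvroSerializationProvider(), getSearchFactoryImpl());

byte[] bytes = converter.toSerializedModel(works);             // List<LuceneWork> -> byte[]
List<LuceneWork> copyOfWorks = converter.toLuceneWorks(bytes); // byte[] -> List<LuceneWork>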

Source

/*
 * Hibernate, Relational Persistence for Idiomatic Java
 *
 * JBoss, Home of Professional Open Source
 * Copyright 2011 Red Hat Inc. and/or its affiliates and other contributors
 * as indicated by the @authors tag. All rights reserved.
 * See the copyright.txt in the distribution for a
 * full listing of individual contributors.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU Lesser General Public License, v. 2.1.
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more details.
 * You should have received a copy of the GNU Lesser General Public License,
 * v.2.1 along with this distribution; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA  02110-1301, USA.
 */
package org.hibernate.search.test.serialization;

import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttributeImpl;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.Payload;
import org.apache.lucene.util.AttributeImpl;
import org.apache.solr.handler.AnalysisRequestHandlerBase;
import org.junit.Test;

import org.hibernate.search.backend.AddLuceneWork;
import org.hibernate.search.backend.DeleteLuceneWork;
import org.hibernate.search.backend.LuceneWork;
import org.hibernate.search.backend.OptimizeLuceneWork;
import org.hibernate.search.backend.PurgeAllLuceneWork;
import org.hibernate.search.backend.UpdateLuceneWork;
import org.hibernate.search.indexes.serialization.avro.impl.AvroSerializationProvider;
import org.hibernate.search.indexes.serialization.impl.CopyTokenStream;
import org.hibernate.search.indexes.serialization.impl.PluggableSerializationLuceneWorkSerializer;
import org.hibernate.search.indexes.serialization.impl.SerializationHelper;
import org.hibernate.search.indexes.serialization.spi.LuceneWorkSerializer;
import org.hibernate.search.indexes.serialization.spi.SerializableTokenStream;
import org.hibernate.search.test.SearchTestCase;
import org.hibernate.search.util.logging.impl.Log;
import org.hibernate.search.util.logging.impl.LoggerFactory;

import static org.fest.assertions.Assertions.assertThat;

/**
 * @author Emmanuel Bernard <emmanuel@hibernate.org>
 */
public class SerializationTest extends SearchTestCase {
    private static final Log log = LoggerFactory.make();

    @Test
    public void testAvroSerialization() throws Exception {
        LuceneWorkSerializer converter = new PluggableSerializationLuceneWorkSerializer(
                new AvroSerializationProvider(), getSearchFactoryImpl());
        List<LuceneWork> works = buildWorks();

        byte[] bytes = converter.toSerializedModel(works);
        List<LuceneWork> copyOfWorks = converter.toLuceneWorks(bytes);

        assertThat(copyOfWorks).hasSize(works.size());
        for (int index = 0; index < works.size(); index++) {
            assertLuceneWork(works.get(index), copyOfWorks.get(index));
        }
    }

    /**
     * 20110815
     * Our Avro serializer is slower (1.6 times) than Java serialization, especially when the VM is not warm (small loop values like 1000).
     * It evens up on longer loops like 100000.
     *
     * Our Avro deserializer is slower (2.5 times) than Java serialization, especially when the VM is not warm (small loop values like 1000).
     * It evens up with or beats Java serialization on longer loops like 100000.
     *
     * Tests done after the initial implementation (in particular, the schema is not part of the message).
     *
     * With 1000000:
     * Java serialization: 28730
     * Java message size: 2509
     * Java deserialization: 82970
     * Avro serialization: 24245
     * Avro message size: 1064
     * Avro deserialization: 54444
     *
     *
     * 20110824
     * The new Work sample is bigger and Avro's layer has been optimized.
     * Our Avro serializer is faster (1.8 times) than Java serialization for 100000.
     *
     * Our Avro deserializer is faster (2.7 times) than Java serialization for 100000.
     *
     * The message size is 4.4 times smaller in Avro.
     *
     * (The schema is not part of the message.)
     *
     * With 1000000:
     * Java serialization: 55786
     * Java message size: 4094
     * Java deserialization: 160764
     * Avro serialization: 30430
     * Avro message size: 929
     * Avro deserialization: 59255
     *
     * 20110826
     * Our Avro serializer is faster (1.7 times) than Java serialization for 100000.
     *
     * Our Avro deserializer is faster (2.7 times) than Java serialization for 100000.
     *
     * The message size is 6.6 times smaller in Avro.
     *
     * (The schema is not part of the message.)
     *
     * With 1000000:
     * Java serialization: 52682
     * Java message size: 4094
     * Java deserialization: 168595
     * Avro serialization: 30586
     * Avro message size: 617
     * Avro deserialization: 62141
     */
    @Test
    public void testAvroSerializationPerf() throws Exception {
        final int loop = 10; //TODO do 10000 or 100000
        LuceneWorkSerializer converter = new PluggableSerializationLuceneWorkSerializer(
                new AvroSerializationProvider(), getSearchFactoryImpl());
        List<LuceneWork> works = buildWorks();

        long begin;
        long end;
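        // all timings below are nanoTime deltas converted to milliseconds (divide by 1,000,000)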
        byte[] javaBytes = null;
        begin = System.nanoTime();
        for (int i = 0; i < loop; i++) {
            javaBytes = SerializationHelper.toByteArray((Serializable) works);
        }
        end = System.nanoTime();
        log.debug("Java serialization: " + ((end - begin) / 1000000));
        log.debug("Java message size: " + javaBytes.length);

        begin = System.nanoTime();

        List<LuceneWork> copyOfWorkForJavaSerial = null;
        for (int i = 0; i < loop; i++) {
            copyOfWorkForJavaSerial = (List<LuceneWork>) SerializationHelper.toSerializable(javaBytes,
                    Thread.currentThread().getContextClassLoader());
        }
        end = System.nanoTime();
        log.debug("Java de-serialization: " + ((end - begin) / 1000000));

        byte[] avroBytes = null;
        begin = System.nanoTime();
        for (int i = 0; i < loop; i++) {
            avroBytes = converter.toSerializedModel(works);
        }
        end = System.nanoTime();
        log.debug("Avro serialization: " + ((end - begin) / 1000000));
        log.debug("Avro message size: " + avroBytes.length);

        List<LuceneWork> copyOfWorks = null;
        begin = System.nanoTime();
        for (int i = 0; i < loop; i++) {
            copyOfWorks = converter.toLuceneWorks(avroBytes);
        }
        end = System.nanoTime();
        log.debug("Avro deserialization: " + ((end - begin) / 1000000));

        // keep both deserialized results reachable so the JIT cannot eliminate the loops above
        log.debug(copyOfWorks == copyOfWorkForJavaSerial);

    }

    private List<LuceneWork> buildWorks() throws Exception {
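        // build one of each LuceneWork type, including documents carrying
        // numeric, string, binary, reader and token-stream fields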
        List<LuceneWork> works = new ArrayList<LuceneWork>();
        works.add(new OptimizeLuceneWork());
        works.add(new OptimizeLuceneWork());
        works.add(new OptimizeLuceneWork(RemoteEntity.class)); // class won't be sent over
        works.add(new PurgeAllLuceneWork(RemoteEntity.class));
        works.add(new PurgeAllLuceneWork(RemoteEntity.class));
        works.add(new DeleteLuceneWork(123L, "123", RemoteEntity.class));
        works.add(new DeleteLuceneWork("Sissi", "Sissi", RemoteEntity.class));
        works.add(new DeleteLuceneWork(new URL("http://emmanuelbernard.com"), "http://emmanuelbernard.com",
                RemoteEntity.class));

        Document doc = new Document();
        doc.setBoost(2.3f);
        NumericField numField = new NumericField("double", 23, Field.Store.NO, true);
        numField.setDoubleValue(23d);
        numField.setOmitNorms(true);
        numField.setOmitTermFreqAndPositions(true);
        numField.setBoost(3f);
        doc.add(numField);
        numField = new NumericField("int", 23, Field.Store.NO, true);
        numField.setIntValue(23);
        doc.add(numField);
        numField = new NumericField("float", 23, Field.Store.NO, true);
        numField.setFloatValue(2.3f);
        doc.add(numField);
        numField = new NumericField("long", 23, Field.Store.NO, true);
        numField.setLongValue(23L);
        doc.add(numField);

        Map<String, String> analyzers = new HashMap<String, String>();
        analyzers.put("godo", "ngram");
        works.add(new AddLuceneWork(123, "123", RemoteEntity.class, doc, analyzers));

        doc = new Document();
        doc.setBoost(2.3f);
        Field field = new Field("StringF", "String field", Field.Store.YES, Field.Index.ANALYZED,
                Field.TermVector.WITH_OFFSETS);
        field.setOmitNorms(true);
        field.setOmitTermFreqAndPositions(true);
        field.setBoost(3f);
        doc.add(field);

        field = new Field("StringF2", "String field 2", Field.Store.YES, Field.Index.ANALYZED,
                Field.TermVector.WITH_OFFSETS);
        doc.add(field);

        byte[] array = new byte[4];
        array[0] = 2;
        array[1] = 5;
        array[2] = 5;
        array[3] = 8;
        field = new Field("binary", array, 0, array.length);
        doc.add(field);

        SerializableStringReader reader = new SerializableStringReader();
        field = new Field("ReaderField", reader, Field.TermVector.WITH_OFFSETS);
        doc.add(field);

        List<List<AttributeImpl>> tokens = buildTokenStreamWithAttributes();

        CopyTokenStream tokenStream = new CopyTokenStream(tokens);
        field = new Field("tokenstream", tokenStream, Field.TermVector.WITH_POSITIONS_OFFSETS);
        field.setOmitNorms(true);
        field.setOmitTermFreqAndPositions(true);
        field.setBoost(3f);
        doc.add(field);

        works.add(new UpdateLuceneWork(1234, "1234", RemoteEntity.class, doc));
        works.add(new AddLuceneWork(125, "125", RemoteEntity.class, new Document()));
        return works;
    }

    private List<List<AttributeImpl>> buildTokenStreamWithAttributes() {
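        // a single token carrying one instance of each attribute type
        // exercised by the serialization round-trip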
        List<List<AttributeImpl>> tokens = new ArrayList<List<AttributeImpl>>();
        tokens.add(new ArrayList<AttributeImpl>());
        AnalysisRequestHandlerBase.TokenTrackingAttributeImpl attrImpl = new AnalysisRequestHandlerBase.TokenTrackingAttributeImpl();
        attrImpl.reset(new int[] { 1, 2, 3 }, 4);
        tokens.get(0).add(attrImpl);

        CharTermAttributeImpl charAttr = new CharTermAttributeImpl();
        charAttr.append("Wazzza");
        tokens.get(0).add(charAttr);

        PayloadAttributeImpl payloadAttribute = new PayloadAttributeImpl();
        payloadAttribute.setPayload(new Payload(new byte[] { 0, 1, 2, 3 }));
        tokens.get(0).add(payloadAttribute);

        KeywordAttributeImpl keywordAttr = new KeywordAttributeImpl();
        keywordAttr.setKeyword(true);
        tokens.get(0).add(keywordAttr);

        PositionIncrementAttributeImpl posIncrAttr = new PositionIncrementAttributeImpl();
        posIncrAttr.setPositionIncrement(3);
        tokens.get(0).add(posIncrAttr);

        FlagsAttributeImpl flagsAttr = new FlagsAttributeImpl();
        flagsAttr.setFlags(435);
        tokens.get(0).add(flagsAttr);

        TypeAttributeImpl typeAttr = new TypeAttributeImpl();
        typeAttr.setType("acronym");
        tokens.get(0).add(typeAttr);

        OffsetAttributeImpl offsetAttr = new OffsetAttributeImpl();
        offsetAttr.setOffset(4, 7);
        tokens.get(0).add(offsetAttr);
        return tokens;
    }

    private void assertLuceneWork(LuceneWork work, LuceneWork copy) {
        assertThat(copy).isInstanceOf(work.getClass());
        if (work instanceof OptimizeLuceneWork) {
            assertNotNull(copy);
            assertTrue(copy instanceof OptimizeLuceneWork);
        } else if (work instanceof PurgeAllLuceneWork) {
            assertPurgeAll((PurgeAllLuceneWork) work, (PurgeAllLuceneWork) copy);
        } else if (work instanceof DeleteLuceneWork) {
            assertDelete((DeleteLuceneWork) work, (DeleteLuceneWork) copy);
        } else if (work instanceof AddLuceneWork) {
            assertAdd((AddLuceneWork) work, (AddLuceneWork) copy);
        } else if (work instanceof UpdateLuceneWork) {
            assertUpdate((UpdateLuceneWork) work, (UpdateLuceneWork) copy);
        } else {
            fail("unexpected type");
        }
    }

    private void assertAdd(AddLuceneWork work, AddLuceneWork copy) {
        assertThat(work.getEntityClass()).as("Add.getEntityClass is not copied").isEqualTo(copy.getEntityClass());
        assertThat(work.getId()).as("Add.getId is not copied").isEqualTo(copy.getId());
        assertThat(work.getIdInString()).as("Add.getIdInString is not the same").isEqualTo(copy.getIdInString());
        assertThat(work.getFieldToAnalyzerMap()).as("Add.getFieldToAnalyzerMap is not the same")
                .isEqualTo(copy.getFieldToAnalyzerMap());
        assertDocument(work.getDocument(), copy.getDocument());
    }

    private void assertUpdate(UpdateLuceneWork work, UpdateLuceneWork copy) {
        assertThat(work.getEntityClass()).as("Update.getEntityClass is not copied").isEqualTo(copy.getEntityClass());
        assertThat(work.getId()).as("Update.getId is not copied").isEqualTo(copy.getId());
        assertThat(work.getIdInString()).as("Update.getIdInString is not the same").isEqualTo(copy.getIdInString());
        assertThat(work.getFieldToAnalyzerMap()).as("Update.getFieldToAnalyzerMap is not the same")
                .isEqualTo(copy.getFieldToAnalyzerMap());
        assertDocument(work.getDocument(), copy.getDocument());
    }

    private void assertDocument(Document document, Document copy) {
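        // fields are compared positionally, so the serializer must preserve field order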
        assertThat(document.getBoost()).isEqualTo(copy.getBoost());
        for (int index = 0; index < document.getFields().size(); index++) {
            Fieldable field = document.getFields().get(index);
            Fieldable fieldCopy = copy.getFields().get(index);
            assertThat(field).isInstanceOf(fieldCopy.getClass());
            if (field instanceof NumericField) {
                assertNumericField((NumericField) field, (NumericField) fieldCopy);
            } else if (field instanceof Field) {
                assertNormalField((Field) field, (Field) fieldCopy);
            }
        }
    }

    private void assertNormalField(Field field, Field copy) {
        assertThat(copy.name()).isEqualTo(field.name());
        assertThat(copy.getBinaryLength()).isEqualTo(field.getBinaryLength());
        assertThat(copy.getBinaryOffset()).isEqualTo(field.getBinaryOffset());
        assertThat(copy.getBinaryValue()).isEqualTo(field.getBinaryValue());
        assertThat(copy.getBoost()).isEqualTo(field.getBoost());
        assertThat(copy.getOmitNorms()).isEqualTo(field.getOmitNorms());
        assertThat(copy.getOmitTermFreqAndPositions()).isEqualTo(field.getOmitTermFreqAndPositions());
        assertThat(copy.isBinary()).isEqualTo(field.isBinary());
        assertThat(copy.isIndexed()).isEqualTo(field.isIndexed());
        assertThat(copy.isLazy()).isEqualTo(field.isLazy());
        assertThat(copy.isStoreOffsetWithTermVector()).isEqualTo(field.isStoreOffsetWithTermVector());
        assertThat(copy.isStorePositionWithTermVector()).isEqualTo(field.isStorePositionWithTermVector());
        assertThat(copy.isStored()).isEqualTo(field.isStored());
        assertThat(copy.isTokenized()).isEqualTo(field.isTokenized());
        assertThat(compareReaders(copy.readerValue(), field.readerValue())).isTrue();
        assertThat(compareTokenStreams(field.tokenStreamValue(), copy.tokenStreamValue())).isTrue();
        assertThat(copy.stringValue()).isEqualTo(field.stringValue());

        assertThat(copy.isTermVectorStored()).isEqualTo(field.isTermVectorStored());
    }

    private boolean compareTokenStreams(TokenStream original, TokenStream copy) {
        if (original == null) {
            return copy == null;
        }
        try {
            original.reset();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
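        // normalize both streams into their serializable token form and compare
        // them token by token, attribute by attribute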
        SerializableTokenStream serOriginal = CopyTokenStream.buildSerializabletokenStream(original);
        SerializableTokenStream serCopy = CopyTokenStream.buildSerializabletokenStream(copy);
        if (serOriginal.getStream().size() != serCopy.getStream().size()) {
            return false;
        }
        for (int i = 0; i < serOriginal.getStream().size(); i++) {
            List<AttributeImpl> origToken = serOriginal.getStream().get(i);
            List<AttributeImpl> copyToken = serCopy.getStream().get(i);
            if (origToken.size() != copyToken.size()) {
                return false;
            }
            for (int j = 0; j < origToken.size(); j++) {
                AttributeImpl origAttr = origToken.get(j);
                AttributeImpl copyAttr = copyToken.get(j);
                if (origAttr.getClass() != copyAttr.getClass()) {
                    return false;
                }
                testAttributeTypes(origAttr, copyAttr);
            }
        }
        return true;
    }

    private void testAttributeTypes(AttributeImpl origAttr, AttributeImpl copyAttr) {
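        // dispatch on the attribute interface and compare the attribute-specific state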
        if (origAttr instanceof AnalysisRequestHandlerBase.TokenTrackingAttributeImpl) {
            assertThat(((AnalysisRequestHandlerBase.TokenTrackingAttributeImpl) origAttr).getPositions())
                    .isEqualTo(((AnalysisRequestHandlerBase.TokenTrackingAttributeImpl) copyAttr).getPositions());
        } else if (origAttr instanceof CharTermAttribute) {
            assertThat(origAttr.toString()).isEqualTo(copyAttr.toString());
        } else if (origAttr instanceof PayloadAttribute) {
            assertThat(((PayloadAttribute) origAttr).getPayload())
                    .isEqualTo(((PayloadAttribute) copyAttr).getPayload());
        } else if (origAttr instanceof KeywordAttribute) {
            assertThat(((KeywordAttribute) origAttr).isKeyword())
                    .isEqualTo(((KeywordAttribute) copyAttr).isKeyword());
        } else if (origAttr instanceof PositionIncrementAttribute) {
            assertThat(((PositionIncrementAttribute) origAttr).getPositionIncrement())
                    .isEqualTo(((PositionIncrementAttribute) copyAttr).getPositionIncrement());
        } else if (origAttr instanceof FlagsAttribute) {
            assertThat(((FlagsAttribute) origAttr).getFlags()).isEqualTo(((FlagsAttribute) copyAttr).getFlags());
        } else if (origAttr instanceof TypeAttribute) {
            assertThat(((TypeAttribute) origAttr).type()).isEqualTo(((TypeAttribute) copyAttr).type());
        } else if (origAttr instanceof OffsetAttribute) {
            OffsetAttribute orig = (OffsetAttribute) origAttr;
            OffsetAttribute cop = (OffsetAttribute) copyAttr;
            assertThat(orig.startOffset()).isEqualTo(cop.startOffset());
            assertThat(orig.endOffset()).isEqualTo(cop.endOffset());
        }
    }

    private boolean compareReaders(Reader copy, Reader original) {
        if (original == null) {
            return copy == null;
        }
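        // compare character by character; both readers must hit EOF at the same point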
        try {
            for (int o = original.read(); o != -1; o = original.read()) {
                int c = copy.read();
                if (o != c) {
                    return false;
                }
            }
            return copy.read() == -1;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    private void assertNumericField(NumericField field, NumericField copy) {
        assertThat(copy.name()).isEqualTo(field.name());
        assertThat(copy.getBinaryLength()).isEqualTo(field.getBinaryLength());
        assertThat(copy.getBinaryOffset()).isEqualTo(field.getBinaryOffset());
        assertThat(copy.getBinaryValue()).isEqualTo(field.getBinaryValue());
        assertThat(copy.getBoost()).isEqualTo(field.getBoost());
        assertThat(copy.getDataType()).isEqualTo(field.getDataType());
        assertThat(copy.getNumericValue()).isEqualTo(field.getNumericValue());
        assertThat(copy.getOmitNorms()).isEqualTo(field.getOmitNorms());
        assertThat(copy.getOmitTermFreqAndPositions()).isEqualTo(field.getOmitTermFreqAndPositions());
        assertThat(copy.getPrecisionStep()).isEqualTo(field.getPrecisionStep());
        assertThat(copy.isBinary()).isEqualTo(field.isBinary());
        assertThat(copy.isIndexed()).isEqualTo(field.isIndexed());
        assertThat(copy.isLazy()).isEqualTo(field.isLazy());
        assertThat(copy.isStoreOffsetWithTermVector()).isEqualTo(field.isStoreOffsetWithTermVector());
        assertThat(copy.isStorePositionWithTermVector()).isEqualTo(field.isStorePositionWithTermVector());
        assertThat(copy.isStored()).isEqualTo(field.isStored());
        assertThat(copy.isTokenized()).isEqualTo(field.isTokenized());
        assertThat(copy.readerValue()).isEqualTo(field.readerValue());
        assertThat(copy.tokenStreamValue()).isEqualTo(field.tokenStreamValue());
        assertThat(copy.stringValue()).isEqualTo(field.stringValue());
    }

    private void assertDelete(DeleteLuceneWork work, DeleteLuceneWork copy) {
        assertThat(work.getEntityClass()).as("Delete.getEntityClass is not copied")
                .isEqualTo(copy.getEntityClass());
        assertThat(work.getId()).as("Delete.getId is not copied").isEqualTo(copy.getId());
        assertThat(work.getDocument()).as("Delete.getDocument is not the same").isEqualTo(copy.getDocument());
        assertThat(work.getIdInString()).as("Delete.getIdInString is not the same").isEqualTo(copy.getIdInString());
        assertThat(work.getFieldToAnalyzerMap()).as("Delete.getFieldToAnalyzerMap is not the same")
                .isEqualTo(copy.getFieldToAnalyzerMap());
    }

    private void assertPurgeAll(PurgeAllLuceneWork work, PurgeAllLuceneWork copy) {
        assertThat(work.getEntityClass()).as("PurgeAllLuceneWork.getEntityClass is not copied")
                .isEqualTo(copy.getEntityClass());
    }

    @Override
    protected Class<?>[] getAnnotatedClasses() {
        return new Class<?>[] { RemoteEntity.class };
    }

    private static class SerializableStringReader extends Reader implements Serializable {
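        // minimal Serializable Reader: yields a single char (value 2), then EOF;
        // backs the ReaderField built in buildWorks()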
        private boolean read = false;

        @Override
        public int read(char[] cbuf, int off, int len) throws IOException {
            if (read) {
                return -1;
            } else {
                read = true;
                cbuf[off] = 2;
                return 1;
            }
        }

        @Override
        public void close() throws IOException {
        }

    }
}