Java tutorial
/* * Hibernate, Relational Persistence for Idiomatic Java * * JBoss, Home of Professional Open Source * Copyright 2011 Red Hat Inc. and/or its affiliates and other contributors * as indicated by the @authors tag. All rights reserved. * See the copyright.txt in the distribution for a * full listing of individual contributors. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions * of the GNU Lesser General Public License, v. 2.1. * This program is distributed in the hope that it will be useful, but WITHOUT A * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. * You should have received a copy of the GNU Lesser General Public License, * v.2.1 along with this distribution; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ package org.hibernate.search.test.serialization; import java.io.IOException; import java.io.Reader; import java.io.Serializable; import java.net.URL; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl; import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.FlagsAttributeImpl; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttributeImpl; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttributeImpl; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttributeImpl; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttributeImpl; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttributeImpl; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.Payload; import org.apache.lucene.util.AttributeImpl; import org.apache.solr.handler.AnalysisRequestHandlerBase; import org.junit.Test; import org.hibernate.search.backend.AddLuceneWork; import org.hibernate.search.backend.DeleteLuceneWork; import org.hibernate.search.backend.LuceneWork; import org.hibernate.search.backend.OptimizeLuceneWork; import org.hibernate.search.backend.PurgeAllLuceneWork; import org.hibernate.search.backend.UpdateLuceneWork; import org.hibernate.search.indexes.serialization.avro.impl.AvroSerializationProvider; import org.hibernate.search.indexes.serialization.impl.CopyTokenStream; import org.hibernate.search.indexes.serialization.impl.PluggableSerializationLuceneWorkSerializer; import org.hibernate.search.indexes.serialization.impl.SerializationHelper; import org.hibernate.search.indexes.serialization.spi.LuceneWorkSerializer; import org.hibernate.search.indexes.serialization.spi.SerializableTokenStream; import org.hibernate.search.test.SearchTestCase; import org.hibernate.search.util.logging.impl.Log; import org.hibernate.search.util.logging.impl.LoggerFactory; import static org.fest.assertions.Assertions.assertThat; /** * @author Emmanuel Bernard <emmanuel@hibernate.org> */ public class SerializationTest extends SearchTestCase { private static final Log log = LoggerFactory.make(); @Test public void testAvroSerialization() throws Exception { LuceneWorkSerializer converter = new PluggableSerializationLuceneWorkSerializer( new AvroSerializationProvider(), getSearchFactoryImpl()); List<LuceneWork> works = buildWorks(); byte[] bytes = converter.toSerializedModel(works); List<LuceneWork> copyOfWorks = converter.toLuceneWorks(bytes); assertThat(copyOfWorks).hasSize(works.size()); for (int index = 0; index < works.size(); index++) { assertLuceneWork(works.get(index), copyOfWorks.get(index)); } } @Test /** * 20110815 * Our avro serializer is slower (1.6) than Java serialization esp when the VM is not warm (small loop value like = 1000 * In evens up on longer loops like 100000 * * Our avro deserializer is slower (2.5) than Java serialization esp when the VM is not warm (small loop value like = 1000 * In evens up or beats the Java serialization on longer loops like 100000 * * Test done after initial implementation (in particular the schema is not part of the message * * With 1000000: * Java serialization: 28730 * Java message size: 2509 * Java deserialization: 82970 * Avro serialization: 24245 * Avro message size: 1064 * Avro deserialization: 54444 * * * 20110824 * The new Work sample is bigger and Avro's layer has been optimized * Our avro serializer is faster (1.8 times) than Java serialization for 100000. * * Our avro deserializer is faster (2.7 times) than Java serialization for 100000. * * The message size is 4.4 times smaller in Avro * * (the schema is not part of the message) * * With 1000000: * Java serialization: 55786 * Java message size: 4094 * Java deserialization: 160764 * Avro serialization: 30430 * Avro message size: 929 * Avro deserialization: 59255 * * 20110826 * Our avro serializer is faster (1.7 times) than Java serialization for 100000. * * Our avro deserializer is faster (2.7 times) than Java serialization for 100000. * * The message size is 6.6 times smaller in Avro * * (the schema is not part of the message) * * With 1000000: * Java serialization: 52682 * Java message size: 4094 * Java de-serialization: 168595 * Avro serialization: 30586 * Avro message size: 617 * Avro deserialization: 62141 */ public void testAvroSerializationPerf() throws Exception { final int loop = 10; //TODO do 10000 or 100000 LuceneWorkSerializer converter = new PluggableSerializationLuceneWorkSerializer( new AvroSerializationProvider(), getSearchFactoryImpl()); List<LuceneWork> works = buildWorks(); long begin; long end; byte[] javaBytes = null; begin = System.nanoTime(); for (int i = 0; i < loop; i++) { javaBytes = SerializationHelper.toByteArray((Serializable) works); } end = System.nanoTime(); log.debug("Java serialization: " + ((end - begin) / 1000000)); log.debug("Java message size: " + javaBytes.length); begin = System.nanoTime(); List<LuceneWork> copyOfWorkForJavaSerial = null; for (int i = 0; i < loop; i++) { copyOfWorkForJavaSerial = (List<LuceneWork>) SerializationHelper.toSerializable(javaBytes, Thread.currentThread().getContextClassLoader()); } end = System.nanoTime(); log.debug("Java de-serialization: " + ((end - begin) / 1000000)); byte[] avroBytes = null; begin = System.nanoTime(); for (int i = 0; i < loop; i++) { avroBytes = converter.toSerializedModel(works); } end = System.nanoTime(); log.debug("Avro serialization: " + ((end - begin) / 1000000)); log.debug("Avro message size: " + avroBytes.length); List<LuceneWork> copyOfWorks = null; begin = System.nanoTime(); for (int i = 0; i < loop; i++) { copyOfWorks = converter.toLuceneWorks(avroBytes); } end = System.nanoTime(); log.debug("Avro deserialization: " + ((end - begin) / 1000000)); //make sure the compiler does not cheat log.debug(copyOfWorks == copyOfWorkForJavaSerial); } private List<LuceneWork> buildWorks() throws Exception { List<LuceneWork> works = new ArrayList<LuceneWork>(); works.add(new OptimizeLuceneWork()); works.add(new OptimizeLuceneWork()); works.add(new OptimizeLuceneWork(RemoteEntity.class)); //class won't be send over works.add(new PurgeAllLuceneWork(RemoteEntity.class)); works.add(new PurgeAllLuceneWork(RemoteEntity.class)); works.add(new DeleteLuceneWork(123l, "123", RemoteEntity.class)); works.add(new DeleteLuceneWork("Sissi", "Sissi", RemoteEntity.class)); works.add(new DeleteLuceneWork(new URL("http://emmanuelbernard.com"), "http://emmanuelbernard.com", RemoteEntity.class)); Document doc = new Document(); doc.setBoost(2.3f); NumericField numField = new NumericField("double", 23, Field.Store.NO, true); numField.setDoubleValue(23d); numField.setOmitNorms(true); numField.setOmitTermFreqAndPositions(true); numField.setBoost(3f); doc.add(numField); numField = new NumericField("int", 23, Field.Store.NO, true); numField.setIntValue(23); doc.add(numField); numField = new NumericField("float", 23, Field.Store.NO, true); numField.setFloatValue(2.3f); doc.add(numField); numField = new NumericField("long", 23, Field.Store.NO, true); numField.setLongValue(23l); doc.add(numField); Map<String, String> analyzers = new HashMap<String, String>(); analyzers.put("godo", "ngram"); works.add(new AddLuceneWork(123, "123", RemoteEntity.class, doc, analyzers)); doc = new Document(); doc.setBoost(2.3f); Field field = new Field("StringF", "String field", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS); field.setOmitNorms(true); field.setOmitTermFreqAndPositions(true); field.setBoost(3f); doc.add(field); field = new Field("StringF2", "String field 2", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS); doc.add(field); byte[] array = new byte[4]; array[0] = 2; array[1] = 5; array[2] = 5; array[3] = 8; field = new Field("binary", array, 0, array.length); doc.add(field); SerializableStringReader reader = new SerializableStringReader(); field = new Field("ReaderField", reader, Field.TermVector.WITH_OFFSETS); doc.add(field); List<List<AttributeImpl>> tokens = buildTokenSteamWithAttributes(); CopyTokenStream tokenStream = new CopyTokenStream(tokens); field = new Field("tokenstream", tokenStream, Field.TermVector.WITH_POSITIONS_OFFSETS); field.setOmitNorms(true); field.setOmitTermFreqAndPositions(true); field.setBoost(3f); doc.add(field); works.add(new UpdateLuceneWork(1234, "1234", RemoteEntity.class, doc)); works.add(new AddLuceneWork(125, "125", RemoteEntity.class, new Document())); return works; } private List<List<AttributeImpl>> buildTokenSteamWithAttributes() { List<List<AttributeImpl>> tokens = new ArrayList<List<AttributeImpl>>(); tokens.add(new ArrayList<AttributeImpl>()); AnalysisRequestHandlerBase.TokenTrackingAttributeImpl attrImpl = new AnalysisRequestHandlerBase.TokenTrackingAttributeImpl(); attrImpl.reset(new int[] { 1, 2, 3 }, 4); tokens.get(0).add(attrImpl); CharTermAttributeImpl charAttr = new CharTermAttributeImpl(); charAttr.append("Wazzza"); tokens.get(0).add(charAttr); PayloadAttributeImpl payloadAttribute = new PayloadAttributeImpl(); payloadAttribute.setPayload(new Payload(new byte[] { 0, 1, 2, 3 })); tokens.get(0).add(payloadAttribute); KeywordAttributeImpl keywordAttr = new KeywordAttributeImpl(); keywordAttr.setKeyword(true); tokens.get(0).add(keywordAttr); PositionIncrementAttributeImpl posIncrAttr = new PositionIncrementAttributeImpl(); posIncrAttr.setPositionIncrement(3); tokens.get(0).add(posIncrAttr); FlagsAttributeImpl flagsAttr = new FlagsAttributeImpl(); flagsAttr.setFlags(435); tokens.get(0).add(flagsAttr); TypeAttributeImpl typeAttr = new TypeAttributeImpl(); typeAttr.setType("acronym"); tokens.get(0).add(typeAttr); OffsetAttributeImpl offsetAttr = new OffsetAttributeImpl(); offsetAttr.setOffset(4, 7); tokens.get(0).add(offsetAttr); return tokens; } private void assertLuceneWork(LuceneWork work, LuceneWork copy) { assertThat(copy).isInstanceOf(work.getClass()); if (work instanceof OptimizeLuceneWork) { assertNotNull(copy); assertTrue(copy instanceof OptimizeLuceneWork); } else if (work instanceof PurgeAllLuceneWork) { assertPurgeAll((PurgeAllLuceneWork) work, (PurgeAllLuceneWork) copy); } else if (work instanceof DeleteLuceneWork) { assertDelete((DeleteLuceneWork) work, (DeleteLuceneWork) copy); } else if (work instanceof AddLuceneWork) { assertAdd((AddLuceneWork) work, (AddLuceneWork) copy); } else if (work instanceof UpdateLuceneWork) { assertUpdate((UpdateLuceneWork) work, (UpdateLuceneWork) copy); } else { fail("unexpected type"); } } private void assertAdd(AddLuceneWork work, AddLuceneWork copy) { assertThat(work.getEntityClass()).as("Add.getEntityClass is not copied").isEqualTo(copy.getEntityClass()); assertThat(work.getId()).as("Add.getId is not copied").isEqualTo(copy.getId()); assertThat(work.getIdInString()).as("Add.getIdInString is not the same").isEqualTo(copy.getIdInString()); assertThat(work.getFieldToAnalyzerMap()).as("Add.getFieldToAnalyzerMap is not the same") .isEqualTo(copy.getFieldToAnalyzerMap()); assertDocument(work.getDocument(), copy.getDocument()); } private void assertUpdate(UpdateLuceneWork work, UpdateLuceneWork copy) { assertThat(work.getEntityClass()).as("Add.getEntityClass is not copied").isEqualTo(copy.getEntityClass()); assertThat(work.getId()).as("Add.getId is not copied").isEqualTo(copy.getId()); assertThat(work.getIdInString()).as("Add.getIdInString is not the same").isEqualTo(copy.getIdInString()); assertThat(work.getFieldToAnalyzerMap()).as("Add.getFieldToAnalyzerMap is not the same") .isEqualTo(copy.getFieldToAnalyzerMap()); assertDocument(work.getDocument(), copy.getDocument()); } private void assertDocument(Document document, Document copy) { assertThat(document.getBoost()).isEqualTo(copy.getBoost()); for (int index = 0; index < document.getFields().size(); index++) { Fieldable field = document.getFields().get(index); Fieldable fieldCopy = copy.getFields().get(index); assertThat(field).isInstanceOf(fieldCopy.getClass()); if (field instanceof NumericField) { assertNumericField((NumericField) field, (NumericField) fieldCopy); } else if (field instanceof Field) { assertNormalField((Field) field, (Field) fieldCopy); } } } private void assertNormalField(Field field, Field copy) { assertThat(copy.name()).isEqualTo(field.name()); assertThat(copy.getBinaryLength()).isEqualTo(field.getBinaryLength()); assertThat(copy.getBinaryOffset()).isEqualTo(field.getBinaryOffset()); assertThat(copy.getBinaryValue()).isEqualTo(field.getBinaryValue()); assertThat(copy.getBoost()).isEqualTo(field.getBoost()); assertThat(copy.getOmitNorms()).isEqualTo(field.getOmitNorms()); assertThat(copy.getOmitTermFreqAndPositions()).isEqualTo(field.getOmitTermFreqAndPositions()); assertThat(copy.isBinary()).isEqualTo(field.isBinary()); assertThat(copy.isIndexed()).isEqualTo(field.isIndexed()); assertThat(copy.isLazy()).isEqualTo(field.isLazy()); assertThat(copy.isStoreOffsetWithTermVector()).isEqualTo(field.isStoreOffsetWithTermVector()); assertThat(copy.isStorePositionWithTermVector()).isEqualTo(field.isStorePositionWithTermVector()); assertThat(copy.isStored()).isEqualTo(field.isStored()); assertThat(copy.isTokenized()).isEqualTo(field.isTokenized()); assertThat(compareReaders(copy.readerValue(), field.readerValue())).isTrue(); assertThat(compareTokenStreams(field.tokenStreamValue(), copy.tokenStreamValue())).isTrue(); assertThat(copy.stringValue()).isEqualTo(field.stringValue()); assertThat(copy.isTermVectorStored()).isEqualTo(field.isTermVectorStored()); } private boolean compareTokenStreams(TokenStream original, TokenStream copy) { if (original == null) { return copy == null; } try { original.reset(); } catch (IOException e) { throw new RuntimeException(e); } SerializableTokenStream serOriginal = CopyTokenStream.buildSerializabletokenStream(original); SerializableTokenStream serCopy = CopyTokenStream.buildSerializabletokenStream(copy); if (serOriginal.getStream().size() != serCopy.getStream().size()) { return false; } for (int i = 0; i < serOriginal.getStream().size(); i++) { List<AttributeImpl> origToken = serOriginal.getStream().get(i); List<AttributeImpl> copyToken = serCopy.getStream().get(i); if (origToken.size() != copyToken.size()) { return false; } for (int j = 0; j < origToken.size(); j++) { AttributeImpl origAttr = origToken.get(j); AttributeImpl copyAttr = copyToken.get(j); if (origAttr.getClass() != copyAttr.getClass()) { return false; } testAttributeTypes(origAttr, copyAttr); } } return true; } private void testAttributeTypes(AttributeImpl origAttr, AttributeImpl copyAttr) { if (origAttr instanceof AnalysisRequestHandlerBase.TokenTrackingAttributeImpl) { assertThat(((AnalysisRequestHandlerBase.TokenTrackingAttributeImpl) origAttr).getPositions()) .isEqualTo(((AnalysisRequestHandlerBase.TokenTrackingAttributeImpl) copyAttr).getPositions()); } else if (origAttr instanceof CharTermAttribute) { assertThat(origAttr.toString()).isEqualTo(copyAttr.toString()); } else if (origAttr instanceof PayloadAttribute) { assertThat(((PayloadAttribute) origAttr).getPayload()) .isEqualTo(((PayloadAttribute) copyAttr).getPayload()); } else if (origAttr instanceof KeywordAttribute) { assertThat(((KeywordAttribute) origAttr).isKeyword()) .isEqualTo(((KeywordAttribute) copyAttr).isKeyword()); } else if (origAttr instanceof PositionIncrementAttribute) { assertThat(((PositionIncrementAttribute) origAttr).getPositionIncrement()) .isEqualTo(((PositionIncrementAttribute) copyAttr).getPositionIncrement()); } else if (origAttr instanceof FlagsAttribute) { assertThat(((FlagsAttribute) origAttr).getFlags()).isEqualTo(((FlagsAttribute) copyAttr).getFlags()); } else if (origAttr instanceof TypeAttribute) { assertThat(((TypeAttribute) origAttr).type()).isEqualTo(((TypeAttribute) copyAttr).type()); } else if (origAttr instanceof OffsetAttribute) { OffsetAttribute orig = (OffsetAttribute) origAttr; OffsetAttribute cop = (OffsetAttribute) copyAttr; assertThat(orig.startOffset()).isEqualTo(cop.startOffset()); assertThat(orig.endOffset()).isEqualTo(cop.endOffset()); } } private boolean compareReaders(Reader copy, Reader original) { if (original == null) { return copy == null; } try { for (int o = original.read(); o != -1; o = original.read()) { int c = copy.read(); if (o != c) { return false; } } return copy.read() == -1; } catch (IOException e) { throw new RuntimeException(e); } } private void assertNumericField(NumericField field, NumericField copy) { assertThat(copy.name()).isEqualTo(field.name()); assertThat(copy.getBinaryLength()).isEqualTo(field.getBinaryLength()); assertThat(copy.getBinaryOffset()).isEqualTo(field.getBinaryOffset()); assertThat(copy.getBinaryValue()).isEqualTo(field.getBinaryValue()); assertThat(copy.getBoost()).isEqualTo(field.getBoost()); assertThat(copy.getDataType()).isEqualTo(field.getDataType()); assertThat(copy.getNumericValue()).isEqualTo(field.getNumericValue()); assertThat(copy.getOmitNorms()).isEqualTo(field.getOmitNorms()); assertThat(copy.getOmitTermFreqAndPositions()).isEqualTo(field.getOmitTermFreqAndPositions()); assertThat(copy.getPrecisionStep()).isEqualTo(field.getPrecisionStep()); assertThat(copy.isBinary()).isEqualTo(field.isBinary()); assertThat(copy.isIndexed()).isEqualTo(field.isIndexed()); assertThat(copy.isLazy()).isEqualTo(field.isLazy()); assertThat(copy.isStoreOffsetWithTermVector()).isEqualTo(field.isStoreOffsetWithTermVector()); assertThat(copy.isStorePositionWithTermVector()).isEqualTo(field.isStorePositionWithTermVector()); assertThat(copy.isStored()).isEqualTo(field.isStored()); assertThat(copy.isTokenized()).isEqualTo(field.isTokenized()); assertThat(copy.readerValue()).isEqualTo(field.readerValue()); assertThat(copy.tokenStreamValue()).isEqualTo(field.tokenStreamValue()); assertThat(copy.stringValue()).isEqualTo(field.stringValue()); } private void assertDelete(DeleteLuceneWork work, DeleteLuceneWork copy) { assertThat(work.getEntityClass()).as("Delete.getEntityClass is not copied") .isEqualTo(copy.getEntityClass()); assertThat(work.getId()).as("Delete.getId is not copied").isEqualTo(copy.getId()); assertThat(work.getDocument()).as("Delete.getDocument is not the same").isEqualTo(copy.getDocument()); assertThat(work.getIdInString()).as("Delete.getIdInString is not the same").isEqualTo(copy.getIdInString()); assertThat(work.getFieldToAnalyzerMap()).as("Delete.getFieldToAnalyzerMap is not the same") .isEqualTo(copy.getFieldToAnalyzerMap()); } private void assertPurgeAll(PurgeAllLuceneWork work, PurgeAllLuceneWork copy) { assertThat(work.getEntityClass()).as("PurgeAllLuceneWork.getEntityClass is not copied") .isEqualTo(copy.getEntityClass()); } @Override protected Class<?>[] getAnnotatedClasses() { return new Class<?>[] { RemoteEntity.class }; } private static class SerializableStringReader extends Reader implements Serializable { private boolean read = false; @Override public int read(char[] cbuf, int off, int len) throws IOException { if (read) { return -1; } else { read = true; cbuf[off] = 2; return 1; } } @Override public void close() throws IOException { } } }