Java example: Apache TinkerPop's GryoSerializer for Spark

The listing below is TinkerPop's GryoSerializer, a Spark Serializer implementation that backs serialization with a pool of Gryo (Kryo-based) mappers pre-registered with the Spark, Scala, and TinkerPop types used by Spark-based graph computation.
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.tinkerpop.gremlin.spark.structure.io.gryo;

import org.apache.commons.configuration.BaseConfiguration;
import org.apache.commons.configuration.Configuration;
import org.apache.spark.SparkConf;
import org.apache.spark.api.python.PythonBroadcast;
import org.apache.spark.broadcast.HttpBroadcast;
import org.apache.spark.network.util.ByteUnit;
import org.apache.spark.scheduler.CompressedMapStatus;
import org.apache.spark.scheduler.HighlyCompressedMapStatus;
import org.apache.spark.serializer.Serializer;
import org.apache.spark.serializer.SerializerInstance;
import org.apache.spark.storage.BlockManagerId;
import org.apache.spark.util.SerializableConfiguration;
import org.apache.spark.util.collection.CompactBuffer;
import org.apache.tinkerpop.gremlin.hadoop.structure.io.ObjectWritable;
import org.apache.tinkerpop.gremlin.hadoop.structure.io.VertexWritable;
import org.apache.tinkerpop.gremlin.spark.process.computer.payload.MessagePayload;
import org.apache.tinkerpop.gremlin.spark.process.computer.payload.ViewIncomingPayload;
import org.apache.tinkerpop.gremlin.spark.process.computer.payload.ViewOutgoingPayload;
import org.apache.tinkerpop.gremlin.spark.process.computer.payload.ViewPayload;
import org.apache.tinkerpop.gremlin.structure.io.gryo.GryoPool;
import org.apache.tinkerpop.shaded.kryo.io.Output;
import org.apache.tinkerpop.shaded.kryo.serializers.ExternalizableSerializer;
import org.apache.tinkerpop.shaded.kryo.serializers.JavaSerializer;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.mutable.WrappedArray;
import scala.runtime.BoxedUnit;

import java.util.Collections;

/**
 * @author Marko A. Rodriguez (http://markorodriguez.com)
 */
public final class GryoSerializer extends Serializer {

    //private final Option<String> userRegistrator;
    private final int bufferSize;
    private final int maxBufferSize;

    private final GryoPool gryoPool;

    public GryoSerializer(final SparkConf sparkConfiguration) {
        // Read Spark's standard Kryo buffer settings; both must stay below 2 GiB.
        final long bufferSizeKb = sparkConfiguration.getSizeAsKb("spark.kryoserializer.buffer", "64k");
        final long maxBufferSizeMb = sparkConfiguration.getSizeAsMb("spark.kryoserializer.buffer.max", "64m");
        final boolean referenceTracking = sparkConfiguration.getBoolean("spark.kryo.referenceTracking", true);
        final boolean registrationRequired = sparkConfiguration.getBoolean("spark.kryo.registrationRequired", false);
        if (bufferSizeKb >= ByteUnit.GiB.toKiB(2L)) {
            throw new IllegalArgumentException(
                    "spark.kryoserializer.buffer must be less than 2048 mb, got: " + bufferSizeKb + " kb.");
        } else {
            this.bufferSize = (int) ByteUnit.KiB.toBytes(bufferSizeKb);
            if (maxBufferSizeMb >= ByteUnit.GiB.toMiB(2L)) {
                throw new IllegalArgumentException(
                        "spark.kryoserializer.buffer.max must be less than 2048 mb, got: " + maxBufferSizeMb + " mb.");
            } else {
                this.maxBufferSize = (int) ByteUnit.MiB.toBytes(maxBufferSizeMb);
                //this.userRegistrator = sparkConfiguration.getOption("spark.kryo.registrator");
            }
        }
        // Build a pool of Gryo mappers pre-registered with the Spark, Scala, and
        // TinkerPop types that cross the wire during a Spark graph computation.
        this.gryoPool = GryoPool.build()
                .poolSize(sparkConfiguration.getInt(GryoPool.CONFIG_IO_GRYO_POOL_SIZE, 256))
                .ioRegistries(makeApacheConfiguration(sparkConfiguration).getList(GryoPool.CONFIG_IO_REGISTRY, Collections.emptyList()))
                .initializeMapper(builder -> {
                    try {
                        builder.addCustom(Tuple2.class, new Tuple2Serializer())
                                .addCustom(Tuple2[].class)
                                .addCustom(Tuple3.class, new Tuple3Serializer())
                                .addCustom(Tuple3[].class)
                                .addCustom(CompactBuffer.class, new CompactBufferSerializer())
                                .addCustom(CompactBuffer[].class)
                                .addCustom(CompressedMapStatus.class)
                                .addCustom(BlockManagerId.class)
                                .addCustom(HighlyCompressedMapStatus.class, new ExternalizableSerializer()) // Externalizable is implemented, so this is okay
                                .addCustom(HttpBroadcast.class)
                                .addCustom(PythonBroadcast.class)
                                .addCustom(BoxedUnit.class)
                                .addCustom(Class.forName("scala.reflect.ClassTag$$anon$1"), new JavaSerializer())
                                .addCustom(Class.forName("scala.reflect.ManifestFactory$$anon$1"), new JavaSerializer())
                                .addCustom(WrappedArray.ofRef.class, new WrappedArraySerializer())
                                .addCustom(MessagePayload.class)
                                .addCustom(ViewIncomingPayload.class)
                                .addCustom(ViewOutgoingPayload.class)
                                .addCustom(ViewPayload.class)
                                .addCustom(SerializableConfiguration.class, new JavaSerializer())
                                .addCustom(VertexWritable.class, new VertexWritableSerializer())
                                .addCustom(ObjectWritable.class, new ObjectWritableSerializer())
                                .referenceTracking(referenceTracking)
                                .registrationRequired(registrationRequired);
                        // add these as we find ClassNotFoundExceptions
                    } catch (final ClassNotFoundException e) {
                        throw new IllegalStateException(e);
                    }
                }).create();
    }

    public Output newOutput() {
        // A Kryo Output sized by the configured buffer settings.
        return new Output(this.bufferSize, this.maxBufferSize);
    }

    public GryoPool getGryoPool() {
        return this.gryoPool;
    }

    @Override
    public SerializerInstance newInstance() {
        return new GryoSerializerInstance(this);
    }

    private static Configuration makeApacheConfiguration(final SparkConf sparkConfiguration) {
        // Copy every Spark property into a Commons Configuration so GryoPool can read it.
        final BaseConfiguration apacheConfiguration = new BaseConfiguration();
        apacheConfiguration.setDelimiterParsingDisabled(true);
        for (final Tuple2<String, String> tuple : sparkConfiguration.getAll()) {
            apacheConfiguration.setProperty(tuple._1(), tuple._2());
        }
        return apacheConfiguration;
    }
}
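For context (this is not part of the Apache source above): a Spark job never constructs GryoSerializer by hand; it selects the class through Spark's standard spark.serializer property, and Spark instantiates it with the job's SparkConf. Below is a minimal sketch of that wiring, assuming a local test run. The GryoSerializerDemo class name and the local[*] master are illustrative assumptions; the configuration keys are the same ones the constructor above reads, with its default values shown.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

// Hypothetical driver class; only the "spark.serializer" wiring is the point.
public class GryoSerializerDemo {
    public static void main(final String[] args) {
        final SparkConf conf = new SparkConf()
                .setMaster("local[*]")              // assumption: a local test run
                .setAppName("gryo-serializer-demo")
                // Tell Spark to serialize shuffle/broadcast data through GryoSerializer.
                .set("spark.serializer", "org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer")
                // Same keys the GryoSerializer constructor reads; values are its defaults.
                .set("spark.kryoserializer.buffer", "64k")
                .set("spark.kryoserializer.buffer.max", "64m");
        try (final JavaSparkContext sc = new JavaSparkContext(conf)) {
            System.out.println("Serializer in use: " + sc.getConf().get("spark.serializer"));
        }
    }
}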