Usage examples for org.apache.hadoop.io.serializer.SerializationFactory.getSerialization
@SuppressWarnings("unchecked") public <T> Serialization<T> getSerialization(Class<T> c)
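getSerialization walks the serialization classes registered under the io.serializations configuration property and returns the first Serialization that accepts the given class, or null when none does. Before the project examples below, here is a minimal self-contained sketch (not taken from any of the listed projects; the class name GetSerializationExample is illustrative) assuming the default Hadoop configuration, which registers WritableSerialization, so a Writable key class such as org.apache.hadoop.io.Text resolves successfully:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.serializer.Serialization;
import org.apache.hadoop.io.serializer.SerializationFactory;

public class GetSerializationExample {
    public static void main(String[] args) {
        // The default Configuration registers WritableSerialization under
        // "io.serializations", so a Writable class resolves to a non-null result.
        Configuration conf = new Configuration();
        SerializationFactory factory = new SerializationFactory(conf);

        Serialization<Text> serialization = factory.getSerialization(Text.class);
        if (serialization == null) {
            // Returned when no registered Serialization accepts the class.
            System.out.println("No serialization registered for Text");
        } else {
            System.out.println("Resolved " + serialization.getClass().getName());
        }
    }
}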
From source file: cz.seznam.euphoria.hadoop.utils.Cloner.java
License: Apache License
/**
 * Helper method retrieving a cloner for a given class type from the
 * given configuration.
 *
 * @param <T> the type of objects the resulting cloner will be able to handle
 *
 * @param what the class for which to retrieve a cloner
 * @param conf the hadoop configuration defining the serializer/deserializer
 *             to utilize for cloning
 *
 * @return a cloner instance able to clone objects of the specified type
 */
static <T> Cloner<T> get(Class<T> what, Configuration conf) {
    SerializationFactory factory = new SerializationFactory(conf);
    Serialization<T> serialization = factory.getSerialization(what);
    if (serialization == null) {
        // FIXME: if we cannot (de)serialize just do not clone
        return t -> t;
    }
    Deserializer<T> deserializer = serialization.getDeserializer(what);
    Serializer<T> serializer = serialization.getSerializer(what);
    return (T elem) -> {
        try {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            serializer.open(baos);
            serializer.serialize(elem);
            serializer.close();
            byte[] serialized = baos.toByteArray();
            ByteArrayInputStream bais = new ByteArrayInputStream(serialized);
            deserializer.open(bais);
            T deserialized = deserializer.deserialize(null);
            deserializer.close();
            return deserialized;
        } catch (IOException ex) {
            throw new RuntimeException(ex);
        }
    };
}
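For orientation, a hedged usage sketch of the helper above (not part of the Euphoria sources shown here): the name of Cloner's single cloning method is assumed to be clone(T), and the call assumes Cloner.get(...) is visible from the caller's package; adjust both to the actual interface.

// Hypothetical caller; the method name clone(T) and the visibility of
// Cloner.get(...) are assumptions, not taken from the source above.
Configuration conf = new Configuration();
Cloner<Text> cloner = Cloner.get(Text.class, conf);

Text original = new Text("hello");
Text copy = cloner.clone(original);   // round-trips through Writable (de)serialization

assert copy != original && copy.equals(original);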
From source file: org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2TaskContext.java
License: Apache License
/**
 * Gets serializer for specified class.
 *
 * @param cls Class.
 * @param jobConf Job configuration.
 * @return Appropriate serializer.
 */
@SuppressWarnings("unchecked")
private HadoopSerialization getSerialization(Class<?> cls, Configuration jobConf) throws IgniteCheckedException {
    A.notNull(cls, "cls");

    SerializationFactory factory = new SerializationFactory(jobConf);

    Serialization<?> serialization = factory.getSerialization(cls);

    if (serialization == null)
        throw new IgniteCheckedException("Failed to find serialization for: " + cls.getName());

    if (serialization.getClass() == WritableSerialization.class)
        return new HadoopWritableSerialization((Class<? extends Writable>) cls);

    return new HadoopSerializationWrapper(serialization, cls);
}
From source file: org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2TaskContext.java
License: Apache License
/**
 * Gets serializer for specified class.
 *
 * @param cls Class.
 * @param jobConf Job configuration.
 * @return Appropriate serializer.
 */
@SuppressWarnings("unchecked")
private GridHadoopSerialization getSerialization(Class<?> cls, Configuration jobConf) throws IgniteCheckedException {
    A.notNull(cls, "cls");

    SerializationFactory factory = new SerializationFactory(jobConf);

    Serialization<?> serialization = factory.getSerialization(cls);

    if (serialization == null)
        throw new IgniteCheckedException("Failed to find serialization for: " + cls.getName());

    if (serialization.getClass() == WritableSerialization.class)
        return new GridHadoopWritableSerialization((Class<? extends Writable>) cls);

    return new GridHadoopSerializationWrapper(serialization, cls);
}
From source file: org.apache.mahout.hadoop.mapreduce.lib.AvroOutputFormat.java
License: Apache License
public RecordWriter<K, Object> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    SerializationFactory factory = new SerializationFactory(conf);

    Map<String, String> metadata = getAvroOutputMetadata(context);
    String className = metadata.get(AvroSerialization.CLASS_KEY);
    Class<?> clazz = null;
    try {
        clazz = context.getConfiguration().getClassByName(className);
    } catch (ClassNotFoundException ex) {
        throw new IOException("Could not get class for output key metadata.");
    }

    SerializationBase<K> serialization = (SerializationBase<K>) factory.getSerialization(clazz);

    // Check to make sure we have an Avro-based serializer.
    if (null == serialization) {
        throw new IOException("Could not get serializer for output key metadata.");
    } else if (!(serialization instanceof AvroSerialization)) {
        throw new IOException("Output key is not Avro-serializable.");
    }

    // Use this to instantiate the appropriate type of DatumWriter.
    AvroSerialization<K> avroSerialization = (AvroSerialization<K>) serialization;

    final Schema schema = avroSerialization.getSchema(metadata);
    final DatumWriter<K> datumWriter = avroSerialization.getWriter(metadata);
    final OutputStream ostream = createStream(context);
    final DataFileWriter<K> fileWriter = new DataFileWriter<K>(schema, ostream, datumWriter);

    return new RecordWriter<K, Object>() {
        private boolean warnedVal = false;
        private int recordsFromSync = 0;

        public void write(K key, Object value) throws IOException {
            if (!warnedVal && null != value) {
                LOG.warn("Writing non-null value to AvroOutputFormat: " + "this value will be ignored.");
                warnedVal = true;
            }
            fileWriter.append(key);
            recordsFromSync++;
            if (recordsFromSync >= SYNC_DISTANCE) {
                fileWriter.sync();
                recordsFromSync = 0;
            }
        }

        public void close(TaskAttemptContext context) throws IOException {
            if (null != fileWriter) {
                fileWriter.close();
            }
        }
    };
}
From source file: org.apache.mahout.hadoop.mapreduce.lib.AvroRecordReader.java
License: Apache License
/**
 * Interpret the user-specified metadata to determine the type of DatumReader
 * to generate.
 * @param context the context containing the job configuration
 * @return a DatumReader
 */
private DatumReader<K> getDatumReader(TaskAttemptContext context) {
    Map<String, String> metadata = AvroInputFormat.getAvroInputMetadata(context);
    String className = metadata.get(AvroSerialization.CLASS_KEY);
    Class<?> clazz = null;
    try {
        clazz = context.getConfiguration().getClassByName(className);
    } catch (ClassNotFoundException ex) {
        return new GenericDatumReader<K>();
    }

    SerializationFactory factory = new SerializationFactory(context.getConfiguration());
    SerializationBase<K> serialization = (SerializationBase<K>) factory.getSerialization(clazz);
    if (null == serialization) {
        // metadata is unset or corrupt. Use the generic reader.
        LOG.warn("Could not find appropriate serialization for AvroInputFormat;"
                + " trying GenericDatumReader");
        return new GenericDatumReader<K>();
    } else if (!(serialization instanceof AvroSerialization)) {
        // metadata is not avro metadata?
        LOG.warn("Metadata in " + AvroInputFormat.AVRO_INPUT_METADATA_KEY
                + " is not avro-serializable. Using GenericDatumReader.");
        return new GenericDatumReader<K>();
    }

    LOG.info("Got serialization: " + serialization.getClass().getName());
    return ((AvroSerialization<K>) serialization).getReader(metadata);
}
From source file: org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2TaskContext.java
License: Open Source License
/**
 * Gets serializer for specified class.
 *
 * @param cls Class.
 * @param jobConf Job configuration.
 * @return Appropriate serializer.
 */
@SuppressWarnings("unchecked")
private GridHadoopSerialization getSerialization(Class<?> cls, Configuration jobConf) throws GridException {
    A.notNull(cls, "cls");

    SerializationFactory factory = new SerializationFactory(jobConf);

    Serialization<?> serialization = factory.getSerialization(cls);

    if (serialization == null)
        throw new GridException("Failed to find serialization for: " + cls.getName());

    if (serialization.getClass() == WritableSerialization.class)
        return new GridHadoopWritableSerialization((Class<? extends Writable>) cls);

    return new GridHadoopSerializationWrapper(serialization, cls);
}