List of usage examples for org.apache.hadoop.io.serializer Serializer close
void close() throws IOException;
Close the underlying output stream and clear up any resources.
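The idiomatic pattern is: open the serializer on a stream, serialize one or more objects, then call close() to release the stream. Below is a minimal, self-contained sketch of that lifecycle (the class name SerializerCloseExample is ours for illustration; LongWritable is used because its WritableSerialization is registered with SerializationFactory by default):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;

public class SerializerCloseExample {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    SerializationFactory factory = new SerializationFactory(conf);
    Serializer<LongWritable> serializer = factory.getSerializer(LongWritable.class);
    DataOutputBuffer buffer = new DataOutputBuffer();
    serializer.open(buffer);
    try {
      serializer.serialize(new LongWritable(42L));
    } finally {
      // close() also closes the underlying stream (here, the buffer)
      serializer.close();
    }
    System.out.println("Serialized " + buffer.getLength() + " bytes");
  }
}

Note that close() closes the stream passed to open(), so callers that reuse the stream (as several of the examples below do) call close() only after the last serialize().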
From source file:cascading.tuple.hadoop.SerializationElementWriter.java
License:Open Source License
public void close() {
  if (serializers.size() == 0)
    return;

  Collection<Serializer> clone = new ArrayList<Serializer>(serializers.values());
  serializers.clear();

  for (Serializer serializer : clone) {
    try {
      serializer.close();
    } catch (IOException exception) {
      // do nothing
    }
  }
}
From source file:com.chinamobile.bcbsp.client.BSPJobClient.java
License:Apache License
/**
 * Write splits.
 * @param job BSPJob
 * @param submitSplitFile Path
 * @param <T> org.apache.hadoop.mapreduce.InputSplit
 * @return the number of splits written
 */
@SuppressWarnings("unchecked")
private <T extends org.apache.hadoop.mapreduce.InputSplit> int writeSplits(BSPJob job,
    Path submitSplitFile) throws IOException, InterruptedException, ClassNotFoundException {
  Configuration confs = job.getConf();
  com.chinamobile.bcbsp.io.InputFormat<?, ?> input =
      ReflectionUtils.newInstance(job.getInputFormatClass(), confs);
  input.initialize(job.getConf());
  List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(job);
  int maxSplits = job.getNumPartition();
  int splitNum = splits.size();
  double factor = splitNum / (float) maxSplits;
  if (factor > 1.0) {
    job.setInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, (int) Math.ceil(factor));
    LOG.info("[Split Adjust Factor] " + (int) Math.ceil(factor));
    LOG.info("[Partition Num] " + maxSplits);
    splits = input.getSplits(job);
    splitNum = splits.size();
  }
  T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);
  // sort the splits into order based on size, so that the biggest go first
  Arrays.sort(array, new NewSplitComparator());
  DataOutputStream out = writeSplitsFileHeader(confs, submitSplitFile, array.length);
  try {
    if (array.length != 0) {
      DataOutputBuffer buffer = new DataOutputBuffer();
      RawSplit rawSplit = new RawSplit();
      SerializationFactory factory = new SerializationFactory(confs);
      Serializer<T> serializer = factory.getSerializer((Class<T>) array[0].getClass());
      serializer.open(buffer);
      for (T split : array) {
        rawSplit.setClassName(split.getClass().getName());
        buffer.reset();
        serializer.serialize(split);
        rawSplit.setDataLength(split.getLength());
        rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
        rawSplit.setLocations(split.getLocations());
        rawSplit.write(out);
      }
      serializer.close();
    }
  } finally {
    out.close();
  }
  return splitNum;
}
From source file:com.datasalt.pangool.serialization.HadoopSerialization.java
License:Apache License
/**
 * Serializes the given object using the Hadoop serialization system.
 */
public void ser(Object datum, OutputStream output) throws IOException {
  Map<Class, Serializer> serializers = cachedSerializers.get();
  Serializer ser = serializers.get(datum.getClass());
  if (ser == null) {
    ser = serialization.getSerializer(datum.getClass());
    if (ser == null) {
      throw new IOException("Serializer for class " + datum.getClass() + " not found");
    }
    serializers.put(datum.getClass(), ser);
  }
  ser.open(output);
  ser.serialize(datum);
  ser.close();
}
From source file:com.datasalt.pangool.tuplemr.mapred.SerializerComparator.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked" }) public int compare(Object o1, Serializer ser1, Object o2, Serializer ser2) { try {/*from w ww. j a v a2 s . c o m*/ if (o1 == null) { return (o2 == null) ? 0 : -1; } else if (o2 == null) { return 1; } buf1.reset(); if (ser1 == null) { hadoopSer.ser(o1, buf1); } else { ser1.open(buf1); ser1.serialize(o1); ser1.close(); } buf2.reset(); if (ser2 == null) { hadoopSer.ser(o2, buf2); } else { ser2.open(buf2); ser2.serialize(o2); ser2.close(); } return WritableComparator.compareBytes(buf1.getData(), 0, buf1.getLength(), buf2.getData(), 0, buf2.getLength()); } catch (IOException e) { throw new RuntimeException(e); } }
From source file:com.datasalt.pangool.tuplemr.serialization.SimpleTupleSerializer.java
License:Apache License
private void writeCustomObject(Object element, DataOutput output, Serializer customSer)
    throws CustomObjectSerializationException {
  try {
    tmpOutputBuffer.reset();
    if (customSer != null) {
      customSer.open(tmpOutputBuffer);
      customSer.serialize(element);
      customSer.close();
    } else {
      // If no custom serializer defined then use Hadoop Serialization by default
      ser.ser(element, tmpOutputBuffer);
    }
    WritableUtils.writeVInt(output, tmpOutputBuffer.getLength());
    output.write(tmpOutputBuffer.getData(), 0, tmpOutputBuffer.getLength());
  } catch (Throwable e) {
    throw new CustomObjectSerializationException(e);
  }
}
From source file:com.datasalt.pangool.utils.TupleToAvroRecordConverter.java
License:Apache License
/**
 * Moves data between a Tuple and an Avro Record.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public Record toRecord(ITuple tuple, Record reuse) throws IOException {
  Record record = reuse;
  if (record == null) {
    record = new Record(avroSchema);
  }
  if (schemaValidation && !tuple.getSchema().equals(pangoolSchema)) {
    throw new IOException("Tuple '" + tuple + "' contains schema not expected. Expected schema '"
        + pangoolSchema + "' and actual: " + tuple.getSchema());
  }
  for (int i = 0; i < pangoolSchema.getFields().size(); i++) {
    Object obj = tuple.get(i);
    Field field = pangoolSchema.getField(i);
    if (obj == null) {
      throw new IOException("Field '" + field.getName() + "' can't be null in tuple: " + tuple);
    }
    switch (field.getType()) {
    case INT:
    case LONG:
    case FLOAT:
    case BOOLEAN:
    case DOUBLE:
    case BYTES:
      record.put(i, obj); // optimistic
      break;
    case OBJECT:
      Serializer customSer = customSerializers[i];
      DataOutputBuffer buffer = buffers[i];
      buffer.reset();
      if (customSer != null) {
        customSer.open(buffer);
        customSer.serialize(obj);
        customSer.close(); // TODO is this safe?
      } else {
        hadoopSer.ser(obj, buffer);
      }
      // TODO these ByteBuffer instances should be cached and reused
      ByteBuffer byteBuffer = ByteBuffer.wrap(buffer.getData(), 0, buffer.getLength());
      record.put(i, byteBuffer);
      break;
    case ENUM:
      record.put(i, obj.toString());
      break;
    case STRING:
      record.put(i, new Utf8(obj.toString())); // could be directly String?
      break;
    default:
      throw new IOException("No correspondence to Avro type from Pangool type " + field.getType());
    }
  }
  return record;
}
From source file:com.datasalt.utils.io.Serialization.java
License:Apache License
public byte[] ser(Object datum) throws IOException {
  Serializer ser = serialization.getSerializer(datum.getClass());
  ByteArrayOutputStream baOs = cachedOutputStream.get();
  baOs.reset();
  ser.open(baOs);
  ser.serialize(datum);
  ser.close();
  return baOs.toByteArray();
}
From source file:com.datatorrent.demos.mroperator.MapOperator.java
License:Open Source License
@SuppressWarnings("rawtypes") @Override//w w w. j ava 2 s . c o m public Collection<Partition<MapOperator<K1, V1, K2, V2>>> definePartitions( Collection<Partition<MapOperator<K1, V1, K2, V2>>> partitions, int incrementalCapacity) { Collection c = partitions; Collection<Partition<MapOperator<K1, V1, K2, V2>>> operatorPartitions = c; Partition<MapOperator<K1, V1, K2, V2>> template = null; Iterator<Partition<MapOperator<K1, V1, K2, V2>>> itr = operatorPartitions.iterator(); template = itr.next(); Configuration conf = new Configuration(); SerializationFactory serializationFactory = new SerializationFactory(conf); if (outstream.size() == 0) { InputSplit[] splits; try { splits = getSplits(new JobConf(conf), incrementalCapacity + 1, template.getPartitionedInstance().getDirName()); } catch (Exception e1) { logger.info(" can't get splits {}", e1.getMessage()); throw new RuntimeException(e1); } Collection<Partition<MapOperator<K1, V1, K2, V2>>> operList = new ArrayList<Partition<MapOperator<K1, V1, K2, V2>>>(); itr = operatorPartitions.iterator(); int size = splits.length; Serializer keySerializer = serializationFactory.getSerializer(splits[0].getClass()); while (size > 0 && itr.hasNext()) { Partition<MapOperator<K1, V1, K2, V2>> p = itr.next(); MapOperator<K1, V1, K2, V2> opr = p.getPartitionedInstance(); opr.setInputFormatClass(inputFormatClass); opr.setMapClass(mapClass); opr.setCombineClass(combineClass); opr.setConfigFile(configFile); try { keySerializer.open(opr.getOutstream()); keySerializer.serialize(splits[size - 1]); opr.setInputSplitClass(splits[size - 1].getClass()); } catch (IOException e) { logger.info("error while serializing {}", e.getMessage()); } size--; operList.add(p); } while (size > 0) { MapOperator<K1, V1, K2, V2> opr = new MapOperator<K1, V1, K2, V2>(); opr.setInputFormatClass(inputFormatClass); opr.setMapClass(mapClass); opr.setCombineClass(combineClass); opr.setConfigFile(configFile); try { keySerializer.open(opr.getOutstream()); keySerializer.serialize(splits[size - 1]); opr.setInputSplitClass(splits[size - 1].getClass()); } catch (IOException e) { logger.info("error while serializing {}", e.getMessage()); } size--; operList.add(new DefaultPartition<MapOperator<K1, V1, K2, V2>>(opr)); } try { keySerializer.close(); } catch (IOException e) { throw new RuntimeException(e); } return operList; } return null; }
From source file:com.datatorrent.demos.mroperator.MapOperatorTest.java
License:Open Source License
public void testNodeProcessingSchema(MapOperator<LongWritable, Text, Text, IntWritable> oper)
    throws IOException {
  CollectorTestSink sortSink = new CollectorTestSink();
  oper.output.setSink(sortSink);
  oper.setMapClass(WordCount.Map.class);
  oper.setCombineClass(WordCount.Reduce.class);
  oper.setDirName("src/test/resources/mroperator/");
  oper.setConfigFile(null);
  oper.setInputFormatClass(TextInputFormat.class);

  Configuration conf = new Configuration();
  JobConf jobConf = new JobConf(conf);
  FileInputFormat.setInputPaths(jobConf, new Path("src/test/resources/mroperator/"));
  TextInputFormat inputFormat = new TextInputFormat();
  inputFormat.configure(jobConf);
  InputSplit[] splits = inputFormat.getSplits(jobConf, 1);

  SerializationFactory serializationFactory = new SerializationFactory(conf);
  Serializer keySerializer = serializationFactory.getSerializer(splits[0].getClass());
  keySerializer.open(oper.getOutstream());
  keySerializer.serialize(splits[0]);
  oper.setInputSplitClass(splits[0].getClass());
  keySerializer.close();

  oper.setup(null);
  oper.beginWindow(0);
  oper.emitTuples();
  oper.emitTuples();
  oper.endWindow();
  oper.beginWindow(1);
  oper.emitTuples();
  oper.endWindow();

  Assert.assertEquals("number emitted tuples", 6, sortSink.collectedTuples.size());
  for (Object o : sortSink.collectedTuples) {
    logger.debug(o.toString());
  }
  logger.debug("Done testing round\n");
}
From source file:com.datatorrent.demos.mroperator.OutputCollectorImpl.java
License:Open Source License
private <T> T cloneObj(T t) throws IOException { Serializer<T> keySerializer; Class<T> keyClass;//from w ww . j a v a2s.c om PipedInputStream pis = new PipedInputStream(); PipedOutputStream pos = new PipedOutputStream(pis); keyClass = (Class<T>) t.getClass(); keySerializer = serializationFactory.getSerializer(keyClass); keySerializer.open(pos); keySerializer.serialize(t); Deserializer<T> keyDesiralizer = serializationFactory.getDeserializer(keyClass); keyDesiralizer.open(pis); T clonedArg0 = keyDesiralizer.deserialize(null); pos.close(); pis.close(); keySerializer.close(); keyDesiralizer.close(); return clonedArg0; }