Example usage for org.apache.hadoop.io.serializer Serializer close

Introduction

This page collects example usages of org.apache.hadoop.io.serializer.Serializer#close() from open-source projects.

Prototype

void close() throws IOException;

Document

Close the underlying output stream and clear up any resources.
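
For orientation, here is a minimal sketch of the full open / serialize / close lifecycle. The helper name serializeToBytes and the IntWritable value are illustrative only; the sketch assumes the default io.serializations setting, which includes WritableSerialization.

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;

public static byte[] serializeToBytes() throws IOException {
    // Resolve a Serializer for IntWritable; with the default configuration this
    // is provided by WritableSerialization.
    Configuration conf = new Configuration();
    SerializationFactory factory = new SerializationFactory(conf);
    Serializer<IntWritable> serializer = factory.getSerializer(IntWritable.class);

    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try {
        serializer.open(out);                      // bind the serializer to the output stream
        serializer.serialize(new IntWritable(42)); // write one value
    } finally {
        serializer.close();                        // close the underlying stream and release resources
    }
    return out.toByteArray();
}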

Usage

From source file: cascading.tuple.hadoop.SerializationElementWriter.java

License: Open Source License

public void close() {
    if (serializers.size() == 0)
        return;

    Collection<Serializer> clone = new ArrayList<Serializer>(serializers.values());

    serializers.clear();

    for (Serializer serializer : clone) {
        try {
            serializer.close();
        } catch (IOException exception) {
            // do nothing
        }
    }
}

From source file: com.chinamobile.bcbsp.client.BSPJobClient.java

License: Apache License

/**
 * Write splits.
 * @param job BSPJob
 * @param submitSplitFile Path
 * @param <T> org.apache.hadoop.mapreduce.InputSplit
 * @return the number of splits written
 */
@SuppressWarnings("unchecked")
private <T extends org.apache.hadoop.mapreduce.InputSplit> int writeSplits(BSPJob job, Path submitSplitFile)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration confs = job.getConf();
    com.chinamobile.bcbsp.io.InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(),
            confs);
    input.initialize(job.getConf());
    List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(job);
    int maxSplits = job.getNumPartition();
    int splitNum = splits.size();
    double factor = splitNum / (float) maxSplits;
    if (factor > 1.0) {
        job.setInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, (int) Math.ceil(factor));
        LOG.info("[Split Adjust Factor] " + (int) Math.ceil(factor));
        LOG.info("[Partition Num] " + maxSplits);
        splits = input.getSplits(job);
        splitNum = splits.size();
    }
    T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);
    // sort the splits into order based on size, so that the biggest
    // go first
    Arrays.sort(array, new NewSplitComparator());
    DataOutputStream out = writeSplitsFileHeader(confs, submitSplitFile, array.length);
    try {
        if (array.length != 0) {
            DataOutputBuffer buffer = new DataOutputBuffer();
            RawSplit rawSplit = new RawSplit();
            SerializationFactory factory = new SerializationFactory(confs);
            Serializer<T> serializer = factory.getSerializer((Class<T>) array[0].getClass());
            serializer.open(buffer);
            for (T split : array) {
                rawSplit.setClassName(split.getClass().getName());
                buffer.reset();
                serializer.serialize(split);
                rawSplit.setDataLength(split.getLength());
                rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
                rawSplit.setLocations(split.getLocations());
                rawSplit.write(out);
            }
            serializer.close();
        }
    } finally {
        out.close();
    }
    return splitNum;
}

From source file: com.datasalt.pangool.serialization.HadoopSerialization.java

License: Apache License

/**
 * Serializes the given object using the Hadoop serialization system.
 */
public void ser(Object datum, OutputStream output) throws IOException {
    Map<Class, Serializer> serializers = cachedSerializers.get();
    Serializer ser = serializers.get(datum.getClass());
    if (ser == null) {
        ser = serialization.getSerializer(datum.getClass());
        if (ser == null) {
            throw new IOException("Serializer for class " + datum.getClass() + " not found");
        }
        serializers.put(datum.getClass(), ser);
    }
    ser.open(output);
    ser.serialize(datum);
    ser.close();
}

From source file: com.datasalt.pangool.tuplemr.mapred.SerializerComparator.java

License: Apache License

@SuppressWarnings({ "rawtypes", "unchecked" })
public int compare(Object o1, Serializer ser1, Object o2, Serializer ser2) {
    try {
        if (o1 == null) {
            return (o2 == null) ? 0 : -1;
        } else if (o2 == null) {
            return 1;
        }

        buf1.reset();
        if (ser1 == null) {
            hadoopSer.ser(o1, buf1);
        } else {
            ser1.open(buf1);
            ser1.serialize(o1);
            ser1.close();
        }
        buf2.reset();
        if (ser2 == null) {
            hadoopSer.ser(o2, buf2);
        } else {
            ser2.open(buf2);
            ser2.serialize(o2);
            ser2.close();
        }

        return WritableComparator.compareBytes(buf1.getData(), 0, buf1.getLength(), buf2.getData(), 0,
                buf2.getLength());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file: com.datasalt.pangool.tuplemr.serialization.SimpleTupleSerializer.java

License: Apache License

private void writeCustomObject(Object element, DataOutput output, Serializer customSer)
        throws CustomObjectSerializationException {
    try {
        tmpOutputBuffer.reset();
        if (customSer != null) {
            customSer.open(tmpOutputBuffer);
            customSer.serialize(element);
            customSer.close();
        } else {
            // If no custom serializer defined then use Hadoop Serialization by default
            ser.ser(element, tmpOutputBuffer);
        }
        WritableUtils.writeVInt(output, tmpOutputBuffer.getLength());
        output.write(tmpOutputBuffer.getData(), 0, tmpOutputBuffer.getLength());
    } catch (Throwable e) {
        throw new CustomObjectSerializationException(e);
    }
}

From source file: com.datasalt.pangool.utils.TupleToAvroRecordConverter.java

License: Apache License

/**
 * Moves data between a Tuple and an Avro Record
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public Record toRecord(ITuple tuple, Record reuse) throws IOException {
    Record record = reuse;
    if (record == null) {
        record = new Record(avroSchema);
    }
    if (schemaValidation && !tuple.getSchema().equals(pangoolSchema)) {
        throw new IOException("Tuple '" + tuple + "' " + "contains schema not expected." + "Expected schema '"
                + pangoolSchema + " and actual: " + tuple.getSchema());
    }
    for (int i = 0; i < pangoolSchema.getFields().size(); i++) {
        Object obj = tuple.get(i);
        Field field = pangoolSchema.getField(i);
        if (obj == null) {
            throw new IOException("Field '" + field.getName() + "' can't be null in tuple:" + tuple);
        }

        switch (field.getType()) {
        case INT:
        case LONG:
        case FLOAT:
        case BOOLEAN:
        case DOUBLE:
        case BYTES:
            record.put(i, obj); //optimistic
            break;
        case OBJECT:
            Serializer customSer = customSerializers[i];
            DataOutputBuffer buffer = buffers[i];
            buffer.reset();
            if (customSer != null) {
                customSer.open(buffer);
                customSer.serialize(obj);
                customSer.close(); //TODO is this safe ?
            } else {
                hadoopSer.ser(obj, buffer);
            }
            //TODO this byteBuffer instances should be cached and reused
            ByteBuffer byteBuffer = ByteBuffer.wrap(buffer.getData(), 0, buffer.getLength());
            record.put(i, byteBuffer);
            break;
        case ENUM:
            record.put(i, obj.toString());
            break;
        case STRING:
            record.put(i, new Utf8(obj.toString())); //could be directly String ?
            break;
        default:
            throw new IOException("Not correspondence to Avro type from Pangool type " + field.getType());
        }
    }
    return record;
}

From source file: com.datasalt.utils.io.Serialization.java

License: Apache License

public byte[] ser(Object datum) throws IOException {
    Serializer ser = serialization.getSerializer(datum.getClass());
    ByteArrayOutputStream baOs = cachedOutputStream.get();
    baOs.reset();
    ser.open(baOs);
    ser.serialize(datum);
    ser.close();
    return baOs.toByteArray();

}

From source file: com.datatorrent.demos.mroperator.MapOperator.java

License: Open Source License

@SuppressWarnings("rawtypes")
@Override
public Collection<Partition<MapOperator<K1, V1, K2, V2>>> definePartitions(
        Collection<Partition<MapOperator<K1, V1, K2, V2>>> partitions, int incrementalCapacity) {
    Collection c = partitions;
    Collection<Partition<MapOperator<K1, V1, K2, V2>>> operatorPartitions = c;
    Partition<MapOperator<K1, V1, K2, V2>> template = null;
    Iterator<Partition<MapOperator<K1, V1, K2, V2>>> itr = operatorPartitions.iterator();
    template = itr.next();
    Configuration conf = new Configuration();
    SerializationFactory serializationFactory = new SerializationFactory(conf);
    if (outstream.size() == 0) {
        InputSplit[] splits;
        try {
            splits = getSplits(new JobConf(conf), incrementalCapacity + 1,
                    template.getPartitionedInstance().getDirName());
        } catch (Exception e1) {
            logger.info(" can't get splits {}", e1.getMessage());
            throw new RuntimeException(e1);
        }
        Collection<Partition<MapOperator<K1, V1, K2, V2>>> operList = new ArrayList<Partition<MapOperator<K1, V1, K2, V2>>>();
        itr = operatorPartitions.iterator();
        int size = splits.length;
        Serializer keySerializer = serializationFactory.getSerializer(splits[0].getClass());
        while (size > 0 && itr.hasNext()) {
            Partition<MapOperator<K1, V1, K2, V2>> p = itr.next();
            MapOperator<K1, V1, K2, V2> opr = p.getPartitionedInstance();
            opr.setInputFormatClass(inputFormatClass);
            opr.setMapClass(mapClass);
            opr.setCombineClass(combineClass);
            opr.setConfigFile(configFile);
            try {
                keySerializer.open(opr.getOutstream());
                keySerializer.serialize(splits[size - 1]);
                opr.setInputSplitClass(splits[size - 1].getClass());
            } catch (IOException e) {
                logger.info("error while serializing {}", e.getMessage());
            }
            size--;
            operList.add(p);
        }
        while (size > 0) {
            MapOperator<K1, V1, K2, V2> opr = new MapOperator<K1, V1, K2, V2>();
            opr.setInputFormatClass(inputFormatClass);
            opr.setMapClass(mapClass);
            opr.setCombineClass(combineClass);
            opr.setConfigFile(configFile);
            try {
                keySerializer.open(opr.getOutstream());
                keySerializer.serialize(splits[size - 1]);
                opr.setInputSplitClass(splits[size - 1].getClass());
            } catch (IOException e) {
                logger.info("error while serializing {}", e.getMessage());
            }
            size--;
            operList.add(new DefaultPartition<MapOperator<K1, V1, K2, V2>>(opr));
        }
        try {
            keySerializer.close();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return operList;
    }
    return null;
}

From source file: com.datatorrent.demos.mroperator.MapOperatorTest.java

License: Open Source License

public void testNodeProcessingSchema(MapOperator<LongWritable, Text, Text, IntWritable> oper)
        throws IOException {

    CollectorTestSink sortSink = new CollectorTestSink();
    oper.output.setSink(sortSink);

    oper.setMapClass(WordCount.Map.class);
    oper.setCombineClass(WordCount.Reduce.class);
    oper.setDirName("src/test/resources/mroperator/");
    oper.setConfigFile(null);
    oper.setInputFormatClass(TextInputFormat.class);

    Configuration conf = new Configuration();
    JobConf jobConf = new JobConf(conf);
    FileInputFormat.setInputPaths(jobConf, new Path("src/test/resources/mroperator/"));
    TextInputFormat inputFormat = new TextInputFormat();
    inputFormat.configure(jobConf);
    InputSplit[] splits = inputFormat.getSplits(jobConf, 1);
    SerializationFactory serializationFactory = new SerializationFactory(conf);
    Serializer keySerializer = serializationFactory.getSerializer(splits[0].getClass());
    keySerializer.open(oper.getOutstream());
    keySerializer.serialize(splits[0]);
    oper.setInputSplitClass(splits[0].getClass());
    keySerializer.close();
    oper.setup(null);
    oper.beginWindow(0);
    oper.emitTuples();
    oper.emitTuples();
    oper.endWindow();
    oper.beginWindow(1);
    oper.emitTuples();
    oper.endWindow();

    Assert.assertEquals("number emitted tuples", 6, sortSink.collectedTuples.size());
    for (Object o : sortSink.collectedTuples) {
        logger.debug(o.toString());
    }
    logger.debug("Done testing round\n");
}

From source file: com.datatorrent.demos.mroperator.OutputCollectorImpl.java

License: Open Source License

private <T> T cloneObj(T t) throws IOException {
    Serializer<T> keySerializer;
    Class<T> keyClass;
    PipedInputStream pis = new PipedInputStream();
    PipedOutputStream pos = new PipedOutputStream(pis);
    keyClass = (Class<T>) t.getClass();
    keySerializer = serializationFactory.getSerializer(keyClass);
    keySerializer.open(pos);
    keySerializer.serialize(t);
    Deserializer<T> keyDeserializer = serializationFactory.getDeserializer(keyClass);
    keyDeserializer.open(pis);
    T clonedArg0 = keyDeserializer.deserialize(null);
    pos.close();
    pis.close();
    keySerializer.close();
    keyDeserializer.close();
    return clonedArg0;
}