Example usage for org.apache.hadoop.io.serializer Serializer close

Introduction

This page collects example usages of org.apache.hadoop.io.serializer.Serializer#close() from open-source projects.

Prototype

void close() throws IOException;

Document

Close the underlying output stream and clear up any resources.
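
For orientation, here is a minimal sketch of the full open / serialize / close lifecycle. The helper name serializeToBytes and the IntWritable value are illustrative only; the sketch assumes the default io.serializations setting, which includes WritableSerialization.

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;

public static byte[] serializeToBytes() throws IOException {
    // Resolve a Serializer for IntWritable; with the default configuration this
    // is provided by WritableSerialization.
    Configuration conf = new Configuration();
    SerializationFactory factory = new SerializationFactory(conf);
    Serializer<IntWritable> serializer = factory.getSerializer(IntWritable.class);

    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try {
        serializer.open(out);                      // bind the serializer to the output stream
        serializer.serialize(new IntWritable(42)); // write one value
    } finally {
        serializer.close();                        // close the underlying stream and release resources
    }
    return out.toByteArray();
}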

Usage

From source file: cascading.tuple.hadoop.SerializationElementWriter.java

License: Open Source License

public void close() {
    if (serializers.size() == 0)
        return;

    Collection<Serializer> clone = new ArrayList<Serializer>(serializers.values());

    serializers.clear();

    for (Serializer serializer : clone) {
        try {
            serializer.close();
        } catch (IOException exception) {
            // do nothing
        }
    }
}

From source file: com.chinamobile.bcbsp.client.BSPJobClient.java

License: Apache License

/**
 * Write splits.
 * @param job BSPJob
 * @param submitSplitFile Path
 * @param <T> org.apache.hadoop.mapreduce.InputSplit
 * @return the number of splits written
 */
@SuppressWarnings("unchecked")
private <T extends org.apache.hadoop.mapreduce.InputSplit> int writeSplits(BSPJob job, Path submitSplitFile)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration confs = job.getConf();
    com.chinamobile.bcbsp.io.InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(),
            confs);
    input.initialize(job.getConf());
    List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(job);
    int maxSplits = job.getNumPartition();
    int splitNum = splits.size();
    double factor = splitNum / (float) maxSplits;
    if (factor > 1.0) {
        job.setInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, (int) Math.ceil(factor));
        LOG.info("[Split Adjust Factor] " + (int) Math.ceil(factor));
        LOG.info("[Partition Num] " + maxSplits);
        splits = input.getSplits(job);
        splitNum = splits.size();
    }
    T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);
    // sort the splits into order based on size, so that the biggest
    // go first
    Arrays.sort(array, new NewSplitComparator());
    DataOutputStream out = writeSplitsFileHeader(confs, submitSplitFile, array.length);
    try {
        if (array.length != 0) {
            DataOutputBuffer buffer = new DataOutputBuffer();
            RawSplit rawSplit = new RawSplit();
            SerializationFactory factory = new SerializationFactory(confs);
            Serializer<T> serializer = factory.getSerializer((Class<T>) array[0].getClass());
            serializer.open(buffer);
            for (T split : array) {
                rawSplit.setClassName(split.getClass().getName());
                buffer.reset();
                serializer.serialize(split);
                rawSplit.setDataLength(split.getLength());
                rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
                rawSplit.setLocations(split.getLocations());
                rawSplit.write(out);
            }
            serializer.close();
        }
    } finally {
        out.close();
    }
    return splitNum;
}

From source file: com.datasalt.pangool.serialization.HadoopSerialization.java

License: Apache License

/**
 * Serializes the given object using the Hadoop serialization system.
 */
public void ser(Object datum, OutputStream output) throws IOException {
    Map<Class, Serializer> serializers = cachedSerializers.get();
    Serializer ser = serializers.get(datum.getClass());
    if (ser == null) {
        ser = serialization.getSerializer(datum.getClass());
        if (ser == null) {
            throw new IOException("Serializer for class " + datum.getClass() + " not found");
        }
        serializers.put(datum.getClass(), ser);
    }
    ser.open(output);
    ser.serialize(datum);
    ser.close();
}

From source file: com.datasalt.pangool.tuplemr.mapred.SerializerComparator.java

License: Apache License

@SuppressWarnings({ "rawtypes", "unchecked" })
public int compare(Object o1, Serializer ser1, Object o2, Serializer ser2) {
    try {
        if (o1 == null) {
            return (o2 == null) ? 0 : -1;
        } else if (o2 == null) {
            return 1;
        }

        buf1.reset();
        if (ser1 == null) {
            hadoopSer.ser(o1, buf1);
        } else {
            ser1.open(buf1);
            ser1.serialize(o1);
            ser1.close();
        }
        buf2.reset();
        if (ser2 == null) {
            hadoopSer.ser(o2, buf2);
        } else {
            ser2.open(buf2);
            ser2.serialize(o2);
            ser2.close();
        }

        return WritableComparator.compareBytes(buf1.getData(), 0, buf1.getLength(), buf2.getData(), 0,
                buf2.getLength());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file: com.datasalt.pangool.tuplemr.serialization.SimpleTupleSerializer.java

License: Apache License

private void writeCustomObject(Object element, DataOutput output, Serializer customSer)
        throws CustomObjectSerializationException {
    try {
        tmpOutputBuffer.reset();
        if (customSer != null) {
            customSer.open(tmpOutputBuffer);
            customSer.serialize(element);
            customSer.close();
        } else {
            // If no custom serializer defined then use Hadoop Serialization by default
            ser.ser(element, tmpOutputBuffer);
        }
        WritableUtils.writeVInt(output, tmpOutputBuffer.getLength());
        output.write(tmpOutputBuffer.getData(), 0, tmpOutputBuffer.getLength());
    } catch (Throwable e) {
        throw new CustomObjectSerializationException(e);
    }
}

From source file: com.datasalt.pangool.utils.TupleToAvroRecordConverter.java

License: Apache License

/**
 * Moves data between a Tuple and an Avro Record
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public Record toRecord(ITuple tuple, Record reuse) throws IOException {
    Record record = reuse;
    if (record == null) {
        record = new Record(avroSchema);
    }
    if (schemaValidation && !tuple.getSchema().equals(pangoolSchema)) {
        throw new IOException("Tuple '" + tuple + "' " + "contains schema not expected." + "Expected schema '"
                + pangoolSchema + " and actual: " + tuple.getSchema());
    }
    for (int i = 0; i < pangoolSchema.getFields().size(); i++) {
        Object obj = tuple.get(i);
        Field field = pangoolSchema.getField(i);
        if (obj == null) {
            throw new IOException("Field '" + field.getName() + "' can't be null in tuple:" + tuple);
        }

        switch (field.getType()) {
        case INT:
        case LONG:
        case FLOAT:
        case BOOLEAN:
        case DOUBLE:
        case BYTES:
            record.put(i, obj); //optimistic
            break;
        case OBJECT:
            Serializer customSer = customSerializers[i];
            DataOutputBuffer buffer = buffers[i];
            buffer.reset();
            if (customSer != null) {
                customSer.open(buffer);
                customSer.serialize(obj);
                customSer.close(); //TODO is this safe ?
            } else {
                hadoopSer.ser(obj, buffer);
            }
            //TODO this byteBuffer instances should be cached and reused
            ByteBuffer byteBuffer = ByteBuffer.wrap(buffer.getData(), 0, buffer.getLength());
            record.put(i, byteBuffer);
            break;
        case ENUM:
            record.put(i, obj.toString());
            break;
        case STRING:
            record.put(i, new Utf8(obj.toString())); //could be directly String ?
            break;
        default:
            throw new IOException("Not correspondence to Avro type from Pangool type " + field.getType());
        }
    }
    return record;
}

From source file: com.datasalt.utils.io.Serialization.java

License: Apache License

public byte[] ser(Object datum) throws IOException {
    Serializer ser = serialization.getSerializer(datum.getClass());
    ByteArrayOutputStream baOs = cachedOutputStream.get();
    baOs.reset();
    ser.open(baOs);
    ser.serialize(datum);
    ser.close();
    return baOs.toByteArray();

}

From source file: com.datatorrent.demos.mroperator.MapOperator.java

License: Open Source License

@SuppressWarnings("rawtypes")
@Override
public Collection<Partition<MapOperator<K1, V1, K2, V2>>> definePartitions(
        Collection<Partition<MapOperator<K1, V1, K2, V2>>> partitions, int incrementalCapacity) {
    Collection c = partitions;
    Collection<Partition<MapOperator<K1, V1, K2, V2>>> operatorPartitions = c;
    Partition<MapOperator<K1, V1, K2, V2>> template = null;
    Iterator<Partition<MapOperator<K1, V1, K2, V2>>> itr = operatorPartitions.iterator();
    template = itr.next();
    Configuration conf = new Configuration();
    SerializationFactory serializationFactory = new SerializationFactory(conf);
    if (outstream.size() == 0) {
        InputSplit[] splits;
        try {
            splits = getSplits(new JobConf(conf), incrementalCapacity + 1,
                    template.getPartitionedInstance().getDirName());
        } catch (Exception e1) {
            logger.info(" can't get splits {}", e1.getMessage());
            throw new RuntimeException(e1);
        }
        Collection<Partition<MapOperator<K1, V1, K2, V2>>> operList = new ArrayList<Partition<MapOperator<K1, V1, K2, V2>>>();
        itr = operatorPartitions.iterator();
        int size = splits.length;
        Serializer keySerializer = serializationFactory.getSerializer(splits[0].getClass());
        while (size > 0 && itr.hasNext()) {
            Partition<MapOperator<K1, V1, K2, V2>> p = itr.next();
            MapOperator<K1, V1, K2, V2> opr = p.getPartitionedInstance();
            opr.setInputFormatClass(inputFormatClass);
            opr.setMapClass(mapClass);
            opr.setCombineClass(combineClass);
            opr.setConfigFile(configFile);
            try {
                keySerializer.open(opr.getOutstream());
                keySerializer.serialize(splits[size - 1]);
                opr.setInputSplitClass(splits[size - 1].getClass());
            } catch (IOException e) {
                logger.info("error while serializing {}", e.getMessage());
            }
            size--;
            operList.add(p);
        }
        while (size > 0) {
            MapOperator<K1, V1, K2, V2> opr = new MapOperator<K1, V1, K2, V2>();
            opr.setInputFormatClass(inputFormatClass);
            opr.setMapClass(mapClass);
            opr.setCombineClass(combineClass);
            opr.setConfigFile(configFile);
            try {
                keySerializer.open(opr.getOutstream());
                keySerializer.serialize(splits[size - 1]);
                opr.setInputSplitClass(splits[size - 1].getClass());
            } catch (IOException e) {
                logger.info("error while serializing {}", e.getMessage());
            }
            size--;
            operList.add(new DefaultPartition<MapOperator<K1, V1, K2, V2>>(opr));
        }
        try {
            keySerializer.close();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return operList;
    }
    return null;
}

From source file: com.datatorrent.demos.mroperator.MapOperatorTest.java

License: Open Source License

public void testNodeProcessingSchema(MapOperator<LongWritable, Text, Text, IntWritable> oper)
        throws IOException {

    CollectorTestSink sortSink = new CollectorTestSink();
    oper.output.setSink(sortSink);

    oper.setMapClass(WordCount.Map.class);
    oper.setCombineClass(WordCount.Reduce.class);
    oper.setDirName("src/test/resources/mroperator/");
    oper.setConfigFile(null);
    oper.setInputFormatClass(TextInputFormat.class);

    Configuration conf = new Configuration();
    JobConf jobConf = new JobConf(conf);
    FileInputFormat.setInputPaths(jobConf, new Path("src/test/resources/mroperator/"));
    TextInputFormat inputFormat = new TextInputFormat();
    inputFormat.configure(jobConf);
    InputSplit[] splits = inputFormat.getSplits(jobConf, 1);
    SerializationFactory serializationFactory = new SerializationFactory(conf);
    Serializer keySerializer = serializationFactory.getSerializer(splits[0].getClass());
    keySerializer.open(oper.getOutstream());
    keySerializer.serialize(splits[0]);
    oper.setInputSplitClass(splits[0].getClass());
    keySerializer.close();
    oper.setup(null);
    oper.beginWindow(0);
    oper.emitTuples();
    oper.emitTuples();
    oper.endWindow();
    oper.beginWindow(1);
    oper.emitTuples();
    oper.endWindow();

    Assert.assertEquals("number emitted tuples", 6, sortSink.collectedTuples.size());
    for (Object o : sortSink.collectedTuples) {
        logger.debug(o.toString());
    }
    logger.debug("Done testing round\n");
}

From source file: com.datatorrent.demos.mroperator.OutputCollectorImpl.java

License: Open Source License

private <T> T cloneObj(T t) throws IOException {
    Serializer<T> keySerializer;
    Class<T> keyClass;
    PipedInputStream pis = new PipedInputStream();
    PipedOutputStream pos = new PipedOutputStream(pis);
    keyClass = (Class<T>) t.getClass();
    keySerializer = serializationFactory.getSerializer(keyClass);
    keySerializer.open(pos);
    keySerializer.serialize(t);
    Deserializer<T> keyDeserializer = serializationFactory.getDeserializer(keyClass);
    keyDeserializer.open(pis);
    T clonedArg0 = keyDeserializer.deserialize(null);
    pos.close();
    pis.close();
    keySerializer.close();
    keyDeserializer.close();
    return clonedArg0;
}