Example usage for org.apache.hadoop.io.serializer Deserializer deserialize

List of usage examples for org.apache.hadoop.io.serializer Deserializer deserialize

Introduction

In this page you can find the example usage for org.apache.hadoop.io.serializer Deserializer deserialize.

Prototype

T deserialize(T t) throws IOException;

Source Link

Document

Deserialize the next object from the underlying input stream.

Usage

From source file:cascading.tuple.hadoop.SerializationElementReader.java

License:Open Source License

public Object read(int token, DataInputStream inputStream) throws IOException {
    String className = getClassNameFor(token, inputStream);
    Deserializer deserializer = getDeserializerFor(inputStream, className);

    Object foundObject = null;// w  w w .ja  v a 2s  . com
    Object object = null;

    try {
        object = deserializer.deserialize(foundObject);
    } catch (IOException exception) {
        LOG.error("failed deserializing token: " + token + " with classname: " + className, exception);

        throw exception;
    }

    return object;
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.TaggedInputSplit.java

License:Apache License

@SuppressWarnings("unchecked")
@Override// ww  w.  ja v  a 2 s  .  c o  m
public void readFields(DataInput in) throws IOException {
    name = Text.readString(in);
    inputSplitClass = (Class<? extends InputSplit>) readClass(in);
    inputFormatClass = (Class<? extends InputFormat<?, ?>>) readClass(in);
    mapperClassName = Text.readString(in);
    inputConfigs = GSON.fromJson(Text.readString(in), STRING_STRING_MAP_TYPE);
    inputSplit = ReflectionUtils.newInstance(inputSplitClass, conf);
    SerializationFactory factory = new SerializationFactory(conf);
    Deserializer deserializer = factory.getDeserializer(inputSplitClass);
    deserializer.open((DataInputStream) in);
    inputSplit = (InputSplit) deserializer.deserialize(inputSplit);
}

From source file:com.ambiata.ivory.operation.hadoop.TaggedInputSplit.java

License:Apache License

@SuppressWarnings("unchecked")
public void readFields(DataInput in) throws IOException {
    inputSplitClass = (Class<? extends InputSplit>) readClass(in);
    inputFormatClass = (Class<? extends InputFormat<?, ?>>) readClass(in);
    mapperClass = (Class<? extends Mapper<?, ?, ?, ?>>) readClass(in);
    inputSplit = (InputSplit) ReflectionUtils.newInstance(inputSplitClass, conf);
    SerializationFactory factory = new SerializationFactory(conf);
    Deserializer deserializer = factory.getDeserializer(inputSplitClass);
    deserializer.open((DataInputStream) in);
    inputSplit = (InputSplit) deserializer.deserialize(inputSplit);
}

From source file:com.baynote.hadoop.TaggedInputSplit.java

License:Apache License

@SuppressWarnings("unchecked")
public void readFields(DataInput in) throws IOException {
    inputSplitClass = (Class<? extends InputSplit>) readClass(in);
    inputFormatClass = (Class<? extends InputFormat<?, ?>>) readClass(in);
    mapperClass = (Class<? extends Mapper<?, ?, ?, ?>>) readClass(in);
    inputSplit = ReflectionUtils.newInstance(inputSplitClass, conf);
    SerializationFactory factory = new SerializationFactory(conf);
    Deserializer deserializer = factory.getDeserializer(inputSplitClass);
    deserializer.open((DataInputStream) in);
    inputSplit = (InputSplit) deserializer.deserialize(inputSplit);
}

From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java

License:Apache License

/**
 * loadData: load data for the staff.//ww  w.j a v  a  2  s  .co m
 * 
 * @param job
 *        BSP job configuration
 * @param workerAgent
 *        Protocol that staff child process uses to contact its parent process
 * @return boolean
 * @throws ClassNotFoundException
 * @throws IOException
 *         e
 * @throws InterruptedException
 *         e
 */
@SuppressWarnings("unchecked")
public boolean loadData(BSPJob job, WorkerAgentProtocol workerAgent, WorkerAgentForStaffInterface aStaffAgent)
        throws ClassNotFoundException, IOException, InterruptedException {
    // rebuild the input split
    RecordReader input = null;
    org.apache.hadoop.mapreduce.InputSplit split = null;
    if (rawSplitClass.equals("no")) {
        input = null;
    } else {

        DataInputBuffer splitBuffer = new DataInputBuffer();
        splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
        SerializationFactory factory = new SerializationFactory(job.getConf());
        Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory
                .getDeserializer(job.getConf().getClassByName(rawSplitClass));
        deserializer.open(splitBuffer);
        split = deserializer.deserialize(null);

        // rebuild the InputFormat class according to the user configuration
        InputFormat inputformat = (InputFormat) ReflectionUtils.newInstance(
                job.getConf().getClass(Constants.USER_BC_BSP_JOB_INPUT_FORMAT_CLASS, InputFormat.class),
                job.getConf());
        inputformat.initialize(job.getConf());
        input = inputformat.createRecordReader(split, job);
        input.initialize(split, job.getConf());
    }
    SuperStepReportContainer ssrc = new SuperStepReportContainer();
    ssrc.setPartitionId(this.getPartition());
    this.numCopy = (int) (1 / (job.getConf().getFloat(Constants.USER_BC_BSP_JOB_BALANCE_FACTOR,
            Constants.USER_BC_BSP_JOB_BALANCE_FACTOR_DEFAULT)));
    ssrc.setNumCopy(numCopy);
    ssrc.setCheckNum(this.staffNum);
    StaffSSControllerInterface lsssc = new StaffSSController(this.getJobId(), this.getSid(), workerAgent);
    long start = System.currentTimeMillis();
    LOG.info("in BCBSP with PartitionType is: Hash" + " start time:" + start);
    if (this.staffNum == 1 || job.getConf().getBoolean(Constants.USER_BC_BSP_JOB_ISDIVIDE, false)) {

        this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance(
                job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class),
                job.getConf());
        this.partitioner.setNumPartition(this.staffNum);
        this.partitioner.intialize(job, split);

        WritePartition writePartition = new NotDivideWritePartition();
        /*
         * RecordParse recordParse = (RecordParse) ReflectionUtils .newInstance(
         * job.getConf() .getClass( Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS,
         * RecordParseDefault.class), job .getConf()); recordParse.init(job);
         * //add by chen for null bug this.recordParse = recordParse;
         * //this.recordParse.init(job);
         */
        writePartition.setRecordParse(this.recordParse);
        writePartition.setStaff(this);
        writePartition.write(input);

        ssrc.setDirFlag(new String[] { "1" });
        ssrc.setCheckNum(this.staffNum);
        lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);

        LOG.info("The number of verteices from other staff" + " that cound not be parsed:" + this.lost);
        LOG.info("in BCBSP with PartitionType is:HASH" + " the number of HeadNode in this partition is:"
                + graphData.sizeForAll());

        graphData.finishAdd();
        ssrc.setCheckNum(this.staffNum * 2);
        ssrc.setDirFlag(new String[] { "2" });
        lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);

    } else {
        this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance(
                job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class),
                job.getConf());

        WritePartition writePartition = (WritePartition) ReflectionUtils.newInstance(job.getConf().getClass(
                Constants.USER_BC_BSP_JOB_WRITEPARTITION_CLASS, HashWritePartition.class), job.getConf());
        int multiple = 1;
        if (writePartition instanceof HashWithBalancerWritePartition) {
            this.partitioner.setNumPartition(this.staffNum * numCopy);
            multiple = 2;
        } else {

            this.partitioner.setNumPartition(this.staffNum);
            multiple = 1;
            if (writePartition instanceof RangeWritePartition) {
                multiple = 2;
            }
        }
        this.partitioner.intialize(job, split);
        /*
         * RecordParse recordParse = (RecordParse) ReflectionUtils .newInstance(
         * job.getConf() .getClass( Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS,
         * RecordParseDefault.class), job .getConf()); recordParse.init(job); //
         * this.recordParse = (RecordParse) ReflectionUtils.newInstance( //
         * job.getConf().getClass( // Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS,
         * // RecordParseDefault.class), job.getConf()); //
         * this.recordParse.init(job); this.recordParse = recordParse;
         */
        writePartition.setPartitioner(partitioner);
        writePartition.setRecordParse(this.recordParse);
        writePartition.setStaff(this);
        writePartition.setWorkerAgent(aStaffAgent);
        writePartition.setSsrc(ssrc);
        writePartition.setSssc(lsssc);

        writePartition.setTotalCatchSize(job.getConf().getInt(Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE,
                Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE_DEFAULT));

        int threadNum = job.getConf().getInt(Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER,
                Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER_DEFAULT);
        if (threadNum > this.staffNum) {
            threadNum = this.staffNum - 1;
        }
        writePartition.setSendThreadNum(threadNum);
        writePartition.write(input);

        ssrc.setDirFlag(new String[] { "1" });
        ssrc.setCheckNum(this.staffNum * multiple);
        lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);

        LOG.info("The number of verteices from other staff that" + " cound not be parsed:" + this.lost);
        LOG.info("in BCBSP with PartitionType is:HASH" + " the number of HeadNode in this partition is:"
                + graphData.sizeForAll());

        graphData.finishAdd();

        ssrc.setCheckNum(this.staffNum * (multiple + 1));
        ssrc.setDirFlag(new String[] { "2" });
        lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);
    }

    long end = System.currentTimeMillis();
    LOG.info("in BCBSP with PartitionType is:HASH" + " end time:" + end);
    LOG.info(
            "in BCBSP with PartitionType is:HASH" + " using time:" + (float) (end - start) / 1000 + " seconds");

    return true;
}

From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java

License:Apache License

/**
 * Rebuid the partition and read data from checkpoint for intializing.
 * //  w ww .j  a v a  2 s . c o  m
 * @param job
 * @param writePartition
 * @throws ClassNotFoundException
 * @throws IOException
 */
private void intializePartitionForRecovery(BSPJob job, WritePartition writePartition)
        throws ClassNotFoundException, IOException {
    this.currentSuperStepCounter = ssc.getNextSuperStepNum();
    LOG.info("Now, this super step count is " + this.currentSuperStepCounter);
    this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance(
            job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class),
            job.getConf());

    if (writePartition instanceof HashWithBalancerWritePartition) {
        this.partitioner.setNumPartition(this.staffNum * numCopy);
    } else {
        this.partitioner.setNumPartition(this.staffNum);
    }
    org.apache.hadoop.mapreduce.InputSplit split = null;
    if (rawSplitClass.equals("no")) {

    } else {

        DataInputBuffer splitBuffer = new DataInputBuffer();
        splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
        SerializationFactory factory = new SerializationFactory(job.getConf());
        Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory
                .getDeserializer(job.getConf().getClassByName(rawSplitClass));
        deserializer.open(splitBuffer);
        split = deserializer.deserialize(null);
    }
    this.partitioner.intialize(job, split);
    displayFirstRoute();
}

From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java

License:Apache License

/**
 * loadData: load data for the staff in SGA-Graph.
 * //from   w ww .java 2  s . c o m
 * @param job
 *        BSP job configuration
 * @param workerAgent
 *        Protocol that staff child process uses to contact its parent process
 * @return boolean
 * @throws ClassNotFoundException
 * @throws IOException
 *         e
 * @throws InterruptedException
 *         e
 */
@SuppressWarnings("unchecked")
public boolean loadDataSGAGraph(BSPJob job, WorkerAgentProtocol workerAgent,
        WorkerAgentForStaffInterface aStaffAgent)
        throws ClassNotFoundException, IOException, InterruptedException {
    // rebuild the input split
    RecordReader input = null;
    org.apache.hadoop.mapreduce.InputSplit split = null;
    if (rawSplitClass.equals("no")) {
        input = null;
    } else {
        DataInputBuffer splitBuffer = new DataInputBuffer();
        splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
        SerializationFactory factory = new SerializationFactory(job.getConf());
        Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory
                .getDeserializer(job.getConf().getClassByName(rawSplitClass));
        deserializer.open(splitBuffer);
        split = deserializer.deserialize(null);

        // rebuild the InputFormat class according to the user configuration
        InputFormat inputformat = (InputFormat) ReflectionUtils.newInstance(
                job.getConf().getClass(Constants.USER_BC_BSP_JOB_INPUT_FORMAT_CLASS, InputFormat.class),
                job.getConf());
        inputformat.initialize(job.getConf());
        input = inputformat.createRecordReader(split, job);
        input.initialize(split, job.getConf());
    }
    SuperStepReportContainer ssrc = new SuperStepReportContainer();
    ssrc.setPartitionId(this.getPartition());
    this.numCopy = (int) (1 / (job.getConf().getFloat(Constants.USER_BC_BSP_JOB_BALANCE_FACTOR,
            Constants.USER_BC_BSP_JOB_BALANCE_FACTOR_DEFAULT)));
    ssrc.setNumCopy(numCopy);
    ssrc.setCheckNum(this.staffNum);
    StaffSSControllerInterface lsssc = new StaffSSController(this.getJobId(), this.getSid(), workerAgent);
    long start = System.currentTimeMillis();
    LOG.info("in BCBSP with PartitionType is: Hash" + " start time:" + start);
    if (this.staffNum == 1 || job.getConf().getBoolean(Constants.USER_BC_BSP_JOB_ISDIVIDE, false)) {

        this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance(
                job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class),
                job.getConf());
        this.partitioner.setNumPartition(this.staffNum);
        this.partitioner.intialize(job, split);

        WritePartition writePartition = new NotDivideWritePartition();
        /*
         * RecordParse recordParse = (RecordParse) ReflectionUtils .newInstance(
         * job.getConf() .getClass( Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS,
         * RecordParseDefault.class), job .getConf()); recordParse.init(job);
         * //add by chen for null bug this.recordParse = recordParse;
         * //this.recordParse.init(job);
         */
        writePartition.setRecordParse(this.recordParse);
        writePartition.setStaff(this);
        writePartition.write(input);

        ssrc.setDirFlag(new String[] { "1" });
        ssrc.setCheckNum(this.staffNum);
        lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);

        LOG.info("The number of verteices from other staff" + " that cound not be parsed:" + this.lost);
        LOG.info("in BCBSP with PartitionType is:HASH" + " the number of HeadNode in this partition is:"
                + graphData.sizeForAll());

        graphData.finishAdd();
        ssrc.setCheckNum(this.staffNum * 2);
        ssrc.setDirFlag(new String[] { "2" });
        lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);

    } else {
        this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance(
                job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class),
                job.getConf());

        WritePartition writePartition = (WritePartition) ReflectionUtils.newInstance(job.getConf().getClass(
                Constants.USER_BC_BSP_JOB_WRITEPARTITION_CLASS, HashWritePartition.class), job.getConf());
        int multiple = 1;
        if (writePartition instanceof HashWithBalancerWritePartition) {
            this.partitioner.setNumPartition(this.staffNum * numCopy);
            multiple = 2;
        } else {

            this.partitioner.setNumPartition(this.staffNum);
            multiple = 1;
            if (writePartition instanceof RangeWritePartition) {
                multiple = 2;
            }
        }
        this.partitioner.intialize(job, split);
        /*
         * RecordParse recordParse = (RecordParse) ReflectionUtils .newInstance(
         * job.getConf() .getClass( Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS,
         * RecordParseDefault.class), job .getConf()); recordParse.init(job); //
         * this.recordParse = (RecordParse) ReflectionUtils.newInstance( //
         * job.getConf().getClass( // Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS,
         * // RecordParseDefault.class), job.getConf()); //
         * this.recordParse.init(job); this.recordParse = recordParse;
         */
        writePartition.setPartitioner(partitioner);
        writePartition.setRecordParse(this.recordParse);
        writePartition.setStaff(this);
        writePartition.setWorkerAgent(aStaffAgent);
        writePartition.setSsrc(ssrc);
        writePartition.setSssc(lsssc);

        writePartition.setTotalCatchSize(job.getConf().getInt(Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE,
                Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE_DEFAULT));

        int threadNum = job.getConf().getInt(Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER,
                Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER_DEFAULT);
        if (threadNum > this.staffNum) {
            threadNum = this.staffNum - 1;
        }
        writePartition.setSendThreadNum(threadNum);
        writePartition.write(input);

        ssrc.setDirFlag(new String[] { "1" });
        ssrc.setCheckNum(this.staffNum * multiple);
        lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);

        LOG.info("The number of verteices from other staff that" + " cound not be parsed:" + this.lost);
        LOG.info("in BCBSP with PartitionType is:HASH" + " the number of HeadNode in this partition is:"
                + graphData.sizeForAll());

        graphData.finishAdd();

        ssrc.setCheckNum(this.staffNum * (multiple + 1));
        ssrc.setDirFlag(new String[] { "2" });
        lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);
        // for input graph evaluate.
        // this.evaluateflag = lsssc.graphEvaluateBarrier(0, this.staffNum,
        // this.graphData.getGlobalFactor());
    }

    long end = System.currentTimeMillis();
    LOG.info("in BCBSP with PartitionType is:HASH" + " end time:" + end);
    LOG.info(
            "in BCBSP with PartitionType is:HASH" + " using time:" + (float) (end - start) / 1000 + " seconds");

    return true;
}

From source file:com.cloudera.crunch.impl.mr.run.CrunchInputSplit.java

License:Apache License

public void readFields(DataInput in) throws IOException {
    nodeIndex = in.readInt();/*from w  ww.  ja v  a 2  s  . c o  m*/
    inputFormatClass = (Class<? extends InputFormat>) readClass(in);
    Class<? extends InputSplit> inputSplitClass = (Class<? extends InputSplit>) readClass(in);
    inputSplit = (InputSplit) ReflectionUtils.newInstance(inputSplitClass, conf);
    SerializationFactory factory = new SerializationFactory(conf);
    Deserializer deserializer = factory.getDeserializer(inputSplitClass);
    deserializer.open((DataInputStream) in);
    inputSplit = (InputSplit) deserializer.deserialize(inputSplit);
}

From source file:com.datasalt.pangool.serialization.HadoopSerialization.java

License:Apache License

/**
 * Deseerializes into the given object using the Hadoop serialization system.
 * Object cannot be null./*from ww w  .  j av  a  2  s  . co  m*/
 */
public <T> T deser(Object obj, InputStream in) throws IOException {
    Map<Class, Deserializer> deserializers = cachedDeserializers.get();
    Deserializer deSer = deserializers.get(obj.getClass());
    if (deSer == null) {
        deSer = serialization.getDeserializer(obj.getClass());
        deserializers.put(obj.getClass(), deSer);
    }
    deSer.open(in);
    obj = deSer.deserialize(obj);
    deSer.close();
    return (T) obj;
}

From source file:com.datasalt.pangool.serialization.HadoopSerialization.java

License:Apache License

/**
 * Return a new instance of the given class with the deserialized data from
 * the input stream./*  w w  w  .j  a  v  a  2s. c  o m*/
 */
public <T> T deser(Class clazz, InputStream in) throws IOException {
    Map<Class, Deserializer> deserializers = cachedDeserializers.get();
    Deserializer deSer = deserializers.get(clazz);
    if (deSer == null) {
        deSer = serialization.getDeserializer(clazz);
        deserializers.put(clazz, deSer);
    }

    deSer.open(in);
    Object obj = deSer.deserialize(null);
    deSer.close();
    return (T) obj;
}