Example usage for org.apache.hadoop.io.serializer Deserializer open

List of usage examples for org.apache.hadoop.io.serializer Deserializer open

Introduction

On this page you can find example usages of the open method of org.apache.hadoop.io.serializer.Deserializer.

Prototype

void open(InputStream in) throws IOException;

Source Link

Document

Prepare the deserializer for reading.

Usage

From source file:cascading.tuple.hadoop.SerializationElementReader.java

License:Open Source License

/**
 * Returns the deserializer registered under {@code className}, creating one on a cache miss.
 *
 * A newly created deserializer is opened on {@code inputStream} and stored in
 * {@code deserializers} before it is returned. A deserializer that is already cached is
 * returned as-is and is not re-opened on the given stream.
 *
 * @param inputStream stream a newly created deserializer is opened on
 * @param className   key used both for the cache lookup and for creating the deserializer
 * @return the cached or newly created deserializer
 * @throws IOException if opening a newly created deserializer fails
 */
private Deserializer getDeserializerFor(DataInputStream inputStream, String className) throws IOException {
    Deserializer cached = deserializers.get(className);

    // Fast path: already created (and opened) on an earlier call.
    if (cached != null) {
        return cached;
    }

    Deserializer created = tupleSerialization.getNewDeserializer(className);
    created.open(inputStream);
    deserializers.put(className, created);

    return created;
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.TaggedInputSplit.java

License:Apache License

/**
 * Restores this tagged split from {@code in}.
 *
 * Fields are read in stream order: name, input split class, input format class, mapper class
 * name, and the JSON-encoded input configs. A fresh split instance is then created reflectively
 * and populated by the deserializer that the serialization factory provides for the split class.
 *
 * @param in source of the serialized state; it is cast to {@link DataInputStream}, so any other
 *           {@link DataInput} implementation fails with a {@code ClassCastException}
 * @throws IOException if reading or deserializing fails
 */
@SuppressWarnings("unchecked")
@Override
public void readFields(DataInput in) throws IOException {
    // Header fields, consumed strictly in the order they appear in the stream.
    this.name = Text.readString(in);
    this.inputSplitClass = (Class<? extends InputSplit>) readClass(in);
    this.inputFormatClass = (Class<? extends InputFormat<?, ?>>) readClass(in);
    this.mapperClassName = Text.readString(in);

    final String serializedConfigs = Text.readString(in);
    this.inputConfigs = GSON.fromJson(serializedConfigs, STRING_STRING_MAP_TYPE);

    // Create an empty split first; the deserializer then fills it from the rest of the stream.
    this.inputSplit = ReflectionUtils.newInstance(this.inputSplitClass, conf);
    final Deserializer splitDeserializer = new SerializationFactory(conf).getDeserializer(this.inputSplitClass);
    splitDeserializer.open((DataInputStream) in);
    this.inputSplit = (InputSplit) splitDeserializer.deserialize(this.inputSplit);
}

From source file:com.ambiata.ivory.operation.hadoop.TaggedInputSplit.java

License:Apache License

/**
 * Restores this tagged split from {@code in}.
 *
 * Reads the input split class, input format class and mapper class (in that order), creates an
 * empty split reflectively, and lets the deserializer for the split class populate it from the
 * remainder of the stream.
 *
 * @param in source of the serialized state; it is cast to {@link DataInputStream}, so any other
 *           {@link DataInput} implementation fails with a {@code ClassCastException}
 * @throws IOException if reading or deserializing fails
 */
@SuppressWarnings("unchecked")
public void readFields(DataInput in) throws IOException {
    // The three class references come first, in this exact order.
    this.inputSplitClass = (Class<? extends InputSplit>) readClass(in);
    this.inputFormatClass = (Class<? extends InputFormat<?, ?>>) readClass(in);
    this.mapperClass = (Class<? extends Mapper<?, ?, ?, ?>>) readClass(in);

    // Empty instance first; the deserializer fills it in below.
    this.inputSplit = (InputSplit) ReflectionUtils.newInstance(this.inputSplitClass, conf);

    final SerializationFactory serializations = new SerializationFactory(conf);
    final Deserializer splitReader = serializations.getDeserializer(this.inputSplitClass);
    splitReader.open((DataInputStream) in);
    this.inputSplit = (InputSplit) splitReader.deserialize(this.inputSplit);
}

From source file:com.baynote.hadoop.TaggedInputSplit.java

License:Apache License

/**
 * Restores this tagged split from {@code in}.
 *
 * The stream is expected to hold, in order, the input split class, the input format class and
 * the mapper class, followed by the serialized split itself. An empty split is created
 * reflectively and then populated by the deserializer obtained for the split class.
 *
 * @param in source of the serialized state; it is cast to {@link DataInputStream}, so any other
 *           {@link DataInput} implementation fails with a {@code ClassCastException}
 * @throws IOException if reading or deserializing fails
 */
@SuppressWarnings("unchecked")
public void readFields(DataInput in) throws IOException {
    this.inputSplitClass = (Class<? extends InputSplit>) readClass(in);
    this.inputFormatClass = (Class<? extends InputFormat<?, ?>>) readClass(in);
    this.mapperClass = (Class<? extends Mapper<?, ?, ?, ?>>) readClass(in);

    // Instantiate the target split, then hand it to the deserializer to be filled.
    this.inputSplit = ReflectionUtils.newInstance(this.inputSplitClass, conf);
    final Deserializer splitDeserializer = new SerializationFactory(conf).getDeserializer(this.inputSplitClass);
    final DataInputStream stream = (DataInputStream) in;
    splitDeserializer.open(stream);
    this.inputSplit = (InputSplit) splitDeserializer.deserialize(this.inputSplit);
}

From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java

License:Apache License

/**
 * loadData: load data for the staff.
 *
 * Rebuilds the input split and record reader (unless {@code rawSplitClass} is "no"),
 * configures the partitioner and a {@code WritePartition}, writes the input through it,
 * and calls {@code loadDataBarrier} twice (dir flags "1" and "2").
 *
 * @param job
 *        BSP job configuration
 * @param workerAgent
 *        Protocol that staff child process uses to contact its parent process
 * @param aStaffAgent
 *        agent passed to the write partition via {@code setWorkerAgent}; only used on
 *        the branch taken when there is more than one staff and the "is divide" flag
 *        is false
 * @return boolean, always {@code true} when the method returns normally
 * @throws ClassNotFoundException
 *         declared because {@code rawSplitClass} is resolved by name
 * @throws IOException
 *         declared by this method and propagated from the calls it makes
 * @throws InterruptedException
 *         declared by this method and propagated from the calls it makes
 */
@SuppressWarnings("unchecked")
public boolean loadData(BSPJob job, WorkerAgentProtocol workerAgent, WorkerAgentForStaffInterface aStaffAgent)
        throws ClassNotFoundException, IOException, InterruptedException {
    // rebuild the input split
    RecordReader input = null;
    org.apache.hadoop.mapreduce.InputSplit split = null;
    // "no" is the sentinel for "no raw split": input and split both stay null.
    if (rawSplitClass.equals("no")) {
        input = null;
    } else {

        // Deserialize the split from the raw bytes using the deserializer
        // registered for the class named by rawSplitClass.
        DataInputBuffer splitBuffer = new DataInputBuffer();
        splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
        SerializationFactory factory = new SerializationFactory(job.getConf());
        Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory
                .getDeserializer(job.getConf().getClassByName(rawSplitClass));
        deserializer.open(splitBuffer);
        split = deserializer.deserialize(null);

        // rebuild the InputFormat class according to the user configuration
        InputFormat inputformat = (InputFormat) ReflectionUtils.newInstance(
                job.getConf().getClass(Constants.USER_BC_BSP_JOB_INPUT_FORMAT_CLASS, InputFormat.class),
                job.getConf());
        inputformat.initialize(job.getConf());
        input = inputformat.createRecordReader(split, job);
        input.initialize(split, job.getConf());
    }
    SuperStepReportContainer ssrc = new SuperStepReportContainer();
    ssrc.setPartitionId(this.getPartition());
    // numCopy is the reciprocal of the configured balance factor, truncated to int.
    this.numCopy = (int) (1 / (job.getConf().getFloat(Constants.USER_BC_BSP_JOB_BALANCE_FACTOR,
            Constants.USER_BC_BSP_JOB_BALANCE_FACTOR_DEFAULT)));
    ssrc.setNumCopy(numCopy);
    ssrc.setCheckNum(this.staffNum);
    StaffSSControllerInterface lsssc = new StaffSSController(this.getJobId(), this.getSid(), workerAgent);
    long start = System.currentTimeMillis();
    LOG.info("in BCBSP with PartitionType is: Hash" + " start time:" + start);
    // Single staff, or the "is divide" flag is set: use NotDivideWritePartition.
    if (this.staffNum == 1 || job.getConf().getBoolean(Constants.USER_BC_BSP_JOB_ISDIVIDE, false)) {

        this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance(
                job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class),
                job.getConf());
        this.partitioner.setNumPartition(this.staffNum);
        this.partitioner.intialize(job, split);

        WritePartition writePartition = new NotDivideWritePartition();
        /*
         * RecordParse recordParse = (RecordParse) ReflectionUtils .newInstance(
         * job.getConf() .getClass( Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS,
         * RecordParseDefault.class), job .getConf()); recordParse.init(job);
         * //add by chen for null bug this.recordParse = recordParse;
         * //this.recordParse.init(job);
         */
        writePartition.setRecordParse(this.recordParse);
        writePartition.setStaff(this);
        writePartition.write(input);

        // First barrier: dir flag "1", check number staffNum.
        ssrc.setDirFlag(new String[] { "1" });
        ssrc.setCheckNum(this.staffNum);
        lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);

        LOG.info("The number of verteices from other staff" + " that cound not be parsed:" + this.lost);
        LOG.info("in BCBSP with PartitionType is:HASH" + " the number of HeadNode in this partition is:"
                + graphData.sizeForAll());

        graphData.finishAdd();
        // Second barrier: dir flag "2", check number staffNum * 2.
        ssrc.setCheckNum(this.staffNum * 2);
        ssrc.setDirFlag(new String[] { "2" });
        lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);

    } else {
        this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance(
                job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class),
                job.getConf());

        // The write-partition implementation is configurable; HashWritePartition is the default.
        WritePartition writePartition = (WritePartition) ReflectionUtils.newInstance(job.getConf().getClass(
                Constants.USER_BC_BSP_JOB_WRITEPARTITION_CLASS, HashWritePartition.class), job.getConf());
        // multiple scales the barrier check numbers below: 2 for the balancer and
        // range write partitions, 1 otherwise.
        int multiple = 1;
        if (writePartition instanceof HashWithBalancerWritePartition) {
            this.partitioner.setNumPartition(this.staffNum * numCopy);
            multiple = 2;
        } else {

            this.partitioner.setNumPartition(this.staffNum);
            multiple = 1;
            if (writePartition instanceof RangeWritePartition) {
                multiple = 2;
            }
        }
        this.partitioner.intialize(job, split);
        /*
         * RecordParse recordParse = (RecordParse) ReflectionUtils .newInstance(
         * job.getConf() .getClass( Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS,
         * RecordParseDefault.class), job .getConf()); recordParse.init(job); //
         * this.recordParse = (RecordParse) ReflectionUtils.newInstance( //
         * job.getConf().getClass( // Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS,
         * // RecordParseDefault.class), job.getConf()); //
         * this.recordParse.init(job); this.recordParse = recordParse;
         */
        writePartition.setPartitioner(partitioner);
        writePartition.setRecordParse(this.recordParse);
        writePartition.setStaff(this);
        writePartition.setWorkerAgent(aStaffAgent);
        writePartition.setSsrc(ssrc);
        writePartition.setSssc(lsssc);

        writePartition.setTotalCatchSize(job.getConf().getInt(Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE,
                Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE_DEFAULT));

        int threadNum = job.getConf().getInt(Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER,
                Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER_DEFAULT);
        // Clamp the configured send-thread count when it exceeds the staff count.
        // NOTE(review): a value equal to staffNum is left unchanged while larger
        // values drop to staffNum - 1; confirm whether ">=" was intended.
        if (threadNum > this.staffNum) {
            threadNum = this.staffNum - 1;
        }
        writePartition.setSendThreadNum(threadNum);
        writePartition.write(input);

        // First barrier: dir flag "1", check number staffNum * multiple.
        ssrc.setDirFlag(new String[] { "1" });
        ssrc.setCheckNum(this.staffNum * multiple);
        lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);

        LOG.info("The number of verteices from other staff that" + " cound not be parsed:" + this.lost);
        LOG.info("in BCBSP with PartitionType is:HASH" + " the number of HeadNode in this partition is:"
                + graphData.sizeForAll());

        graphData.finishAdd();

        // Second barrier: dir flag "2", check number staffNum * (multiple + 1).
        ssrc.setCheckNum(this.staffNum * (multiple + 1));
        ssrc.setDirFlag(new String[] { "2" });
        lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);
    }

    // Log the wall-clock duration of the whole load, in seconds.
    long end = System.currentTimeMillis();
    LOG.info("in BCBSP with PartitionType is:HASH" + " end time:" + end);
    LOG.info(
            "in BCBSP with PartitionType is:HASH" + " using time:" + (float) (end - start) / 1000 + " seconds");

    return true;
}

From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java

License:Apache License

/**
 * Rebuid the partition and read data from checkpoint for intializing.
 * /*www  . j a  va2  s .  c  o  m*/
 * @param job
 * @param writePartition
 * @throws ClassNotFoundException
 * @throws IOException
 */
private void intializePartitionForRecovery(BSPJob job, WritePartition writePartition)
        throws ClassNotFoundException, IOException {
    this.currentSuperStepCounter = ssc.getNextSuperStepNum();
    LOG.info("Now, this super step count is " + this.currentSuperStepCounter);
    this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance(
            job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class),
            job.getConf());

    if (writePartition instanceof HashWithBalancerWritePartition) {
        this.partitioner.setNumPartition(this.staffNum * numCopy);
    } else {
        this.partitioner.setNumPartition(this.staffNum);
    }
    org.apache.hadoop.mapreduce.InputSplit split = null;
    if (rawSplitClass.equals("no")) {

    } else {

        DataInputBuffer splitBuffer = new DataInputBuffer();
        splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
        SerializationFactory factory = new SerializationFactory(job.getConf());
        Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory
                .getDeserializer(job.getConf().getClassByName(rawSplitClass));
        deserializer.open(splitBuffer);
        split = deserializer.deserialize(null);
    }
    this.partitioner.intialize(job, split);
    displayFirstRoute();
}

From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java

License:Apache License

/**
 * loadData: load data for the staff in SGA-Graph.
 *
 * Rebuilds the input split and record reader (unless {@code rawSplitClass} is "no"),
 * configures the partitioner and a {@code WritePartition}, writes the input through it,
 * and calls {@code loadDataBarrier} twice (dir flags "1" and "2").
 *
 * @param job
 *        BSP job configuration
 * @param workerAgent
 *        Protocol that staff child process uses to contact its parent process
 * @param aStaffAgent
 *        agent passed to the write partition via {@code setWorkerAgent}; only used on
 *        the branch taken when there is more than one staff and the "is divide" flag
 *        is false
 * @return boolean, always {@code true} when the method returns normally
 * @throws ClassNotFoundException
 *         declared because {@code rawSplitClass} is resolved by name
 * @throws IOException
 *         declared by this method and propagated from the calls it makes
 * @throws InterruptedException
 *         declared by this method and propagated from the calls it makes
 */
@SuppressWarnings("unchecked")
public boolean loadDataSGAGraph(BSPJob job, WorkerAgentProtocol workerAgent,
        WorkerAgentForStaffInterface aStaffAgent)
        throws ClassNotFoundException, IOException, InterruptedException {
    // rebuild the input split
    RecordReader input = null;
    org.apache.hadoop.mapreduce.InputSplit split = null;
    // "no" is the sentinel for "no raw split": input and split both stay null.
    if (rawSplitClass.equals("no")) {
        input = null;
    } else {
        // Deserialize the split from the raw bytes using the deserializer
        // registered for the class named by rawSplitClass.
        DataInputBuffer splitBuffer = new DataInputBuffer();
        splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
        SerializationFactory factory = new SerializationFactory(job.getConf());
        Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory
                .getDeserializer(job.getConf().getClassByName(rawSplitClass));
        deserializer.open(splitBuffer);
        split = deserializer.deserialize(null);

        // rebuild the InputFormat class according to the user configuration
        InputFormat inputformat = (InputFormat) ReflectionUtils.newInstance(
                job.getConf().getClass(Constants.USER_BC_BSP_JOB_INPUT_FORMAT_CLASS, InputFormat.class),
                job.getConf());
        inputformat.initialize(job.getConf());
        input = inputformat.createRecordReader(split, job);
        input.initialize(split, job.getConf());
    }
    SuperStepReportContainer ssrc = new SuperStepReportContainer();
    ssrc.setPartitionId(this.getPartition());
    // numCopy is the reciprocal of the configured balance factor, truncated to int.
    this.numCopy = (int) (1 / (job.getConf().getFloat(Constants.USER_BC_BSP_JOB_BALANCE_FACTOR,
            Constants.USER_BC_BSP_JOB_BALANCE_FACTOR_DEFAULT)));
    ssrc.setNumCopy(numCopy);
    ssrc.setCheckNum(this.staffNum);
    StaffSSControllerInterface lsssc = new StaffSSController(this.getJobId(), this.getSid(), workerAgent);
    long start = System.currentTimeMillis();
    LOG.info("in BCBSP with PartitionType is: Hash" + " start time:" + start);
    // Single staff, or the "is divide" flag is set: use NotDivideWritePartition.
    if (this.staffNum == 1 || job.getConf().getBoolean(Constants.USER_BC_BSP_JOB_ISDIVIDE, false)) {

        this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance(
                job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class),
                job.getConf());
        this.partitioner.setNumPartition(this.staffNum);
        this.partitioner.intialize(job, split);

        WritePartition writePartition = new NotDivideWritePartition();
        /*
         * RecordParse recordParse = (RecordParse) ReflectionUtils .newInstance(
         * job.getConf() .getClass( Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS,
         * RecordParseDefault.class), job .getConf()); recordParse.init(job);
         * //add by chen for null bug this.recordParse = recordParse;
         * //this.recordParse.init(job);
         */
        writePartition.setRecordParse(this.recordParse);
        writePartition.setStaff(this);
        writePartition.write(input);

        // First barrier: dir flag "1", check number staffNum.
        ssrc.setDirFlag(new String[] { "1" });
        ssrc.setCheckNum(this.staffNum);
        lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);

        LOG.info("The number of verteices from other staff" + " that cound not be parsed:" + this.lost);
        LOG.info("in BCBSP with PartitionType is:HASH" + " the number of HeadNode in this partition is:"
                + graphData.sizeForAll());

        graphData.finishAdd();
        // Second barrier: dir flag "2", check number staffNum * 2.
        ssrc.setCheckNum(this.staffNum * 2);
        ssrc.setDirFlag(new String[] { "2" });
        lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);

    } else {
        this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance(
                job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class),
                job.getConf());

        // The write-partition implementation is configurable; HashWritePartition is the default.
        WritePartition writePartition = (WritePartition) ReflectionUtils.newInstance(job.getConf().getClass(
                Constants.USER_BC_BSP_JOB_WRITEPARTITION_CLASS, HashWritePartition.class), job.getConf());
        // multiple scales the barrier check numbers below: 2 for the balancer and
        // range write partitions, 1 otherwise.
        int multiple = 1;
        if (writePartition instanceof HashWithBalancerWritePartition) {
            this.partitioner.setNumPartition(this.staffNum * numCopy);
            multiple = 2;
        } else {

            this.partitioner.setNumPartition(this.staffNum);
            multiple = 1;
            if (writePartition instanceof RangeWritePartition) {
                multiple = 2;
            }
        }
        this.partitioner.intialize(job, split);
        /*
         * RecordParse recordParse = (RecordParse) ReflectionUtils .newInstance(
         * job.getConf() .getClass( Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS,
         * RecordParseDefault.class), job .getConf()); recordParse.init(job); //
         * this.recordParse = (RecordParse) ReflectionUtils.newInstance( //
         * job.getConf().getClass( // Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS,
         * // RecordParseDefault.class), job.getConf()); //
         * this.recordParse.init(job); this.recordParse = recordParse;
         */
        writePartition.setPartitioner(partitioner);
        writePartition.setRecordParse(this.recordParse);
        writePartition.setStaff(this);
        writePartition.setWorkerAgent(aStaffAgent);
        writePartition.setSsrc(ssrc);
        writePartition.setSssc(lsssc);

        writePartition.setTotalCatchSize(job.getConf().getInt(Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE,
                Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE_DEFAULT));

        int threadNum = job.getConf().getInt(Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER,
                Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER_DEFAULT);
        // Clamp the configured send-thread count when it exceeds the staff count.
        // NOTE(review): a value equal to staffNum is left unchanged while larger
        // values drop to staffNum - 1; confirm whether ">=" was intended.
        if (threadNum > this.staffNum) {
            threadNum = this.staffNum - 1;
        }
        writePartition.setSendThreadNum(threadNum);
        writePartition.write(input);

        // First barrier: dir flag "1", check number staffNum * multiple.
        ssrc.setDirFlag(new String[] { "1" });
        ssrc.setCheckNum(this.staffNum * multiple);
        lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);

        LOG.info("The number of verteices from other staff that" + " cound not be parsed:" + this.lost);
        LOG.info("in BCBSP with PartitionType is:HASH" + " the number of HeadNode in this partition is:"
                + graphData.sizeForAll());

        graphData.finishAdd();

        // Second barrier: dir flag "2", check number staffNum * (multiple + 1).
        ssrc.setCheckNum(this.staffNum * (multiple + 1));
        ssrc.setDirFlag(new String[] { "2" });
        lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);
        // for input graph evaluate.
        // this.evaluateflag = lsssc.graphEvaluateBarrier(0, this.staffNum,
        // this.graphData.getGlobalFactor());
    }

    // Log the wall-clock duration of the whole load, in seconds.
    long end = System.currentTimeMillis();
    LOG.info("in BCBSP with PartitionType is:HASH" + " end time:" + end);
    LOG.info(
            "in BCBSP with PartitionType is:HASH" + " using time:" + (float) (end - start) / 1000 + " seconds");

    return true;
}

From source file:com.cloudera.crunch.impl.mr.run.CrunchInputSplit.java

License:Apache License

/**
 * Restores this split from {@code in}.
 *
 * Reads the node index, the input format class and the wrapped split's class (in that order),
 * creates an empty split reflectively, and lets the deserializer for that class populate it from
 * the remainder of the stream.
 *
 * @param in source of the serialized state; it is cast to {@link DataInputStream}, so any other
 *           {@link DataInput} implementation fails with a {@code ClassCastException}
 * @throws IOException if reading or deserializing fails
 */
public void readFields(DataInput in) throws IOException {
    this.nodeIndex = in.readInt();
    this.inputFormatClass = (Class<? extends InputFormat>) readClass(in);
    final Class<? extends InputSplit> splitType = (Class<? extends InputSplit>) readClass(in);

    // Empty instance first; the deserializer fills it in below.
    this.inputSplit = (InputSplit) ReflectionUtils.newInstance(splitType, conf);

    final Deserializer splitDeserializer = new SerializationFactory(conf).getDeserializer(splitType);
    splitDeserializer.open((DataInputStream) in);
    this.inputSplit = (InputSplit) splitDeserializer.deserialize(this.inputSplit);
}

From source file:com.datasalt.pangool.serialization.HadoopSerialization.java

License:Apache License

/**
 * Deserializes into the given object using the Hadoop serialization system.
 *
 * The deserializer for the object's runtime class is looked up in the map returned by
 * {@code cachedDeserializers.get()} and created and cached on a miss. It is opened on
 * {@code in} for this call and is always closed again, even when deserialization fails,
 * so a cached deserializer is never left open on a failed stream.
 *
 * @param obj instance to deserialize into; must not be null, because its runtime class
 *            selects the deserializer
 * @param in  stream to read the serialized data from
 * @param <T> type the result is (unchecked) cast to
 * @return the object returned by the deserializer
 * @throws IOException if opening, deserializing or closing fails
 */
@SuppressWarnings("unchecked")
public <T> T deser(Object obj, InputStream in) throws IOException {
    Map<Class, Deserializer> deserializers = cachedDeserializers.get();
    Class<?> type = obj.getClass();
    Deserializer deSer = deserializers.get(type);
    if (deSer == null) {
        deSer = serialization.getDeserializer(type);
        deserializers.put(type, deSer);
    }
    deSer.open(in);
    try {
        return (T) deSer.deserialize(obj);
    } finally {
        // Close on every path; the original skipped this when deserialize() threw.
        deSer.close();
    }
}

From source file:com.datasalt.pangool.serialization.HadoopSerialization.java

License:Apache License

/**
 * Return a new instance of the given class with the deserialized data from
 * the input stream./* w  w w  .j a  va 2 s  .  com*/
 */
public <T> T deser(Class clazz, InputStream in) throws IOException {
    Map<Class, Deserializer> deserializers = cachedDeserializers.get();
    Deserializer deSer = deserializers.get(clazz);
    if (deSer == null) {
        deSer = serialization.getDeserializer(clazz);
        deserializers.put(clazz, deSer);
    }

    deSer.open(in);
    Object obj = deSer.deserialize(null);
    deSer.close();
    return (T) obj;
}