List of usage examples for org.apache.hadoop.io.serializer Deserializer open
void open(InputStream in) throws IOException;
Prepare the deserializer for reading.
From source file:cascading.tuple.hadoop.SerializationElementReader.java
License:Open Source License
private Deserializer getDeserializerFor(DataInputStream inputStream, String className) throws IOException { Deserializer deserializer = deserializers.get(className); if (deserializer == null) { deserializer = tupleSerialization.getNewDeserializer(className); deserializer.open(inputStream); deserializers.put(className, deserializer); }//from w ww. j a v a 2 s . c o m return deserializer; }
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.TaggedInputSplit.java
License:Apache License
@SuppressWarnings("unchecked") @Override//w w w. jav a 2 s . c o m public void readFields(DataInput in) throws IOException { name = Text.readString(in); inputSplitClass = (Class<? extends InputSplit>) readClass(in); inputFormatClass = (Class<? extends InputFormat<?, ?>>) readClass(in); mapperClassName = Text.readString(in); inputConfigs = GSON.fromJson(Text.readString(in), STRING_STRING_MAP_TYPE); inputSplit = ReflectionUtils.newInstance(inputSplitClass, conf); SerializationFactory factory = new SerializationFactory(conf); Deserializer deserializer = factory.getDeserializer(inputSplitClass); deserializer.open((DataInputStream) in); inputSplit = (InputSplit) deserializer.deserialize(inputSplit); }
From source file:com.ambiata.ivory.operation.hadoop.TaggedInputSplit.java
License:Apache License
/**
 * Reads back the tagged split's state: the split, input-format, and mapper
 * classes, then the wrapped InputSplit itself through the Hadoop
 * serialization framework. The read order must mirror write().
 */
@SuppressWarnings("unchecked")
public void readFields(DataInput in) throws IOException {
    inputSplitClass = (Class<? extends InputSplit>) readClass(in);
    inputFormatClass = (Class<? extends InputFormat<?, ?>>) readClass(in);
    mapperClass = (Class<? extends Mapper<?, ?, ?, ?>>) readClass(in);
    // Create an empty split of the recorded class and deserialize into it.
    inputSplit = (InputSplit) ReflectionUtils.newInstance(inputSplitClass, conf);
    SerializationFactory serializationFactory = new SerializationFactory(conf);
    Deserializer splitDeserializer = serializationFactory.getDeserializer(inputSplitClass);
    splitDeserializer.open((DataInputStream) in);
    inputSplit = (InputSplit) splitDeserializer.deserialize(inputSplit);
}
From source file:com.baynote.hadoop.TaggedInputSplit.java
License:Apache License
/**
 * Restores the tagged split: the split, input-format, and mapper classes are
 * read first, then the wrapped InputSplit is rebuilt via the Hadoop
 * serialization framework. The read order must mirror write().
 */
@SuppressWarnings("unchecked")
public void readFields(DataInput in) throws IOException {
    inputSplitClass = (Class<? extends InputSplit>) readClass(in);
    inputFormatClass = (Class<? extends InputFormat<?, ?>>) readClass(in);
    mapperClass = (Class<? extends Mapper<?, ?, ?, ?>>) readClass(in);
    // New empty instance of the recorded split class; the deserializer
    // repopulates it from the remainder of the stream.
    inputSplit = ReflectionUtils.newInstance(inputSplitClass, conf);
    SerializationFactory serializationFactory = new SerializationFactory(conf);
    Deserializer splitDeserializer = serializationFactory.getDeserializer(inputSplitClass);
    splitDeserializer.open((DataInputStream) in);
    inputSplit = (InputSplit) splitDeserializer.deserialize(inputSplit);
}
From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java
License:Apache License
// NOTE(review): this method was scraped onto collapsed lines; the code below is
// kept byte-identical because its barrier protocol (loadDataBarrier calls with
// carefully staged check numbers and dir flags) is order-dependent. Summary of
// what the visible code does: (1) if rawSplitClass != "no", deserialize the raw
// input split and build a RecordReader from the configured InputFormat;
// (2) choose a WritePartition strategy and partition the input across staffs;
// (3) synchronize twice via StaffSSController.loadDataBarrier. The check-number
// multiplier depends on the WritePartition type (balancer/range double it).
/** * loadData: load data for the staff.//from w ww. j a va 2 s. c o m * * @param job * BSP job configuration * @param workerAgent * Protocol that staff child process uses to contact its parent process * @return boolean * @throws ClassNotFoundException * @throws IOException * e * @throws InterruptedException * e */ @SuppressWarnings("unchecked") public boolean loadData(BSPJob job, WorkerAgentProtocol workerAgent, WorkerAgentForStaffInterface aStaffAgent) throws ClassNotFoundException, IOException, InterruptedException { // rebuild the input split RecordReader input = null; org.apache.hadoop.mapreduce.InputSplit split = null; if (rawSplitClass.equals("no")) { input = null; } else { DataInputBuffer splitBuffer = new DataInputBuffer(); splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength()); SerializationFactory factory = new SerializationFactory(job.getConf()); Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory .getDeserializer(job.getConf().getClassByName(rawSplitClass)); deserializer.open(splitBuffer); split = deserializer.deserialize(null); // rebuild the InputFormat class according to the user configuration InputFormat inputformat = (InputFormat) ReflectionUtils.newInstance( job.getConf().getClass(Constants.USER_BC_BSP_JOB_INPUT_FORMAT_CLASS, InputFormat.class), job.getConf()); inputformat.initialize(job.getConf()); input = inputformat.createRecordReader(split, job); input.initialize(split, job.getConf()); } SuperStepReportContainer ssrc = new SuperStepReportContainer(); ssrc.setPartitionId(this.getPartition()); this.numCopy = (int) (1 / (job.getConf().getFloat(Constants.USER_BC_BSP_JOB_BALANCE_FACTOR, Constants.USER_BC_BSP_JOB_BALANCE_FACTOR_DEFAULT))); ssrc.setNumCopy(numCopy); ssrc.setCheckNum(this.staffNum); StaffSSControllerInterface lsssc = new StaffSSController(this.getJobId(), this.getSid(), workerAgent); long start = System.currentTimeMillis(); 
// Branch 1: single staff or ISDIVIDE mode — NotDivideWritePartition, two barrier phases.
LOG.info("in BCBSP with PartitionType is: Hash" + " start time:" + start); if (this.staffNum == 1 || job.getConf().getBoolean(Constants.USER_BC_BSP_JOB_ISDIVIDE, false)) { this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance( job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class), job.getConf()); this.partitioner.setNumPartition(this.staffNum); this.partitioner.intialize(job, split); WritePartition writePartition = new NotDivideWritePartition(); /* * RecordParse recordParse = (RecordParse) ReflectionUtils .newInstance( * job.getConf() .getClass( Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS, * RecordParseDefault.class), job .getConf()); recordParse.init(job); * //add by chen for null bug this.recordParse = recordParse; * //this.recordParse.init(job); */ writePartition.setRecordParse(this.recordParse); writePartition.setStaff(this); writePartition.write(input); ssrc.setDirFlag(new String[] { "1" }); ssrc.setCheckNum(this.staffNum); lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH); LOG.info("The number of verteices from other staff" + " that cound not be parsed:" + this.lost); LOG.info("in BCBSP with PartitionType is:HASH" + " the number of HeadNode in this partition is:" + graphData.sizeForAll()); graphData.finishAdd(); ssrc.setCheckNum(this.staffNum * 2); ssrc.setDirFlag(new String[] { "2" }); lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH); } else { this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance( job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class), job.getConf()); WritePartition writePartition = (WritePartition) ReflectionUtils.newInstance(job.getConf().getClass( Constants.USER_BC_BSP_JOB_WRITEPARTITION_CLASS, HashWritePartition.class), job.getConf()); int multiple = 1; if (writePartition instanceof HashWithBalancerWritePartition) { this.partitioner.setNumPartition(this.staffNum * numCopy); multiple = 2; } else { 
// Non-balancer path: one partition per staff; RangeWritePartition still doubles the barrier multiple.
this.partitioner.setNumPartition(this.staffNum); multiple = 1; if (writePartition instanceof RangeWritePartition) { multiple = 2; } } this.partitioner.intialize(job, split); /* * RecordParse recordParse = (RecordParse) ReflectionUtils .newInstance( * job.getConf() .getClass( Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS, * RecordParseDefault.class), job .getConf()); recordParse.init(job); // * this.recordParse = (RecordParse) ReflectionUtils.newInstance( // * job.getConf().getClass( // Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS, * // RecordParseDefault.class), job.getConf()); // * this.recordParse.init(job); this.recordParse = recordParse; */ writePartition.setPartitioner(partitioner); writePartition.setRecordParse(this.recordParse); writePartition.setStaff(this); writePartition.setWorkerAgent(aStaffAgent); writePartition.setSsrc(ssrc); writePartition.setSssc(lsssc); writePartition.setTotalCatchSize(job.getConf().getInt(Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE, Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE_DEFAULT)); int threadNum = job.getConf().getInt(Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER, Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER_DEFAULT); if (threadNum > this.staffNum) { threadNum = this.staffNum - 1; } writePartition.setSendThreadNum(threadNum); writePartition.write(input); ssrc.setDirFlag(new String[] { "1" }); ssrc.setCheckNum(this.staffNum * multiple); lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH); LOG.info("The number of verteices from other staff that" + " cound not be parsed:" + this.lost); LOG.info("in BCBSP with PartitionType is:HASH" + " the number of HeadNode in this partition is:" + graphData.sizeForAll()); graphData.finishAdd(); ssrc.setCheckNum(this.staffNum * (multiple + 1)); ssrc.setDirFlag(new String[] { "2" }); lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH); } long end = System.currentTimeMillis(); LOG.info("in BCBSP with PartitionType is:HASH" + " end time:" + end); LOG.info( "in BCBSP with PartitionType is:HASH" + 
" using time:" + (float) (end - start) / 1000 + " seconds"); return true; }
From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java
License:Apache License
/** * Rebuid the partition and read data from checkpoint for intializing. * /*www . j a va2 s . c o m*/ * @param job * @param writePartition * @throws ClassNotFoundException * @throws IOException */ private void intializePartitionForRecovery(BSPJob job, WritePartition writePartition) throws ClassNotFoundException, IOException { this.currentSuperStepCounter = ssc.getNextSuperStepNum(); LOG.info("Now, this super step count is " + this.currentSuperStepCounter); this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance( job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class), job.getConf()); if (writePartition instanceof HashWithBalancerWritePartition) { this.partitioner.setNumPartition(this.staffNum * numCopy); } else { this.partitioner.setNumPartition(this.staffNum); } org.apache.hadoop.mapreduce.InputSplit split = null; if (rawSplitClass.equals("no")) { } else { DataInputBuffer splitBuffer = new DataInputBuffer(); splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength()); SerializationFactory factory = new SerializationFactory(job.getConf()); Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory .getDeserializer(job.getConf().getClassByName(rawSplitClass)); deserializer.open(splitBuffer); split = deserializer.deserialize(null); } this.partitioner.intialize(job, split); displayFirstRoute(); }
From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java
License:Apache License
// NOTE(review): scraped/collapsed formatting retained byte-identical — the
// staged loadDataBarrier protocol (check numbers, dir flags, multiples) is
// order-dependent, so only comments were added. Visible behavior: deserialize
// the raw input split (unless rawSplitClass == "no"), build a RecordReader
// from the configured InputFormat, partition the input via a WritePartition
// strategy, and synchronize through two barrier phases per branch.
/** * loadData: load data for the staff in SGA-Graph. * //from www .j a v a2 s.c o m * @param job * BSP job configuration * @param workerAgent * Protocol that staff child process uses to contact its parent process * @return boolean * @throws ClassNotFoundException * @throws IOException * e * @throws InterruptedException * e */ @SuppressWarnings("unchecked") public boolean loadDataSGAGraph(BSPJob job, WorkerAgentProtocol workerAgent, WorkerAgentForStaffInterface aStaffAgent) throws ClassNotFoundException, IOException, InterruptedException { // rebuild the input split RecordReader input = null; org.apache.hadoop.mapreduce.InputSplit split = null; if (rawSplitClass.equals("no")) { input = null; } else { DataInputBuffer splitBuffer = new DataInputBuffer(); splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength()); SerializationFactory factory = new SerializationFactory(job.getConf()); Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory .getDeserializer(job.getConf().getClassByName(rawSplitClass)); deserializer.open(splitBuffer); split = deserializer.deserialize(null); // rebuild the InputFormat class according to the user configuration InputFormat inputformat = (InputFormat) ReflectionUtils.newInstance( job.getConf().getClass(Constants.USER_BC_BSP_JOB_INPUT_FORMAT_CLASS, InputFormat.class), job.getConf()); inputformat.initialize(job.getConf()); input = inputformat.createRecordReader(split, job); input.initialize(split, job.getConf()); } SuperStepReportContainer ssrc = new SuperStepReportContainer(); ssrc.setPartitionId(this.getPartition()); this.numCopy = (int) (1 / (job.getConf().getFloat(Constants.USER_BC_BSP_JOB_BALANCE_FACTOR, Constants.USER_BC_BSP_JOB_BALANCE_FACTOR_DEFAULT))); ssrc.setNumCopy(numCopy); ssrc.setCheckNum(this.staffNum); StaffSSControllerInterface lsssc = new StaffSSController(this.getJobId(), this.getSid(), workerAgent); long start = 
System.currentTimeMillis(); LOG.info("in BCBSP with PartitionType is: Hash" + " start time:" + start); if (this.staffNum == 1 || job.getConf().getBoolean(Constants.USER_BC_BSP_JOB_ISDIVIDE, false)) { this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance( job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class), job.getConf()); this.partitioner.setNumPartition(this.staffNum); this.partitioner.intialize(job, split); WritePartition writePartition = new NotDivideWritePartition(); /* * RecordParse recordParse = (RecordParse) ReflectionUtils .newInstance( * job.getConf() .getClass( Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS, * RecordParseDefault.class), job .getConf()); recordParse.init(job); * //add by chen for null bug this.recordParse = recordParse; * //this.recordParse.init(job); */ writePartition.setRecordParse(this.recordParse); writePartition.setStaff(this); writePartition.write(input); ssrc.setDirFlag(new String[] { "1" }); ssrc.setCheckNum(this.staffNum); lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH); LOG.info("The number of verteices from other staff" + " that cound not be parsed:" + this.lost); LOG.info("in BCBSP with PartitionType is:HASH" + " the number of HeadNode in this partition is:" + graphData.sizeForAll()); graphData.finishAdd(); ssrc.setCheckNum(this.staffNum * 2); ssrc.setDirFlag(new String[] { "2" }); lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH); } else { this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance( job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class), job.getConf()); WritePartition writePartition = (WritePartition) ReflectionUtils.newInstance(job.getConf().getClass( Constants.USER_BC_BSP_JOB_WRITEPARTITION_CLASS, HashWritePartition.class), job.getConf()); int multiple = 1; if (writePartition instanceof HashWithBalancerWritePartition) { this.partitioner.setNumPartition(this.staffNum * numCopy); multiple = 2; } else 
// Non-balancer path: one partition per staff; RangeWritePartition still doubles the barrier multiple.
{ this.partitioner.setNumPartition(this.staffNum); multiple = 1; if (writePartition instanceof RangeWritePartition) { multiple = 2; } } this.partitioner.intialize(job, split); /* * RecordParse recordParse = (RecordParse) ReflectionUtils .newInstance( * job.getConf() .getClass( Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS, * RecordParseDefault.class), job .getConf()); recordParse.init(job); // * this.recordParse = (RecordParse) ReflectionUtils.newInstance( // * job.getConf().getClass( // Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS, * // RecordParseDefault.class), job.getConf()); // * this.recordParse.init(job); this.recordParse = recordParse; */ writePartition.setPartitioner(partitioner); writePartition.setRecordParse(this.recordParse); writePartition.setStaff(this); writePartition.setWorkerAgent(aStaffAgent); writePartition.setSsrc(ssrc); writePartition.setSssc(lsssc); writePartition.setTotalCatchSize(job.getConf().getInt(Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE, Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE_DEFAULT)); int threadNum = job.getConf().getInt(Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER, Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER_DEFAULT); if (threadNum > this.staffNum) { threadNum = this.staffNum - 1; } writePartition.setSendThreadNum(threadNum); writePartition.write(input); ssrc.setDirFlag(new String[] { "1" }); ssrc.setCheckNum(this.staffNum * multiple); lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH); LOG.info("The number of verteices from other staff that" + " cound not be parsed:" + this.lost); LOG.info("in BCBSP with PartitionType is:HASH" + " the number of HeadNode in this partition is:" + graphData.sizeForAll()); graphData.finishAdd(); ssrc.setCheckNum(this.staffNum * (multiple + 1)); ssrc.setDirFlag(new String[] { "2" }); lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH); // for input graph evaluate. 
// this.evaluateflag = lsssc.graphEvaluateBarrier(0, this.staffNum, // this.graphData.getGlobalFactor()); } long end = System.currentTimeMillis(); LOG.info("in BCBSP with PartitionType is:HASH" + " end time:" + end); LOG.info( "in BCBSP with PartitionType is:HASH" + " using time:" + (float) (end - start) / 1000 + " seconds"); return true; }
From source file:com.cloudera.crunch.impl.mr.run.CrunchInputSplit.java
License:Apache License
public void readFields(DataInput in) throws IOException { nodeIndex = in.readInt();// ww w .j ava 2 s. c o m inputFormatClass = (Class<? extends InputFormat>) readClass(in); Class<? extends InputSplit> inputSplitClass = (Class<? extends InputSplit>) readClass(in); inputSplit = (InputSplit) ReflectionUtils.newInstance(inputSplitClass, conf); SerializationFactory factory = new SerializationFactory(conf); Deserializer deserializer = factory.getDeserializer(inputSplitClass); deserializer.open((DataInputStream) in); inputSplit = (InputSplit) deserializer.deserialize(inputSplit); }
From source file:com.datasalt.pangool.serialization.HadoopSerialization.java
License:Apache License
/**
 * Deserializes into the given object using the Hadoop serialization system.
 * The object must not be null. Deserializers are cached per class in a
 * thread-local map and re-opened on the stream for each call.
 */
public <T> T deser(Object obj, InputStream in) throws IOException {
    Map<Class, Deserializer> deserializers = cachedDeserializers.get();
    Class targetClass = obj.getClass();
    Deserializer deSer = deserializers.get(targetClass);
    if (deSer == null) {
        deSer = serialization.getDeserializer(targetClass);
        deserializers.put(targetClass, deSer);
    }
    deSer.open(in);
    obj = deSer.deserialize(obj);
    deSer.close();
    return (T) obj;
}
From source file:com.datasalt.pangool.serialization.HadoopSerialization.java
License:Apache License
/** * Return a new instance of the given class with the deserialized data from * the input stream./* w w w .j a va 2 s . com*/ */ public <T> T deser(Class clazz, InputStream in) throws IOException { Map<Class, Deserializer> deserializers = cachedDeserializers.get(); Deserializer deSer = deserializers.get(clazz); if (deSer == null) { deSer = serialization.getDeserializer(clazz); deserializers.put(clazz, deSer); } deSer.open(in); Object obj = deSer.deserialize(null); deSer.close(); return (T) obj; }