List of usage examples for org.apache.hadoop.io.serializer.SerializationFactory#getDeserializer
public <T> Deserializer<T> getDeserializer(Class<T> c)
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.TaggedInputSplit.java
License:Apache License
@SuppressWarnings("unchecked") @Override//w w w . j a v a2 s . c o m public void readFields(DataInput in) throws IOException { name = Text.readString(in); inputSplitClass = (Class<? extends InputSplit>) readClass(in); inputFormatClass = (Class<? extends InputFormat<?, ?>>) readClass(in); mapperClassName = Text.readString(in); inputConfigs = GSON.fromJson(Text.readString(in), STRING_STRING_MAP_TYPE); inputSplit = ReflectionUtils.newInstance(inputSplitClass, conf); SerializationFactory factory = new SerializationFactory(conf); Deserializer deserializer = factory.getDeserializer(inputSplitClass); deserializer.open((DataInputStream) in); inputSplit = (InputSplit) deserializer.deserialize(inputSplit); }
From source file:com.ambiata.ivory.operation.hadoop.TaggedInputSplit.java
License:Apache License
/**
 * Restores this tagged split from {@code in}.
 *
 * <p>The split class, input format class and mapper class are read first, followed by the
 * wrapped split, which is deserialized with the serialization configured in {@code conf}.
 *
 * @param in source of the serialized fields; it is cast to {@link DataInputStream}, so any
 *     other {@link DataInput} implementation fails with a {@link ClassCastException}
 * @throws IOException if reading a field or deserializing the wrapped split fails
 */
@SuppressWarnings("unchecked")
public void readFields(DataInput in) throws IOException {
  inputSplitClass = (Class<? extends InputSplit>) readClass(in);
  inputFormatClass = (Class<? extends InputFormat<?, ?>>) readClass(in);
  mapperClass = (Class<? extends Mapper<?, ?, ?, ?>>) readClass(in);

  // Create an empty split of the recorded type, then populate it from the stream.
  inputSplit = (InputSplit) ReflectionUtils.newInstance(inputSplitClass, conf);
  Deserializer splitReader = new SerializationFactory(conf).getDeserializer(inputSplitClass);
  DataInputStream splitStream = (DataInputStream) in;
  splitReader.open(splitStream);
  inputSplit = (InputSplit) splitReader.deserialize(inputSplit);
}
From source file:com.baynote.hadoop.TaggedInputSplit.java
License:Apache License
/**
 * Restores this tagged split from {@code in}.
 *
 * <p>Reads the split class, the input format class and the mapper class, then deserializes the
 * wrapped split using the serialization configured in {@code conf}.
 *
 * @param in source of the serialized fields; it is cast to {@link DataInputStream}, so any
 *     other {@link DataInput} implementation fails with a {@link ClassCastException}
 * @throws IOException if reading a field or deserializing the wrapped split fails
 */
@SuppressWarnings("unchecked")
public void readFields(DataInput in) throws IOException {
  inputSplitClass = (Class<? extends InputSplit>) readClass(in);
  inputFormatClass = (Class<? extends InputFormat<?, ?>>) readClass(in);
  mapperClass = (Class<? extends Mapper<?, ?, ?, ?>>) readClass(in);

  // The freshly created instance is handed to the deserializer as the object to fill.
  inputSplit = ReflectionUtils.newInstance(inputSplitClass, conf);
  SerializationFactory serializations = new SerializationFactory(conf);
  Deserializer splitReader = serializations.getDeserializer(inputSplitClass);
  splitReader.open((DataInputStream) in);
  Object restored = splitReader.deserialize(inputSplit);
  inputSplit = (InputSplit) restored;
}
From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java
License:Apache License
/** * loadData: load data for the staff./*from w w w. ja v a 2 s .c o m*/ * * @param job * BSP job configuration * @param workerAgent * Protocol that staff child process uses to contact its parent process * @return boolean * @throws ClassNotFoundException * @throws IOException * e * @throws InterruptedException * e */ @SuppressWarnings("unchecked") public boolean loadData(BSPJob job, WorkerAgentProtocol workerAgent, WorkerAgentForStaffInterface aStaffAgent) throws ClassNotFoundException, IOException, InterruptedException { // rebuild the input split RecordReader input = null; org.apache.hadoop.mapreduce.InputSplit split = null; if (rawSplitClass.equals("no")) { input = null; } else { DataInputBuffer splitBuffer = new DataInputBuffer(); splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength()); SerializationFactory factory = new SerializationFactory(job.getConf()); Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory .getDeserializer(job.getConf().getClassByName(rawSplitClass)); deserializer.open(splitBuffer); split = deserializer.deserialize(null); // rebuild the InputFormat class according to the user configuration InputFormat inputformat = (InputFormat) ReflectionUtils.newInstance( job.getConf().getClass(Constants.USER_BC_BSP_JOB_INPUT_FORMAT_CLASS, InputFormat.class), job.getConf()); inputformat.initialize(job.getConf()); input = inputformat.createRecordReader(split, job); input.initialize(split, job.getConf()); } SuperStepReportContainer ssrc = new SuperStepReportContainer(); ssrc.setPartitionId(this.getPartition()); this.numCopy = (int) (1 / (job.getConf().getFloat(Constants.USER_BC_BSP_JOB_BALANCE_FACTOR, Constants.USER_BC_BSP_JOB_BALANCE_FACTOR_DEFAULT))); ssrc.setNumCopy(numCopy); ssrc.setCheckNum(this.staffNum); StaffSSControllerInterface lsssc = new StaffSSController(this.getJobId(), this.getSid(), workerAgent); long start = System.currentTimeMillis(); 
LOG.info("in BCBSP with PartitionType is: Hash" + " start time:" + start); if (this.staffNum == 1 || job.getConf().getBoolean(Constants.USER_BC_BSP_JOB_ISDIVIDE, false)) { this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance( job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class), job.getConf()); this.partitioner.setNumPartition(this.staffNum); this.partitioner.intialize(job, split); WritePartition writePartition = new NotDivideWritePartition(); /* * RecordParse recordParse = (RecordParse) ReflectionUtils .newInstance( * job.getConf() .getClass( Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS, * RecordParseDefault.class), job .getConf()); recordParse.init(job); * //add by chen for null bug this.recordParse = recordParse; * //this.recordParse.init(job); */ writePartition.setRecordParse(this.recordParse); writePartition.setStaff(this); writePartition.write(input); ssrc.setDirFlag(new String[] { "1" }); ssrc.setCheckNum(this.staffNum); lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH); LOG.info("The number of verteices from other staff" + " that cound not be parsed:" + this.lost); LOG.info("in BCBSP with PartitionType is:HASH" + " the number of HeadNode in this partition is:" + graphData.sizeForAll()); graphData.finishAdd(); ssrc.setCheckNum(this.staffNum * 2); ssrc.setDirFlag(new String[] { "2" }); lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH); } else { this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance( job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class), job.getConf()); WritePartition writePartition = (WritePartition) ReflectionUtils.newInstance(job.getConf().getClass( Constants.USER_BC_BSP_JOB_WRITEPARTITION_CLASS, HashWritePartition.class), job.getConf()); int multiple = 1; if (writePartition instanceof HashWithBalancerWritePartition) { this.partitioner.setNumPartition(this.staffNum * numCopy); multiple = 2; } else { 
this.partitioner.setNumPartition(this.staffNum); multiple = 1; if (writePartition instanceof RangeWritePartition) { multiple = 2; } } this.partitioner.intialize(job, split); /* * RecordParse recordParse = (RecordParse) ReflectionUtils .newInstance( * job.getConf() .getClass( Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS, * RecordParseDefault.class), job .getConf()); recordParse.init(job); // * this.recordParse = (RecordParse) ReflectionUtils.newInstance( // * job.getConf().getClass( // Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS, * // RecordParseDefault.class), job.getConf()); // * this.recordParse.init(job); this.recordParse = recordParse; */ writePartition.setPartitioner(partitioner); writePartition.setRecordParse(this.recordParse); writePartition.setStaff(this); writePartition.setWorkerAgent(aStaffAgent); writePartition.setSsrc(ssrc); writePartition.setSssc(lsssc); writePartition.setTotalCatchSize(job.getConf().getInt(Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE, Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE_DEFAULT)); int threadNum = job.getConf().getInt(Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER, Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER_DEFAULT); if (threadNum > this.staffNum) { threadNum = this.staffNum - 1; } writePartition.setSendThreadNum(threadNum); writePartition.write(input); ssrc.setDirFlag(new String[] { "1" }); ssrc.setCheckNum(this.staffNum * multiple); lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH); LOG.info("The number of verteices from other staff that" + " cound not be parsed:" + this.lost); LOG.info("in BCBSP with PartitionType is:HASH" + " the number of HeadNode in this partition is:" + graphData.sizeForAll()); graphData.finishAdd(); ssrc.setCheckNum(this.staffNum * (multiple + 1)); ssrc.setDirFlag(new String[] { "2" }); lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH); } long end = System.currentTimeMillis(); LOG.info("in BCBSP with PartitionType is:HASH" + " end time:" + end); LOG.info( "in BCBSP with PartitionType is:HASH" + 
" using time:" + (float) (end - start) / 1000 + " seconds"); return true; }
From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java
License:Apache License
/**
 * Rebuilds the partitioner for a staff that is being recovered.
 *
 * <p>Advances the super step counter, recreates the configured partitioner, rebuilds the input
 * split from its serialized form (unless the split class is recorded as {@code "no"}) and
 * initializes the partitioner with it.
 *
 * @param job BSP job configuration
 * @param writePartition write partition in use; only its type is inspected, to decide how many
 *     partitions the partitioner gets
 * @throws ClassNotFoundException declared for the lookup of the recorded split class
 * @throws IOException declared for failures while deserializing the split
 */
private void intializePartitionForRecovery(BSPJob job, WritePartition writePartition)
    throws ClassNotFoundException, IOException {
  this.currentSuperStepCounter = ssc.getNextSuperStepNum();
  LOG.info("Now, this super step count is " + this.currentSuperStepCounter);

  this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance(
      job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class),
      job.getConf());
  boolean balanced = writePartition instanceof HashWithBalancerWritePartition;
  this.partitioner.setNumPartition(balanced ? this.staffNum * numCopy : this.staffNum);

  org.apache.hadoop.mapreduce.InputSplit split = null;
  if (!rawSplitClass.equals("no")) {
    // Rebuild the input split from its serialized bytes.
    DataInputBuffer rawSplitBytes = new DataInputBuffer();
    rawSplitBytes.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
    SerializationFactory serializations = new SerializationFactory(job.getConf());
    Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> splitReader =
        (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) serializations
            .getDeserializer(job.getConf().getClassByName(rawSplitClass));
    splitReader.open(rawSplitBytes);
    split = splitReader.deserialize(null);
  }

  this.partitioner.intialize(job, split);
  displayFirstRoute();
}
From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java
License:Apache License
/** * loadData: load data for the staff in SGA-Graph. * /* w w w . j a v a 2s . c om*/ * @param job * BSP job configuration * @param workerAgent * Protocol that staff child process uses to contact its parent process * @return boolean * @throws ClassNotFoundException * @throws IOException * e * @throws InterruptedException * e */ @SuppressWarnings("unchecked") public boolean loadDataSGAGraph(BSPJob job, WorkerAgentProtocol workerAgent, WorkerAgentForStaffInterface aStaffAgent) throws ClassNotFoundException, IOException, InterruptedException { // rebuild the input split RecordReader input = null; org.apache.hadoop.mapreduce.InputSplit split = null; if (rawSplitClass.equals("no")) { input = null; } else { DataInputBuffer splitBuffer = new DataInputBuffer(); splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength()); SerializationFactory factory = new SerializationFactory(job.getConf()); Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory .getDeserializer(job.getConf().getClassByName(rawSplitClass)); deserializer.open(splitBuffer); split = deserializer.deserialize(null); // rebuild the InputFormat class according to the user configuration InputFormat inputformat = (InputFormat) ReflectionUtils.newInstance( job.getConf().getClass(Constants.USER_BC_BSP_JOB_INPUT_FORMAT_CLASS, InputFormat.class), job.getConf()); inputformat.initialize(job.getConf()); input = inputformat.createRecordReader(split, job); input.initialize(split, job.getConf()); } SuperStepReportContainer ssrc = new SuperStepReportContainer(); ssrc.setPartitionId(this.getPartition()); this.numCopy = (int) (1 / (job.getConf().getFloat(Constants.USER_BC_BSP_JOB_BALANCE_FACTOR, Constants.USER_BC_BSP_JOB_BALANCE_FACTOR_DEFAULT))); ssrc.setNumCopy(numCopy); ssrc.setCheckNum(this.staffNum); StaffSSControllerInterface lsssc = new StaffSSController(this.getJobId(), this.getSid(), workerAgent); long start = 
System.currentTimeMillis(); LOG.info("in BCBSP with PartitionType is: Hash" + " start time:" + start); if (this.staffNum == 1 || job.getConf().getBoolean(Constants.USER_BC_BSP_JOB_ISDIVIDE, false)) { this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance( job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class), job.getConf()); this.partitioner.setNumPartition(this.staffNum); this.partitioner.intialize(job, split); WritePartition writePartition = new NotDivideWritePartition(); /* * RecordParse recordParse = (RecordParse) ReflectionUtils .newInstance( * job.getConf() .getClass( Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS, * RecordParseDefault.class), job .getConf()); recordParse.init(job); * //add by chen for null bug this.recordParse = recordParse; * //this.recordParse.init(job); */ writePartition.setRecordParse(this.recordParse); writePartition.setStaff(this); writePartition.write(input); ssrc.setDirFlag(new String[] { "1" }); ssrc.setCheckNum(this.staffNum); lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH); LOG.info("The number of verteices from other staff" + " that cound not be parsed:" + this.lost); LOG.info("in BCBSP with PartitionType is:HASH" + " the number of HeadNode in this partition is:" + graphData.sizeForAll()); graphData.finishAdd(); ssrc.setCheckNum(this.staffNum * 2); ssrc.setDirFlag(new String[] { "2" }); lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH); } else { this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance( job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class), job.getConf()); WritePartition writePartition = (WritePartition) ReflectionUtils.newInstance(job.getConf().getClass( Constants.USER_BC_BSP_JOB_WRITEPARTITION_CLASS, HashWritePartition.class), job.getConf()); int multiple = 1; if (writePartition instanceof HashWithBalancerWritePartition) { this.partitioner.setNumPartition(this.staffNum * numCopy); multiple = 2; } else 
{ this.partitioner.setNumPartition(this.staffNum); multiple = 1; if (writePartition instanceof RangeWritePartition) { multiple = 2; } } this.partitioner.intialize(job, split); /* * RecordParse recordParse = (RecordParse) ReflectionUtils .newInstance( * job.getConf() .getClass( Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS, * RecordParseDefault.class), job .getConf()); recordParse.init(job); // * this.recordParse = (RecordParse) ReflectionUtils.newInstance( // * job.getConf().getClass( // Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS, * // RecordParseDefault.class), job.getConf()); // * this.recordParse.init(job); this.recordParse = recordParse; */ writePartition.setPartitioner(partitioner); writePartition.setRecordParse(this.recordParse); writePartition.setStaff(this); writePartition.setWorkerAgent(aStaffAgent); writePartition.setSsrc(ssrc); writePartition.setSssc(lsssc); writePartition.setTotalCatchSize(job.getConf().getInt(Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE, Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE_DEFAULT)); int threadNum = job.getConf().getInt(Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER, Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER_DEFAULT); if (threadNum > this.staffNum) { threadNum = this.staffNum - 1; } writePartition.setSendThreadNum(threadNum); writePartition.write(input); ssrc.setDirFlag(new String[] { "1" }); ssrc.setCheckNum(this.staffNum * multiple); lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH); LOG.info("The number of verteices from other staff that" + " cound not be parsed:" + this.lost); LOG.info("in BCBSP with PartitionType is:HASH" + " the number of HeadNode in this partition is:" + graphData.sizeForAll()); graphData.finishAdd(); ssrc.setCheckNum(this.staffNum * (multiple + 1)); ssrc.setDirFlag(new String[] { "2" }); lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH); // for input graph evaluate. 
// this.evaluateflag = lsssc.graphEvaluateBarrier(0, this.staffNum, // this.graphData.getGlobalFactor()); } long end = System.currentTimeMillis(); LOG.info("in BCBSP with PartitionType is:HASH" + " end time:" + end); LOG.info( "in BCBSP with PartitionType is:HASH" + " using time:" + (float) (end - start) / 1000 + " seconds"); return true; }
From source file:com.cloudera.crunch.impl.mr.run.CrunchInputSplit.java
License:Apache License
public void readFields(DataInput in) throws IOException { nodeIndex = in.readInt();//from w w w. j av a 2s . c o m inputFormatClass = (Class<? extends InputFormat>) readClass(in); Class<? extends InputSplit> inputSplitClass = (Class<? extends InputSplit>) readClass(in); inputSplit = (InputSplit) ReflectionUtils.newInstance(inputSplitClass, conf); SerializationFactory factory = new SerializationFactory(conf); Deserializer deserializer = factory.getDeserializer(inputSplitClass); deserializer.open((DataInputStream) in); inputSplit = (InputSplit) deserializer.deserialize(inputSplit); }
From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.TaggedInputSplit.java
License:Apache License
/**
 * Restores this tagged split from {@code in}.
 *
 * <p>Reads the split class, the input format file name and the input processor file name, then
 * deserializes the wrapped split using the serialization configured in {@code conf}.
 *
 * @param in source of the serialized fields; it is cast to {@link DataInputStream}, so any
 *     other {@link DataInput} implementation fails with a {@link ClassCastException}
 * @throws IOException if reading a field or deserializing the wrapped split fails
 */
@SuppressWarnings("unchecked")
public void readFields(DataInput in) throws IOException {
  inputSplitClass = (Class<? extends InputSplit>) readClass(in);
  inputFormatFile = Text.readString(in);
  inputProcessorFile = Text.readString(in);

  // The freshly created instance is handed to the deserializer as the object to fill.
  inputSplit = (InputSplit) ReflectionUtils.newInstance(inputSplitClass, conf);
  SerializationFactory serializations = new SerializationFactory(conf);
  Deserializer splitReader = serializations.getDeserializer(inputSplitClass);
  splitReader.open((DataInputStream) in);
  Object restored = splitReader.deserialize(inputSplit);
  inputSplit = (InputSplit) restored;
}
From source file:com.datatorrent.demos.mroperator.MapOperator.java
License:Open Source License
/**
 * Prepares the operator for processing.
 *
 * <p>Creates the reporter and output collector, instantiates the input format, rebuilds the
 * input split from the bytes held in {@code outstream}, opens a record reader on it, loads the
 * optional {@code configFile} classpath resource into the configuration, and finally
 * instantiates and configures the map and combine objects when their classes are set.
 *
 * @param context operator context; when non-null its id is recorded as the operator id
 * @throws RuntimeException if the input format or record reader cannot be initialized, or if
 *     the map or combine class cannot be instantiated (the original failure is kept as cause)
 */
@Override
public void setup(OperatorContext context) {
  if (context != null) {
    operatorId = context.getId();
  }
  reporter = new ReporterImpl(ReporterType.Mapper, new Counters());
  outputCollector = new OutputCollectorImpl<K2, V2>();
  Configuration conf = new Configuration();
  try {
    inputFormat = inputFormatClass.newInstance();
    SerializationFactory serializationFactory = new SerializationFactory(conf);
    Deserializer splitDeserializer = serializationFactory.getDeserializer(inputSplitClass);
    splitDeserializer.open(new ByteArrayInputStream(outstream.toByteArray()));
    inputSplit = (InputSplit) splitDeserializer.deserialize(null);
    ((ReporterImpl) reporter).setInputSplit(inputSplit);
    reader = inputFormat.getRecordReader(inputSplit, new JobConf(conf), reporter);
  } catch (Exception e) {
    logger.info("failed to initialize inputformat obj {}", inputFormat);
    throw new RuntimeException(e);
  }

  // NOTE(review): the config resource is added only after the record reader above was created,
  // so the reader's JobConf does not include it - confirm this ordering is intended.
  InputStream stream = null;
  if (configFile != null && configFile.length() > 0) {
    stream = ClassLoader.getSystemResourceAsStream("/" + configFile);
    if (stream == null) {
      stream = ClassLoader.getSystemResourceAsStream(configFile);
    }
  }
  if (stream != null) {
    conf.addResource(stream);
  }
  jobConf = new JobConf(conf);

  if (mapClass != null) {
    try {
      mapObject = mapClass.newInstance();
    } catch (Exception e) {
      logger.info("can't instantiate object {}", e.getMessage());
      // Fail fast with the real cause: continuing would only call configure() on an object
      // that was never assigned here.
      throw new RuntimeException("can't instantiate " + mapClass, e);
    }
    mapObject.configure(jobConf);
  }
  if (combineClass != null) {
    try {
      combineObject = combineClass.newInstance();
    } catch (Exception e) {
      logger.info("can't instantiate object {}", e.getMessage());
      // Same reasoning as for the map object above.
      throw new RuntimeException("can't instantiate " + combineClass, e);
    }
    combineObject.configure(jobConf);
  }
}
From source file:com.google.appengine.tools.mapreduce.SerializationUtil.java
License:Apache License
/** * Deserialize an object from a byte array. This uses {@code conf}'s * serialization preferences to support arbitrary serialization mechanisms * using {@link SerializationFactory}./*from w ww. j a va 2 s.c o m*/ * * @param conf the configuration to use for serialization preferences * @param expectedClass a type token to set the return type * @param className the name of the class to deserialize to * @param toDeserialize the serialized object as a byte array * @param initialState an object with initial state. Some deserializers may * throw this away. You can pass {@code null} to signify that there is no * initial state. */ @SuppressWarnings("unchecked") public static <T> T deserializeFromByteArray(Configuration conf, Class<T> expectedClass, String className, byte[] toDeserialize, T initialState) { log.fine("Trying to deserialize: " + className); SerializationFactory serializationFactory = new SerializationFactory(conf); try { Class<?> deserializationClass = conf.getClassByName(className); if (!expectedClass.isAssignableFrom(deserializationClass)) { throw new ClassCastException("Attempted to deserialize a " + deserializationClass.getCanonicalName() + " but expected a " + expectedClass.getCanonicalName()); } Deserializer<T> deserializer = serializationFactory.getDeserializer((Class<T>) deserializationClass); ByteArrayInputStream inputStream = new ByteArrayInputStream(toDeserialize); deserializer.open(inputStream); return deserializer.deserialize(initialState); } catch (ClassNotFoundException e) { // If we're deserializing, then we should have already seen this class // and this is strictly a programming error. Hence the RuntimeException. throw new RuntimeException("Couldn't get class for deserializing " + className, e); } catch (UnsupportedEncodingException e) { throw new RuntimeException("JDK doesn't understand UTF8", e); } catch (IOException e) { throw new RuntimeException("Got an IOException from a ByteArrayInputStream. This should never happen.", e); } }