List of usage examples for the org.apache.hadoop.io.serializer.SerializationFactory constructor SerializationFactory(Configuration conf)
public SerializationFactory(Configuration conf)
Serializations are found by reading the io.serializations property from conf, which is a comma-delimited list of classnames.
From source file:cascading.tuple.hadoop.TupleSerialization.java
License:Open Source License
/**
 * Returns the cached {@link SerializationFactory}, building it from {@code getConf()} the first
 * time it is requested.
 *
 * @return the factory held in the {@code serializationFactory} field
 */
SerializationFactory getSerializationFactory() {
  if (serializationFactory != null) {
    return serializationFactory;
  }

  serializationFactory = new SerializationFactory(getConf());
  return serializationFactory;
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.TaggedInputSplit.java
License:Apache License
/**
 * Restores this tagged split from {@code in}.
 *
 * <p>Fields are read in this order: name, input split class, input format class, mapper class
 * name, JSON-encoded input configs, and finally the wrapped split, which is decoded by the
 * deserializer that {@link SerializationFactory} returns for the split class.
 *
 * @param in source of the serialized fields; it is cast to {@link DataInputStream} when the
 *     deserializer is opened
 * @throws IOException declared for the read and deserialization calls made here
 */
@SuppressWarnings("unchecked")
@Override
public void readFields(DataInput in) throws IOException {
  name = Text.readString(in);
  inputSplitClass = (Class<? extends InputSplit>) readClass(in);
  inputFormatClass = (Class<? extends InputFormat<?, ?>>) readClass(in);
  mapperClassName = Text.readString(in);

  String configsJson = Text.readString(in);
  inputConfigs = GSON.fromJson(configsJson, STRING_STRING_MAP_TYPE);

  // A fresh split instance is created first and then handed to the deserializer.
  inputSplit = ReflectionUtils.newInstance(inputSplitClass, conf);
  Deserializer splitDeserializer = new SerializationFactory(conf).getDeserializer(inputSplitClass);
  splitDeserializer.open((DataInputStream) in);
  inputSplit = (InputSplit) splitDeserializer.deserialize(inputSplit);
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.TaggedInputSplit.java
License:Apache License
/**
 * Writes this tagged split to {@code out}.
 *
 * <p>Fields are written in this order: name, input split class name, input format class name,
 * mapper class name, the input configs as JSON, and finally the wrapped split, encoded by the
 * serializer that {@link SerializationFactory} returns for the split class.
 *
 * @param out destination of the serialized fields; it is cast to {@link DataOutputStream} when
 *     the serializer is opened
 * @throws IOException declared for the write and serialization calls made here
 */
@SuppressWarnings("unchecked")
@Override
public void write(DataOutput out) throws IOException {
  Text.writeString(out, name);
  Text.writeString(out, inputSplitClass.getName());
  Text.writeString(out, inputFormatClass.getName());
  Text.writeString(out, mapperClassName);

  String configsJson = GSON.toJson(inputConfigs);
  Text.writeString(out, configsJson);

  Serializer splitSerializer = new SerializationFactory(conf).getSerializer(inputSplitClass);
  splitSerializer.open((DataOutputStream) out);
  splitSerializer.serialize(inputSplit);
}
From source file:com.ambiata.ivory.operation.hadoop.TaggedInputSplit.java
License:Apache License
/**
 * Restores this tagged split from {@code in}.
 *
 * <p>Three classes are read through {@code readClass} (input split, input format, mapper), then
 * the wrapped split is decoded by the deserializer that {@link SerializationFactory} returns for
 * the split class.
 *
 * @param in source of the serialized fields; it is cast to {@link DataInputStream} when the
 *     deserializer is opened
 * @throws IOException declared for the read and deserialization calls made here
 */
@SuppressWarnings("unchecked")
public void readFields(DataInput in) throws IOException {
  inputSplitClass = (Class<? extends InputSplit>) readClass(in);
  inputFormatClass = (Class<? extends InputFormat<?, ?>>) readClass(in);
  mapperClass = (Class<? extends Mapper<?, ?, ?, ?>>) readClass(in);

  // A fresh split instance is created first and then handed to the deserializer.
  inputSplit = (InputSplit) ReflectionUtils.newInstance(inputSplitClass, conf);
  Deserializer splitDeserializer = new SerializationFactory(conf).getDeserializer(inputSplitClass);
  splitDeserializer.open((DataInputStream) in);
  inputSplit = (InputSplit) splitDeserializer.deserialize(inputSplit);
}
From source file:com.ambiata.ivory.operation.hadoop.TaggedInputSplit.java
License:Apache License
@SuppressWarnings("unchecked") public void write(DataOutput out) throws IOException { Text.writeString(out, inputSplitClass.getName()); Text.writeString(out, inputFormatClass.getName()); Text.writeString(out, mapperClass.getName()); SerializationFactory factory = new SerializationFactory(conf); Serializer serializer = factory.getSerializer(inputSplitClass); serializer.open((DataOutputStream) out); serializer.serialize(inputSplit);//w w w . ja va2 s. c o m }
From source file:com.asakusafw.runtime.mapreduce.simple.KeyValueSorterTest.java
License:Apache License
/**
 * Returns the sorter shared by this test, creating and registering it through {@code manage}
 * on first use.
 *
 * <p>The sorter is built with a buffer size of {@code 0} and a new folder obtained from
 * {@code temporaryFolder} as its temporary directory.
 *
 * @return the cached {@code IntWritable}/{@code Text} sorter
 * @throws IOException declared for the calls made while building the sorter
 */
private KeyValueSorter<IntWritable, Text> sorter() throws IOException {
    if (sorter != null) {
        return sorter;
    }

    SerializationFactory serializations =
            new SerializationFactory(new ConfigurationProvider().newInstance());
    sorter = manage(new KeyValueSorter<>(
            serializations,
            IntWritable.class,
            Text.class,
            new IntWritable.Comparator(),
            new KeyValueSorter.Options()
                    .withBufferSize(0)
                    .withTemporaryDirectory(temporaryFolder.newFolder())));
    return sorter;
}
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java
License:Apache License
private <K, V> KeyValueSorter<?, ?> createSorter(Job job, Class<K> key, Class<V> value) { KeyValueSorter.Options options = getSorterOptions(job.getConfiguration()); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format( "shuffle buffer size: {1}bytes/page, {2}bytes/block, compression:{3} ({0})", //$NON-NLS-1$ job.getJobName(), options.getPageSize(), options.getBlockSize(), options.isCompressBlock())); }/* ww w. j av a 2 s . c o m*/ return new KeyValueSorter<>(new SerializationFactory(job.getConfiguration()), key, value, job.getSortComparator(), options); }
From source file:com.baynote.hadoop.TaggedInputSplit.java
License:Apache License
/**
 * Restores this tagged split from {@code in}.
 *
 * <p>Three classes are read through {@code readClass} (input split, input format, mapper), then
 * the wrapped split is decoded by the deserializer that {@link SerializationFactory} returns for
 * the split class.
 *
 * @param in source of the serialized fields; it is cast to {@link DataInputStream} when the
 *     deserializer is opened
 * @throws IOException declared for the read and deserialization calls made here
 */
@SuppressWarnings("unchecked")
public void readFields(DataInput in) throws IOException {
  inputSplitClass = (Class<? extends InputSplit>) readClass(in);
  inputFormatClass = (Class<? extends InputFormat<?, ?>>) readClass(in);
  mapperClass = (Class<? extends Mapper<?, ?, ?, ?>>) readClass(in);

  // A fresh split instance is created first and then handed to the deserializer.
  inputSplit = ReflectionUtils.newInstance(inputSplitClass, conf);
  Deserializer splitDeserializer = new SerializationFactory(conf).getDeserializer(inputSplitClass);
  splitDeserializer.open((DataInputStream) in);
  inputSplit = (InputSplit) splitDeserializer.deserialize(inputSplit);
}
From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java
License:Apache License
/** * loadData: load data for the staff./*from www . j av a 2s .co m*/ * * @param job * BSP job configuration * @param workerAgent * Protocol that staff child process uses to contact its parent process * @return boolean * @throws ClassNotFoundException * @throws IOException * e * @throws InterruptedException * e */ @SuppressWarnings("unchecked") public boolean loadData(BSPJob job, WorkerAgentProtocol workerAgent, WorkerAgentForStaffInterface aStaffAgent) throws ClassNotFoundException, IOException, InterruptedException { // rebuild the input split RecordReader input = null; org.apache.hadoop.mapreduce.InputSplit split = null; if (rawSplitClass.equals("no")) { input = null; } else { DataInputBuffer splitBuffer = new DataInputBuffer(); splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength()); SerializationFactory factory = new SerializationFactory(job.getConf()); Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory .getDeserializer(job.getConf().getClassByName(rawSplitClass)); deserializer.open(splitBuffer); split = deserializer.deserialize(null); // rebuild the InputFormat class according to the user configuration InputFormat inputformat = (InputFormat) ReflectionUtils.newInstance( job.getConf().getClass(Constants.USER_BC_BSP_JOB_INPUT_FORMAT_CLASS, InputFormat.class), job.getConf()); inputformat.initialize(job.getConf()); input = inputformat.createRecordReader(split, job); input.initialize(split, job.getConf()); } SuperStepReportContainer ssrc = new SuperStepReportContainer(); ssrc.setPartitionId(this.getPartition()); this.numCopy = (int) (1 / (job.getConf().getFloat(Constants.USER_BC_BSP_JOB_BALANCE_FACTOR, Constants.USER_BC_BSP_JOB_BALANCE_FACTOR_DEFAULT))); ssrc.setNumCopy(numCopy); ssrc.setCheckNum(this.staffNum); StaffSSControllerInterface lsssc = new StaffSSController(this.getJobId(), this.getSid(), workerAgent); long start = System.currentTimeMillis(); 
LOG.info("in BCBSP with PartitionType is: Hash" + " start time:" + start); if (this.staffNum == 1 || job.getConf().getBoolean(Constants.USER_BC_BSP_JOB_ISDIVIDE, false)) { this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance( job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class), job.getConf()); this.partitioner.setNumPartition(this.staffNum); this.partitioner.intialize(job, split); WritePartition writePartition = new NotDivideWritePartition(); /* * RecordParse recordParse = (RecordParse) ReflectionUtils .newInstance( * job.getConf() .getClass( Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS, * RecordParseDefault.class), job .getConf()); recordParse.init(job); * //add by chen for null bug this.recordParse = recordParse; * //this.recordParse.init(job); */ writePartition.setRecordParse(this.recordParse); writePartition.setStaff(this); writePartition.write(input); ssrc.setDirFlag(new String[] { "1" }); ssrc.setCheckNum(this.staffNum); lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH); LOG.info("The number of verteices from other staff" + " that cound not be parsed:" + this.lost); LOG.info("in BCBSP with PartitionType is:HASH" + " the number of HeadNode in this partition is:" + graphData.sizeForAll()); graphData.finishAdd(); ssrc.setCheckNum(this.staffNum * 2); ssrc.setDirFlag(new String[] { "2" }); lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH); } else { this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance( job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class), job.getConf()); WritePartition writePartition = (WritePartition) ReflectionUtils.newInstance(job.getConf().getClass( Constants.USER_BC_BSP_JOB_WRITEPARTITION_CLASS, HashWritePartition.class), job.getConf()); int multiple = 1; if (writePartition instanceof HashWithBalancerWritePartition) { this.partitioner.setNumPartition(this.staffNum * numCopy); multiple = 2; } else { 
this.partitioner.setNumPartition(this.staffNum); multiple = 1; if (writePartition instanceof RangeWritePartition) { multiple = 2; } } this.partitioner.intialize(job, split); /* * RecordParse recordParse = (RecordParse) ReflectionUtils .newInstance( * job.getConf() .getClass( Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS, * RecordParseDefault.class), job .getConf()); recordParse.init(job); // * this.recordParse = (RecordParse) ReflectionUtils.newInstance( // * job.getConf().getClass( // Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS, * // RecordParseDefault.class), job.getConf()); // * this.recordParse.init(job); this.recordParse = recordParse; */ writePartition.setPartitioner(partitioner); writePartition.setRecordParse(this.recordParse); writePartition.setStaff(this); writePartition.setWorkerAgent(aStaffAgent); writePartition.setSsrc(ssrc); writePartition.setSssc(lsssc); writePartition.setTotalCatchSize(job.getConf().getInt(Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE, Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE_DEFAULT)); int threadNum = job.getConf().getInt(Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER, Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER_DEFAULT); if (threadNum > this.staffNum) { threadNum = this.staffNum - 1; } writePartition.setSendThreadNum(threadNum); writePartition.write(input); ssrc.setDirFlag(new String[] { "1" }); ssrc.setCheckNum(this.staffNum * multiple); lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH); LOG.info("The number of verteices from other staff that" + " cound not be parsed:" + this.lost); LOG.info("in BCBSP with PartitionType is:HASH" + " the number of HeadNode in this partition is:" + graphData.sizeForAll()); graphData.finishAdd(); ssrc.setCheckNum(this.staffNum * (multiple + 1)); ssrc.setDirFlag(new String[] { "2" }); lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH); } long end = System.currentTimeMillis(); LOG.info("in BCBSP with PartitionType is:HASH" + " end time:" + end); LOG.info( "in BCBSP with PartitionType is:HASH" + 
" using time:" + (float) (end - start) / 1000 + " seconds"); return true; }
From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java
License:Apache License
/**
 * Rebuilds the partitioner for recovery and initializes it with the input split.
 *
 * <p>The super step counter is taken from {@code ssc.getNextSuperStepNum()}, and the partitioner
 * is instantiated from the configured partitioner class. Unless {@code rawSplitClass} is
 * {@code "no"}, the split is deserialized from {@code rawSplit}; otherwise the partitioner is
 * initialized with a {@code null} split.
 *
 * @param job BSP job whose configuration is read
 * @param writePartition write partition whose concrete type selects the number of partitions
 * @throws ClassNotFoundException declared for the {@code getClassByName} lookup of the split class
 * @throws IOException declared for the deserialization calls made here
 */
private void intializePartitionForRecovery(BSPJob job, WritePartition writePartition)
    throws ClassNotFoundException, IOException {
  this.currentSuperStepCounter = ssc.getNextSuperStepNum();
  LOG.info("Now, this super step count is " + this.currentSuperStepCounter);

  Class<?> partitionerClass =
      job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS, HashPartitioner.class);
  this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance(partitionerClass, job.getConf());
  if (writePartition instanceof HashWithBalancerWritePartition) {
    this.partitioner.setNumPartition(this.staffNum * numCopy);
  } else {
    this.partitioner.setNumPartition(this.staffNum);
  }

  org.apache.hadoop.mapreduce.InputSplit recoveredSplit = null;
  if (!rawSplitClass.equals("no")) {
    DataInputBuffer rawBytes = new DataInputBuffer();
    rawBytes.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
    SerializationFactory serializations = new SerializationFactory(job.getConf());
    Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> splitReader =
        (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) serializations
            .getDeserializer(job.getConf().getClassByName(rawSplitClass));
    splitReader.open(rawBytes);
    recoveredSplit = splitReader.deserialize(null);
  }

  this.partitioner.intialize(job, recoveredSplit);
  displayFirstRoute();
}