Usage examples for org.apache.hadoop.io.DataInputBuffer.reset
public void reset(byte[] input, int start, int length)
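reset(input, start, length) repoints an existing DataInputBuffer at a slice of a byte array so the same buffer can be reused without reallocation; subsequent reads consume bytes from input[start] through input[start + length - 1]. Before the examples from real projects below, here is a minimal round-trip sketch (illustrative, not drawn from the sources that follow):

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

// Minimal round trip: serialize a Writable into a DataOutputBuffer,
// then point a DataInputBuffer at the same backing array with reset()
// and read the value back.
public class DataInputBufferResetExample {
  public static void main(String[] args) throws Exception {
    DataOutputBuffer out = new DataOutputBuffer();
    new Text("hello").write(out);

    DataInputBuffer in = new DataInputBuffer();
    // getData() returns the backing array, which may be larger than the
    // valid contents, so bound the read with getLength().
    in.reset(out.getData(), 0, out.getLength());

    Text roundTripped = new Text();
    roundTripped.readFields(in);
    System.out.println(roundTripped); // prints "hello"
  }
}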
From source file:com.alibaba.wasp.EntityGroupInfo.java
License:Apache License
/**
 * Parses all the EntityGroupInfo instances from the passed in stream until
 * EOF. Presumes the EntityGroupInfo's were serialized to the stream with
 * {@link #toDelimitedByteArray()}.
 *
 * @param bytes
 *          serialized bytes
 * @param offset
 *          the start offset into the byte[] buffer
 * @param length
 *          how far we should read into the byte[] buffer
 * @return All the entityGroupInfos that are in the byte array. Keeps reading
 *         till we hit the end.
 */
public static List<EntityGroupInfo> parseDelimitedFrom(final byte[] bytes, final int offset,
    final int length) throws IOException {
  if (bytes == null) {
    throw new IllegalArgumentException("Can't build an object with empty bytes array");
  }
  DataInputBuffer in = new DataInputBuffer();
  List<EntityGroupInfo> egis = new ArrayList<EntityGroupInfo>();
  try {
    in.reset(bytes, offset, length);
    while (in.available() > 0) {
      EntityGroupInfo egi = parseFrom(in);
      egis.add(egi);
    }
  } finally {
    in.close();
  }
  return egis;
}
From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java
License:Apache License
/**
 * loadData: load data for the staff.
 *
 * @param job
 *          BSP job configuration
 * @param workerAgent
 *          Protocol that staff child process uses to contact its parent process
 * @return boolean
 * @throws ClassNotFoundException
 * @throws IOException
 * @throws InterruptedException
 */
@SuppressWarnings("unchecked")
public boolean loadData(BSPJob job, WorkerAgentProtocol workerAgent,
    WorkerAgentForStaffInterface aStaffAgent)
    throws ClassNotFoundException, IOException, InterruptedException {
  // rebuild the input split
  RecordReader input = null;
  org.apache.hadoop.mapreduce.InputSplit split = null;
  if (rawSplitClass.equals("no")) {
    input = null;
  } else {
    DataInputBuffer splitBuffer = new DataInputBuffer();
    splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
    SerializationFactory factory = new SerializationFactory(job.getConf());
    Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer =
        (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory
            .getDeserializer(job.getConf().getClassByName(rawSplitClass));
    deserializer.open(splitBuffer);
    split = deserializer.deserialize(null);
    // rebuild the InputFormat class according to the user configuration
    InputFormat inputformat = (InputFormat) ReflectionUtils.newInstance(
        job.getConf().getClass(Constants.USER_BC_BSP_JOB_INPUT_FORMAT_CLASS, InputFormat.class),
        job.getConf());
    inputformat.initialize(job.getConf());
    input = inputformat.createRecordReader(split, job);
    input.initialize(split, job.getConf());
  }
  SuperStepReportContainer ssrc = new SuperStepReportContainer();
  ssrc.setPartitionId(this.getPartition());
  this.numCopy = (int) (1 / (job.getConf().getFloat(Constants.USER_BC_BSP_JOB_BALANCE_FACTOR,
      Constants.USER_BC_BSP_JOB_BALANCE_FACTOR_DEFAULT)));
  ssrc.setNumCopy(numCopy);
  ssrc.setCheckNum(this.staffNum);
  StaffSSControllerInterface lsssc =
      new StaffSSController(this.getJobId(), this.getSid(), workerAgent);
  long start = System.currentTimeMillis();
  LOG.info("in BCBSP with PartitionType is: Hash start time:" + start);
  if (this.staffNum == 1
      || job.getConf().getBoolean(Constants.USER_BC_BSP_JOB_ISDIVIDE, false)) {
    this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance(
        job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS,
            HashPartitioner.class), job.getConf());
    this.partitioner.setNumPartition(this.staffNum);
    this.partitioner.intialize(job, split);
    WritePartition writePartition = new NotDivideWritePartition();
    /*
     * RecordParse recordParse = (RecordParse) ReflectionUtils.newInstance(
     *     job.getConf().getClass(Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS,
     *         RecordParseDefault.class), job.getConf());
     * recordParse.init(job);
     * // add by chen for null bug
     * this.recordParse = recordParse;
     * // this.recordParse.init(job);
     */
    writePartition.setRecordParse(this.recordParse);
    writePartition.setStaff(this);
    writePartition.write(input);
    ssrc.setDirFlag(new String[] { "1" });
    ssrc.setCheckNum(this.staffNum);
    lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);
    LOG.info("The number of vertices from other staff that could not be parsed:" + this.lost);
    LOG.info("in BCBSP with PartitionType is:HASH the number of HeadNode in this partition is:"
        + graphData.sizeForAll());
    graphData.finishAdd();
    ssrc.setCheckNum(this.staffNum * 2);
    ssrc.setDirFlag(new String[] { "2" });
    lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);
  } else {
    this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance(
        job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS,
            HashPartitioner.class), job.getConf());
    WritePartition writePartition = (WritePartition) ReflectionUtils.newInstance(
        job.getConf().getClass(Constants.USER_BC_BSP_JOB_WRITEPARTITION_CLASS,
            HashWritePartition.class), job.getConf());
    int multiple = 1;
    if (writePartition instanceof HashWithBalancerWritePartition) {
      this.partitioner.setNumPartition(this.staffNum * numCopy);
      multiple = 2;
    } else {
      this.partitioner.setNumPartition(this.staffNum);
      multiple = 1;
      if (writePartition instanceof RangeWritePartition) {
        multiple = 2;
      }
    }
    this.partitioner.intialize(job, split);
    /*
     * RecordParse recordParse = (RecordParse) ReflectionUtils.newInstance(
     *     job.getConf().getClass(Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS,
     *         RecordParseDefault.class), job.getConf());
     * recordParse.init(job);
     * this.recordParse = recordParse;
     * // this.recordParse.init(job);
     */
    writePartition.setPartitioner(partitioner);
    writePartition.setRecordParse(this.recordParse);
    writePartition.setStaff(this);
    writePartition.setWorkerAgent(aStaffAgent);
    writePartition.setSsrc(ssrc);
    writePartition.setSssc(lsssc);
    writePartition.setTotalCatchSize(job.getConf().getInt(
        Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE,
        Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE_DEFAULT));
    int threadNum = job.getConf().getInt(Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER,
        Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER_DEFAULT);
    if (threadNum > this.staffNum) {
      threadNum = this.staffNum - 1;
    }
    writePartition.setSendThreadNum(threadNum);
    writePartition.write(input);
    ssrc.setDirFlag(new String[] { "1" });
    ssrc.setCheckNum(this.staffNum * multiple);
    lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);
    LOG.info("The number of vertices from other staff that could not be parsed:" + this.lost);
    LOG.info("in BCBSP with PartitionType is:HASH the number of HeadNode in this partition is:"
        + graphData.sizeForAll());
    graphData.finishAdd();
    ssrc.setCheckNum(this.staffNum * (multiple + 1));
    ssrc.setDirFlag(new String[] { "2" });
    lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);
  }
  long end = System.currentTimeMillis();
  LOG.info("in BCBSP with PartitionType is:HASH end time:" + end);
  LOG.info("in BCBSP with PartitionType is:HASH using time:"
      + (float) (end - start) / 1000 + " seconds");
  return true;
}
From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java
License:Apache License
/**
 * Rebuild the partition and read data from the checkpoint for initializing.
 *
 * @param job
 * @param writePartition
 * @throws ClassNotFoundException
 * @throws IOException
 */
private void intializePartitionForRecovery(BSPJob job, WritePartition writePartition)
    throws ClassNotFoundException, IOException {
  this.currentSuperStepCounter = ssc.getNextSuperStepNum();
  LOG.info("Now, this super step count is " + this.currentSuperStepCounter);
  this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance(
      job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS,
          HashPartitioner.class), job.getConf());
  if (writePartition instanceof HashWithBalancerWritePartition) {
    this.partitioner.setNumPartition(this.staffNum * numCopy);
  } else {
    this.partitioner.setNumPartition(this.staffNum);
  }
  org.apache.hadoop.mapreduce.InputSplit split = null;
  if (!rawSplitClass.equals("no")) {
    DataInputBuffer splitBuffer = new DataInputBuffer();
    splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
    SerializationFactory factory = new SerializationFactory(job.getConf());
    Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer =
        (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory
            .getDeserializer(job.getConf().getClassByName(rawSplitClass));
    deserializer.open(splitBuffer);
    split = deserializer.deserialize(null);
  }
  this.partitioner.intialize(job, split);
  displayFirstRoute();
}
From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java
License:Apache License
/**
 * loadData: load data for the staff in SGA-Graph.
 *
 * @param job
 *          BSP job configuration
 * @param workerAgent
 *          Protocol that staff child process uses to contact its parent process
 * @return boolean
 * @throws ClassNotFoundException
 * @throws IOException
 * @throws InterruptedException
 */
@SuppressWarnings("unchecked")
public boolean loadDataSGAGraph(BSPJob job, WorkerAgentProtocol workerAgent,
    WorkerAgentForStaffInterface aStaffAgent)
    throws ClassNotFoundException, IOException, InterruptedException {
  // rebuild the input split
  RecordReader input = null;
  org.apache.hadoop.mapreduce.InputSplit split = null;
  if (rawSplitClass.equals("no")) {
    input = null;
  } else {
    DataInputBuffer splitBuffer = new DataInputBuffer();
    splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
    SerializationFactory factory = new SerializationFactory(job.getConf());
    Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer =
        (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory
            .getDeserializer(job.getConf().getClassByName(rawSplitClass));
    deserializer.open(splitBuffer);
    split = deserializer.deserialize(null);
    // rebuild the InputFormat class according to the user configuration
    InputFormat inputformat = (InputFormat) ReflectionUtils.newInstance(
        job.getConf().getClass(Constants.USER_BC_BSP_JOB_INPUT_FORMAT_CLASS, InputFormat.class),
        job.getConf());
    inputformat.initialize(job.getConf());
    input = inputformat.createRecordReader(split, job);
    input.initialize(split, job.getConf());
  }
  SuperStepReportContainer ssrc = new SuperStepReportContainer();
  ssrc.setPartitionId(this.getPartition());
  this.numCopy = (int) (1 / (job.getConf().getFloat(Constants.USER_BC_BSP_JOB_BALANCE_FACTOR,
      Constants.USER_BC_BSP_JOB_BALANCE_FACTOR_DEFAULT)));
  ssrc.setNumCopy(numCopy);
  ssrc.setCheckNum(this.staffNum);
  StaffSSControllerInterface lsssc =
      new StaffSSController(this.getJobId(), this.getSid(), workerAgent);
  long start = System.currentTimeMillis();
  LOG.info("in BCBSP with PartitionType is: Hash start time:" + start);
  if (this.staffNum == 1
      || job.getConf().getBoolean(Constants.USER_BC_BSP_JOB_ISDIVIDE, false)) {
    this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance(
        job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS,
            HashPartitioner.class), job.getConf());
    this.partitioner.setNumPartition(this.staffNum);
    this.partitioner.intialize(job, split);
    WritePartition writePartition = new NotDivideWritePartition();
    /*
     * RecordParse recordParse = (RecordParse) ReflectionUtils.newInstance(
     *     job.getConf().getClass(Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS,
     *         RecordParseDefault.class), job.getConf());
     * recordParse.init(job);
     * // add by chen for null bug
     * this.recordParse = recordParse;
     * // this.recordParse.init(job);
     */
    writePartition.setRecordParse(this.recordParse);
    writePartition.setStaff(this);
    writePartition.write(input);
    ssrc.setDirFlag(new String[] { "1" });
    ssrc.setCheckNum(this.staffNum);
    lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);
    LOG.info("The number of vertices from other staff that could not be parsed:" + this.lost);
    LOG.info("in BCBSP with PartitionType is:HASH the number of HeadNode in this partition is:"
        + graphData.sizeForAll());
    graphData.finishAdd();
    ssrc.setCheckNum(this.staffNum * 2);
    ssrc.setDirFlag(new String[] { "2" });
    lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);
  } else {
    this.partitioner = (Partitioner<Text>) ReflectionUtils.newInstance(
        job.getConf().getClass(Constants.USER_BC_BSP_JOB_PARTITIONER_CLASS,
            HashPartitioner.class), job.getConf());
    WritePartition writePartition = (WritePartition) ReflectionUtils.newInstance(
        job.getConf().getClass(Constants.USER_BC_BSP_JOB_WRITEPARTITION_CLASS,
            HashWritePartition.class), job.getConf());
    int multiple = 1;
    if (writePartition instanceof HashWithBalancerWritePartition) {
      this.partitioner.setNumPartition(this.staffNum * numCopy);
      multiple = 2;
    } else {
      this.partitioner.setNumPartition(this.staffNum);
      multiple = 1;
      if (writePartition instanceof RangeWritePartition) {
        multiple = 2;
      }
    }
    this.partitioner.intialize(job, split);
    /*
     * RecordParse recordParse = (RecordParse) ReflectionUtils.newInstance(
     *     job.getConf().getClass(Constants.USER_BC_BSP_JOB_RECORDPARSE_CLASS,
     *         RecordParseDefault.class), job.getConf());
     * recordParse.init(job);
     * this.recordParse = recordParse;
     * // this.recordParse.init(job);
     */
    writePartition.setPartitioner(partitioner);
    writePartition.setRecordParse(this.recordParse);
    writePartition.setStaff(this);
    writePartition.setWorkerAgent(aStaffAgent);
    writePartition.setSsrc(ssrc);
    writePartition.setSssc(lsssc);
    writePartition.setTotalCatchSize(job.getConf().getInt(
        Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE,
        Constants.USER_BC_BSP_JOB_TOTALCACHE_SIZE_DEFAULT));
    int threadNum = job.getConf().getInt(Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER,
        Constants.USER_BC_BSP_JOB_SENDTHREADNUMBER_DEFAULT);
    if (threadNum > this.staffNum) {
      threadNum = this.staffNum - 1;
    }
    writePartition.setSendThreadNum(threadNum);
    writePartition.write(input);
    ssrc.setDirFlag(new String[] { "1" });
    ssrc.setCheckNum(this.staffNum * multiple);
    lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);
    LOG.info("The number of vertices from other staff that could not be parsed:" + this.lost);
    LOG.info("in BCBSP with PartitionType is:HASH the number of HeadNode in this partition is:"
        + graphData.sizeForAll());
    graphData.finishAdd();
    ssrc.setCheckNum(this.staffNum * (multiple + 1));
    ssrc.setDirFlag(new String[] { "2" });
    lsssc.loadDataBarrier(ssrc, Constants.PARTITION_TYPE.HASH);
    // for input graph evaluate.
    // this.evaluateflag = lsssc.graphEvaluateBarrier(0, this.staffNum,
    //     this.graphData.getGlobalFactor());
  }
  long end = System.currentTimeMillis();
  LOG.info("in BCBSP with PartitionType is:HASH end time:" + end);
  LOG.info("in BCBSP with PartitionType is:HASH using time:"
      + (float) (end - start) / 1000 + " seconds");
  return true;
}
From source file:com.datasalt.pangool.BaseTest.java
License:Apache License
protected static void assertSerializable(HadoopSerialization ser, ITuple tuple, boolean debug)
    throws IOException {
  DataInputBuffer input = new DataInputBuffer();
  DataOutputBuffer output = new DataOutputBuffer();
  DatumWrapper<ITuple> wrapper = new DatumWrapper<ITuple>(tuple);
  ser.ser(wrapper, output);
  input.reset(output.getData(), 0, output.getLength());
  DatumWrapper<ITuple> wrapper2 = new DatumWrapper<ITuple>();
  wrapper2 = ser.deser(wrapper2, input);
  if (debug) {
    System.out.println("D:" + wrapper2.datum());
  }
  assertEquals(tuple, wrapper2.datum());
}
From source file:com.datasalt.pangool.BaseTest.java
License:Apache License
protected static void assertSerializable(TupleSerializer ser, TupleDeserializer deser,
    DatumWrapper<ITuple> tuple, boolean debug) throws IOException {
  DataOutputBuffer output = new DataOutputBuffer();
  ser.open(output);
  ser.serialize(tuple);
  ser.close();
  DataInputBuffer input = new DataInputBuffer();
  input.reset(output.getData(), 0, output.getLength());
  DatumWrapper<ITuple> deserializedTuple = new DatumWrapper<ITuple>();
  deser.open(input);
  deserializedTuple = deser.deserialize(deserializedTuple);
  deser.close();
  if (debug) {
    System.out.println("D:" + deserializedTuple.datum());
  }
  assertEquals(tuple.datum(), deserializedTuple.datum());
}
From source file:com.datasalt.pangool.BaseTest.java
License:Apache License
protected void assertSerializable(Tuple tuple, Tuple toReuse, boolean debug) throws IOException {
  HadoopSerialization hadoopSer = new HadoopSerialization(getConf());
  SimpleTupleSerializer ser = new SimpleTupleSerializer(hadoopSer);
  SimpleTupleDeserializer deser =
      new SimpleTupleDeserializer(tuple.getSchema(), hadoopSer, getConf());
  DataOutputBuffer output = new DataOutputBuffer();
  ser.open(output);
  ser.serialize(tuple);
  ser.close();
  DataInputBuffer input = new DataInputBuffer();
  input.reset(output.getData(), 0, output.getLength());
  ITuple deserializedTuple = null;
  deser.open(input);
  deserializedTuple = deser.deserialize(null);
  deser.close();
  if (debug) {
    System.out.println("D:" + tuple);
  }
  assertEquals(tuple, deserializedTuple);
}
From source file:com.datasalt.pangool.serialization.HadoopSerialization.java
License:Apache License
/**
 * Deserialize an object using Hadoop serialization from a byte array. The
 * object cannot be null.
 */
public <T> T deser(Object obj, byte[] array, int offset, int length) throws IOException {
  Map<Class, Deserializer> deserializers = cachedDeserializers.get();
  Deserializer deSer = deserializers.get(obj.getClass());
  if (deSer == null) {
    deSer = serialization.getDeserializer(obj.getClass());
    deserializers.put(obj.getClass(), deSer);
  }
  DataInputBuffer baIs = cachedInputStream.get();
  baIs.reset(array, offset, length);
  deSer.open(baIs);
  obj = deSer.deserialize(obj);
  deSer.close();
  baIs.close();
  return (T) obj;
}
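Note the pattern in this example: both the Deserializer and the DataInputBuffer come from per-thread caches, and reset(array, offset, length) simply repoints the existing buffer at the new bytes. Because reset allocates nothing, reusing one buffer this way avoids constructing a new stream object for every record, which matters in tight serialization loops.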
From source file:com.ebay.erl.mobius.core.datajoin.DataJoinKey.java
License:Apache License
@Override
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
  DataInputBuffer d1 = new DataInputBuffer();
  d1.reset(b1, s1, l1);
  DataInputBuffer d2 = new DataInputBuffer();
  d2.reset(b2, s2, l2);
  int _compare_result = Integer.MAX_VALUE;
  try {
    // the comparing order:
    // 1. DataJoinKey#KEY_FIELDNAME
    // 2. DataJoinKey#DATASET_ID_FIELDNAME
    // 3. DataJoinKey#SORT_KEYWORD_FIELDNAME - removed
    // 4. DataJoinKey#SORT_COMPARATOR_FIELDNAME - removed

    // read the number of columns from the two tuples; there is no need
    // to compare the column counts, we just read past the values.
    d1.readInt();
    d2.readInt();

    //////////////////////////////////////////////////////////
    // compare KEY, values from DataJoinKey#KEY_FIELDNAME;
    // KEY represents the actual key the user specified
    //////////////////////////////////////////////////////////
    byte type1 = d1.readByte();
    byte type2 = d2.readByte();
    _COLUMN_COMPARATOR.setType(type1, type2);

    // writable, check if they are Tuple or NullWritable
    if (type1 == Tuple.NULL_WRITABLE_TYPE && type2 == Tuple.NULL_WRITABLE_TYPE) {
      // considered equal, do nothing
      _compare_result = 0;
    } else if (type1 == Tuple.TUPLE_TYPE && type2 == Tuple.TUPLE_TYPE) {
      // both are Tuple
      Tuple k1 = (Tuple) getKey(type1, d1);
      Tuple k2 = (Tuple) getKey(type2, d2);
      _compare_result = _COLUMN_COMPARATOR.compareKey(k1, k2, this.getSorter(), conf);
    } else {
      // DataJoinKey only supports NullWritable and Tuple for DataJoinKey#KEY_FIELDNAME
      throw new IllegalArgumentException(
          "Cannot compare " + Tuple.getTypeString(type1) + " and " + Tuple.getTypeString(type2));
    }

    // if the keys are not the same, these two records should go to
    // different reducers, or different reduce iterations.
    if (_compare_result != 0)
      return _compare_result;

    //////////////////////////////////////////////////////////////////////////
    // compare DATASET_ID, values from DataJoinKey#DATASET_ID_FIELDNAME;
    // at this point the keys are the same, so the records go to the same
    // reducer, and we must make sure the values from DATASET1 always come
    // before DATASET2, so we compare the DATASET_ID here.
    //////////////////////////////////////////////////////////////////////////
    try {
      _COLUMN_COMPARATOR.setType(d1.readByte(), d2.readByte());
      _compare_result = _COLUMN_COMPARATOR.compare(d1, d2, this.conf);
      if (_compare_result != 0)
        return _compare_result;
    } catch (IOException e) {
      byte[] b = new byte[l1];
      for (int i = 0; i < l1; i++) {
        b[i] = b1[s1 + i];
      }
      System.err.println(Arrays.toString(b));
      System.err.println("type1:" + type1 + ", type2:" + type2);
      throw e;
    }
    return 0;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
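This example and the next one both implement Hadoop's raw-comparator contract: compare(byte[], int, int, byte[], int, int) wraps each serialized record in a DataInputBuffer via reset() and reads only as many fields as needed to decide the ordering, avoiding a full deserialization of either record. A minimal sketch of the same pattern, assuming a hypothetical record layout that begins with a 4-byte int key (the class name and layout are illustrative, not taken from the sources on this page):

import java.io.IOException;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.RawComparator;

// Sketch of the raw-comparator pattern: point a reusable DataInputBuffer
// at each serialized record with reset(), then read just the leading int
// key to order the records without deserializing them fully.
public class FirstIntRawComparator implements RawComparator<Object> {
  private final DataInputBuffer d1 = new DataInputBuffer();
  private final DataInputBuffer d2 = new DataInputBuffer();

  @Override
  public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
    try {
      d1.reset(b1, s1, l1);
      d2.reset(b2, s2, l2);
      // Assumes each record starts with a 4-byte int key.
      return Integer.compare(d1.readInt(), d2.readInt());
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  @Override
  public int compare(Object a, Object b) {
    throw new UnsupportedOperationException("raw comparison only");
  }
}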
From source file:com.ebay.erl.mobius.core.model.Tuple.java
License:Apache License
/**
 * Compare two tuples in low-level row format.
 */
@Override
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
  DataInputBuffer d1 = new DataInputBuffer();
  d1.reset(b1, s1, l1);
  DataInputBuffer d2 = new DataInputBuffer();
  d2.reset(b2, s2, l2);
  int _compare_result = Integer.MAX_VALUE;
  try {
    // read the number of columns from the two tuples
    int columns_nbr1 = d1.readInt();
    int columns_nbr2 = d2.readInt();
    int upper_bound = Math.min(columns_nbr1, columns_nbr2);

    // compare column by column up to the shorter tuple's column count
    for (int i = 0; i < upper_bound; i++) {
      byte type1 = d1.readByte();
      byte type2 = d2.readByte();
      _COLUMN_COMPARATOR.setType(type1, type2);
      _compare_result = _COLUMN_COMPARATOR.compare(d1, d2, this.conf);
      // comparison for this column is complete
      if (_compare_result != 0 && _compare_result != Integer.MAX_VALUE) {
        // found a difference, return it
        return _compare_result;
      }
    } // end of iterating columns up to the upper bound

    // all compared columns (up to the upper bound) are equal, so use
    // the column counts as the comparison result.
    _compare_result = columns_nbr1 - columns_nbr2;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  if (_compare_result == Integer.MAX_VALUE)
    throw new IllegalArgumentException();
  return _compare_result;
}