List of usage examples for org.apache.hadoop.io.serializer.SerializationFactory.getSerializer
public <T> Serializer<T> getSerializer(Class<T> c)
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.TaggedInputSplit.java
License:Apache License
@SuppressWarnings("unchecked") @Override//ww w .ja v a 2 s .com public void write(DataOutput out) throws IOException { Text.writeString(out, name); Text.writeString(out, inputSplitClass.getName()); Text.writeString(out, inputFormatClass.getName()); Text.writeString(out, mapperClassName); Text.writeString(out, GSON.toJson(inputConfigs)); SerializationFactory factory = new SerializationFactory(conf); Serializer serializer = factory.getSerializer(inputSplitClass); serializer.open((DataOutputStream) out); serializer.serialize(inputSplit); }
From source file:com.ambiata.ivory.operation.hadoop.TaggedInputSplit.java
License:Apache License
/**
 * Writes this tagged split to {@code out}: the class names of the input
 * split, the input format and the mapper (in that order), followed by the
 * wrapped split serialized through the serializer registered for
 * {@code inputSplitClass}.
 *
 * @param out target of the serialized form; it is cast to
 *            {@link DataOutputStream}
 * @throws IOException if any write fails
 */
@SuppressWarnings("unchecked")
public void write(DataOutput out) throws IOException {
  // Header: three class names, written one after another.
  final Class<?>[] headerClasses = { inputSplitClass, inputFormatClass, mapperClass };
  for (int i = 0; i < headerClasses.length; i++) {
    Text.writeString(out, headerClasses[i].getName());
  }

  // Payload: the wrapped split itself.
  final Serializer splitSerializer = new SerializationFactory(conf).getSerializer(inputSplitClass);
  splitSerializer.open((DataOutputStream) out);
  splitSerializer.serialize(inputSplit);
}
From source file:com.chinamobile.bcbsp.client.BSPJobClient.java
License:Apache License
/** * Write splits.//from ww w. j a v a 2 s.c o m * @param job BSPJob * @param submitSplitFile Path * @param <T> org.apache.hadoop.mapreduce.InputSplit * @return splitNum the count of split */ @SuppressWarnings("unchecked") private <T extends org.apache.hadoop.mapreduce.InputSplit> int writeSplits(BSPJob job, Path submitSplitFile) throws IOException, InterruptedException, ClassNotFoundException { Configuration confs = job.getConf(); com.chinamobile.bcbsp.io.InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), confs); input.initialize(job.getConf()); List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(job); int maxSplits = job.getNumPartition(); int splitNum = splits.size(); double factor = splitNum / (float) maxSplits; if (factor > 1.0) { job.setInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, (int) Math.ceil(factor)); LOG.info("[Split Adjust Factor] " + (int) Math.ceil(factor)); LOG.info("[Partition Num] " + maxSplits); splits = input.getSplits(job); splitNum = splits.size(); } T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]); // sort the splits into order based on size, so that the biggest // go first Arrays.sort(array, new NewSplitComparator()); DataOutputStream out = writeSplitsFileHeader(confs, submitSplitFile, array.length); try { if (array.length != 0) { DataOutputBuffer buffer = new DataOutputBuffer(); RawSplit rawSplit = new RawSplit(); SerializationFactory factory = new SerializationFactory(confs); Serializer<T> serializer = factory.getSerializer((Class<T>) array[0].getClass()); serializer.open(buffer); for (T split : array) { rawSplit.setClassName(split.getClass().getName()); buffer.reset(); serializer.serialize(split); rawSplit.setDataLength(split.getLength()); rawSplit.setBytes(buffer.getData(), 0, buffer.getLength()); rawSplit.setLocations(split.getLocations()); rawSplit.write(out); } serializer.close(); } } finally { out.close(); } return splitNum; }
From source file:com.chinamobile.bcbsp.partition.HashWithBalancerWritePartition.java
License:Apache License
/** * This method is used to partition graph vertexes. Writing Each vertex to the * corresponding partition. In this method calls recordParse method to create * an HeadNode object. The last call partitioner's getPartitionId method to * calculate the HeadNode belongs to partition's id. If the HeadNode belongs * local partition then written to the local partition or send it to the * appropriate partition./* w w w . j a v a 2 s . c o m*/ * @param recordReader The recordreader of the split. * @throws IOException The io exception * @throws InterruptedException The Interrupted Exception */ @Override public void write(RecordReader recordReader) throws IOException, InterruptedException { int headNodeNum = 0; int local = 0; int send = 0; int lost = 0; ThreadPool tpool = new ThreadPool(this.sendThreadNum); int staffNum = this.staff.getStaffNum(); BytesWritable kbytes = new BytesWritable(); int ksize = 0; BytesWritable vbytes = new BytesWritable(); int vsize = 0; DataOutputBuffer bb = new DataOutputBuffer(); int bufferSize = (int) ((this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER) * PART); int dataBufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER) / (this.staff.getStaffNum() + this.sendThreadNum); byte[] buffer = new byte[bufferSize]; int bufindex = 0; SerializationFactory sFactory = new SerializationFactory(new Configuration()); Serializer<IntWritable> psserializer = sFactory.getSerializer(IntWritable.class); byte[] pidandsize = new byte[TIME * CONTAINERNUMBER * CONTAINERNUMBER]; int psindex = 0; BytesWritable pidbytes = new BytesWritable(); int psize = 0; BytesWritable sizebytes = new BytesWritable(); int ssize = 0; try { this.keyserializer.open(bb); this.valueserializer.open(bb); psserializer.open(bb); } catch (IOException e) { throw e; } String path = "/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID(); File dir = new File("/tmp/bcbsp/" + this.staff.getJobID()); dir.mkdir(); dir = new File("/tmp/bcbsp/" + this.staff.getJobID() 
+ "/" + this.staff.getStaffID()); dir.mkdir(); ArrayList<File> files = new ArrayList<File>(); try { File file = new File(path + "/" + "data" + ".txt"); files.add(file); DataOutputStream dataWriter = new DataOutputStream( new BufferedOutputStream(new FileOutputStream(path + "/" + "data" + ".txt", true))); DataInputStream dataReader = new DataInputStream( new BufferedInputStream(new FileInputStream(path + "/" + "data" + ".txt"))); File filet = new File(path + "/" + "pidandsize" + ".txt"); files.add(filet); DataOutputStream psWriter = new DataOutputStream( new BufferedOutputStream(new FileOutputStream(path + "/" + "pidandsize" + ".txt", true))); DataInputStream psReader = new DataInputStream( new BufferedInputStream(new FileInputStream(path + "/" + "pidandsize" + ".txt"))); while (recordReader != null && recordReader.nextKeyValue()) { headNodeNum++; Text key = new Text(recordReader.getCurrentKey().toString()); Text value = new Text(recordReader.getCurrentValue().toString()); int pid = -1; Text vertexID = this.recordParse.getVertexID(key); if (vertexID != null) { pid = this.partitioner.getPartitionID(vertexID); } else { lost++; continue; } if (this.counter.containsKey(pid)) { this.counter.put(pid, (this.counter.get(pid) + 1)); } else { this.counter.put(pid, 1); } bb.reset(); this.keyserializer.serialize(key); kbytes.set(bb.getData(), 0, bb.getLength()); ksize = kbytes.getLength(); bb.reset(); this.valueserializer.serialize(value); vbytes.set(bb.getData(), 0, bb.getLength()); vsize = vbytes.getLength(); bb.reset(); psserializer.serialize(new IntWritable(ksize + vsize)); sizebytes.set(bb.getData(), 0, bb.getLength()); ssize = sizebytes.getLength(); bb.reset(); psserializer.serialize(new IntWritable(pid)); pidbytes.set(bb.getData(), 0, bb.getLength()); psize = pidbytes.getLength(); if ((pidandsize.length - psindex) > (ssize + psize)) { System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize); psindex += ssize; System.arraycopy(pidbytes.getBytes(), 0, 
pidandsize, psindex, psize); psindex += psize; } else { psWriter.write(pidandsize, 0, psindex); psindex = 0; System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize); psindex += ssize; System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize); psindex += psize; } if ((buffer.length - bufindex) > (ksize + vsize)) { System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize); bufindex += ksize; System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize); bufindex += vsize; } else if (buffer.length < (ksize + vsize)) { dataWriter.write(buffer, 0, bufindex); bufindex = 0; LOG.info("This is a super record"); dataWriter.write(kbytes.getBytes(), 0, ksize); dataWriter.write(vbytes.getBytes(), 0, vsize); } else { dataWriter.write(buffer, 0, bufindex); bufindex = 0; System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize); bufindex += ksize; System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize); bufindex += vsize; } } if (psindex != 0) { psWriter.write(pidandsize, 0, psindex); } if (bufindex != 0) { dataWriter.write(buffer, 0, bufindex); bufindex = 0; } dataWriter.close(); dataWriter = null; psWriter.close(); psWriter = null; buffer = null; pidandsize = null; this.ssrc.setDirFlag(new String[] { "3" }); this.ssrc.setCounter(this.counter); HashMap<Integer, Integer> hashBucketToPartition = this.sssc.loadDataInBalancerBarrier(ssrc, Constants.PARTITION_TYPE.HASH); this.staff.setHashBucketToPartition(hashBucketToPartition); byte[][] databuf = new byte[staffNum][dataBufferSize]; int[] databufindex = new int[staffNum]; try { IntWritable pid = new IntWritable(); IntWritable size = new IntWritable(); int belongPid = 0; while (true) { size.readFields(psReader); pid.readFields(psReader); belongPid = hashBucketToPartition.get(pid.get()); if (belongPid != this.staff.getPartition()) { send++; } else { local++; } if ((databuf[belongPid].length - databufindex[belongPid]) > size.get()) { dataReader.read(databuf[belongPid], 
databufindex[belongPid], size.get()); databufindex[belongPid] += size.get(); } else if (databuf[belongPid].length < size.get()) { LOG.info("This is a super record"); byte[] tmp = new byte[size.get()]; dataReader.read(tmp, 0, size.get()); if (belongPid == this.staff.getPartition()) { DataInputStream reader = new DataInputStream( new BufferedInputStream(new ByteArrayInputStream(tmp))); try { boolean stop = true; while (stop) { Text key = new Text(); key.readFields(reader); Text value = new Text(); value.readFields(reader); if (key.getLength() > 0 && value.getLength() > 0) { Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString()); if (vertex == null) { lost++; continue; } this.staff.getGraphData().addForAll(vertex); } else { stop = false; } } } catch (IOException e) { LOG.info("IO exception: " + e.getStackTrace()); } } else { ThreadSignle t = tpool.getThread(); while (t == null) { t = tpool.getThread(); } t.setWorker( this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid)); t.setJobId(staff.getJobID()); t.setTaskId(staff.getStaffID()); t.setBelongPartition(belongPid); BytesWritable data = new BytesWritable(); data.set(tmp, 0, size.get()); t.setData(data); LOG.info("Using Thread is: " + t.getThreadNumber()); t.setStatus(true); } tmp = null; } else { if (belongPid == this.staff.getPartition()) { DataInputStream reader = new DataInputStream(new BufferedInputStream( new ByteArrayInputStream(databuf[belongPid], 0, databufindex[belongPid]))); try { boolean stop = true; while (stop) { Text key = new Text(); key.readFields(reader); Text value = new Text(); value.readFields(reader); if (key.getLength() > 0 && value.getLength() > 0) { Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString()); if (vertex == null) { lost++; continue; } this.staff.getGraphData().addForAll(vertex); } else { stop = false; } } } catch (IOException e) { LOG.info("IO exception: " + e.getStackTrace()); } } else { ThreadSignle t = 
tpool.getThread(); while (t == null) { t = tpool.getThread(); } t.setWorker( this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid)); t.setJobId(staff.getJobID()); t.setTaskId(staff.getStaffID()); t.setBelongPartition(belongPid); BytesWritable data = new BytesWritable(); data.set(databuf[belongPid], 0, databufindex[belongPid]); t.setData(data); LOG.info("Using Thread is: " + t.getThreadNumber()); t.setStatus(true); } databufindex[belongPid] = 0; dataReader.read(databuf[belongPid], databufindex[belongPid], size.get()); databufindex[belongPid] += size.get(); } } } catch (EOFException ex) { LOG.error("[write]", ex); } for (int i = 0; i < staffNum; i++) { if (databufindex[i] != 0) { if (i == this.staff.getPartition()) { DataInputStream reader = new DataInputStream( new BufferedInputStream(new ByteArrayInputStream(databuf[i], 0, databufindex[i]))); try { boolean stop = true; while (stop) { Text key = new Text(); key.readFields(reader); Text value = new Text(); value.readFields(reader); if (key.getLength() > 0 && value.getLength() > 0) { Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString()); if (vertex == null) { lost++; continue; } this.staff.getGraphData().addForAll(vertex); } else { stop = false; } } } catch (IOException e) { LOG.info("IO exception: " + e.getStackTrace()); } } else { ThreadSignle t = tpool.getThread(); while (t == null) { t = tpool.getThread(); } t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i)); t.setJobId(staff.getJobID()); t.setTaskId(staff.getStaffID()); t.setBelongPartition(i); BytesWritable data = new BytesWritable(); data.set(databuf[i], 0, databufindex[i]); t.setData(data); LOG.info("Using Thread is: " + t.getThreadNumber()); t.setStatus(true); } } } dataReader.close(); dataReader = null; psReader.close(); psReader = null; for (File f : files) { f.delete(); } dir.delete(); dir = new File(path.substring(0, path.lastIndexOf('/'))); dir.delete(); tpool.cleanup(); tpool = 
null; databuf = null; databufindex = null; this.counter = null; LOG.info("The number of vertices that were read from the input file: " + headNodeNum); LOG.info("The number of vertices that were put into the partition: " + local); LOG.info("The number of vertices that were sent to other partitions: " + send); LOG.info("The number of verteices in the partition that cound not be " + "parsed:" + lost); } catch (IOException e) { throw e; } catch (InterruptedException e) { throw e; } finally { for (File f : files) { f.delete(); } dir.delete(); dir = new File(path.substring(0, path.lastIndexOf('/'))); dir.delete(); } }
From source file:com.cloudera.crunch.impl.mr.run.CrunchInputSplit.java
License:Apache License
/**
 * Writes this split to {@code out}: the node index, the input format class
 * name, the concrete class name of the wrapped split, and then the wrapped
 * split itself, serialized with the serializer registered for its class.
 *
 * @param out target of the serialized form; it is cast to
 *            {@link DataOutputStream}
 * @throws IOException if any write fails
 */
public void write(DataOutput out) throws IOException {
  out.writeInt(nodeIndex);
  Text.writeString(out, inputFormatClass.getName());

  // The runtime class of the wrapped split drives both the header entry and
  // the serializer lookup.
  final Class<?> splitType = inputSplit.getClass();
  Text.writeString(out, splitType.getName());

  final Serializer splitSerializer = new SerializationFactory(conf).getSerializer(splitType);
  splitSerializer.open((DataOutputStream) out);
  splitSerializer.serialize(inputSplit);
}
From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.TaggedInputSplit.java
License:Apache License
/**
 * Writes this tagged split to {@code out}: the input split class name, the
 * input format file, the input processor file, and then the wrapped split
 * serialized through the serializer registered for {@code inputSplitClass}.
 *
 * @param out target of the serialized form; it is cast to
 *            {@link DataOutputStream}
 * @throws IOException if any write fails
 */
@SuppressWarnings("unchecked")
public void write(DataOutput out) throws IOException {
  // Header strings, in wire order.
  Text.writeString(out, inputSplitClass.getName());
  final String[] instanceFiles = { inputFormatFile, inputProcessorFile };
  for (int i = 0; i < instanceFiles.length; i++) {
    Text.writeString(out, instanceFiles[i]);
  }

  // Payload: the wrapped split.
  final Serializer splitSerializer = new SerializationFactory(conf).getSerializer(inputSplitClass);
  splitSerializer.open((DataOutputStream) out);
  splitSerializer.serialize(inputSplit);
}
From source file:com.datatorrent.demos.mroperator.MapOperator.java
License:Open Source License
@SuppressWarnings("rawtypes") @Override//from ww w .j a v a2 s . c o m public Collection<Partition<MapOperator<K1, V1, K2, V2>>> definePartitions( Collection<Partition<MapOperator<K1, V1, K2, V2>>> partitions, int incrementalCapacity) { Collection c = partitions; Collection<Partition<MapOperator<K1, V1, K2, V2>>> operatorPartitions = c; Partition<MapOperator<K1, V1, K2, V2>> template = null; Iterator<Partition<MapOperator<K1, V1, K2, V2>>> itr = operatorPartitions.iterator(); template = itr.next(); Configuration conf = new Configuration(); SerializationFactory serializationFactory = new SerializationFactory(conf); if (outstream.size() == 0) { InputSplit[] splits; try { splits = getSplits(new JobConf(conf), incrementalCapacity + 1, template.getPartitionedInstance().getDirName()); } catch (Exception e1) { logger.info(" can't get splits {}", e1.getMessage()); throw new RuntimeException(e1); } Collection<Partition<MapOperator<K1, V1, K2, V2>>> operList = new ArrayList<Partition<MapOperator<K1, V1, K2, V2>>>(); itr = operatorPartitions.iterator(); int size = splits.length; Serializer keySerializer = serializationFactory.getSerializer(splits[0].getClass()); while (size > 0 && itr.hasNext()) { Partition<MapOperator<K1, V1, K2, V2>> p = itr.next(); MapOperator<K1, V1, K2, V2> opr = p.getPartitionedInstance(); opr.setInputFormatClass(inputFormatClass); opr.setMapClass(mapClass); opr.setCombineClass(combineClass); opr.setConfigFile(configFile); try { keySerializer.open(opr.getOutstream()); keySerializer.serialize(splits[size - 1]); opr.setInputSplitClass(splits[size - 1].getClass()); } catch (IOException e) { logger.info("error while serializing {}", e.getMessage()); } size--; operList.add(p); } while (size > 0) { MapOperator<K1, V1, K2, V2> opr = new MapOperator<K1, V1, K2, V2>(); opr.setInputFormatClass(inputFormatClass); opr.setMapClass(mapClass); opr.setCombineClass(combineClass); opr.setConfigFile(configFile); try { keySerializer.open(opr.getOutstream()); 
keySerializer.serialize(splits[size - 1]); opr.setInputSplitClass(splits[size - 1].getClass()); } catch (IOException e) { logger.info("error while serializing {}", e.getMessage()); } size--; operList.add(new DefaultPartition<MapOperator<K1, V1, K2, V2>>(opr)); } try { keySerializer.close(); } catch (IOException e) { throw new RuntimeException(e); } return operList; } return null; }
From source file:com.datatorrent.demos.mroperator.MapOperatorTest.java
License:Open Source License
public void testNodeProcessingSchema(MapOperator<LongWritable, Text, Text, IntWritable> oper) throws IOException { CollectorTestSink sortSink = new CollectorTestSink(); oper.output.setSink(sortSink);// w ww.ja va 2 s . c o m oper.setMapClass(WordCount.Map.class); oper.setCombineClass(WordCount.Reduce.class); oper.setDirName("src/test/resources/mroperator/"); oper.setConfigFile(null); oper.setInputFormatClass(TextInputFormat.class); Configuration conf = new Configuration(); JobConf jobConf = new JobConf(conf); FileInputFormat.setInputPaths(jobConf, new Path("src/test/resources/mroperator/")); TextInputFormat inputFormat = new TextInputFormat(); inputFormat.configure(jobConf); InputSplit[] splits = inputFormat.getSplits(jobConf, 1); SerializationFactory serializationFactory = new SerializationFactory(conf); Serializer keySerializer = serializationFactory.getSerializer(splits[0].getClass()); keySerializer.open(oper.getOutstream()); keySerializer.serialize(splits[0]); oper.setInputSplitClass(splits[0].getClass()); keySerializer.close(); oper.setup(null); oper.beginWindow(0); oper.emitTuples(); oper.emitTuples(); oper.endWindow(); oper.beginWindow(1); oper.emitTuples(); oper.endWindow(); Assert.assertEquals("number emitted tuples", 6, sortSink.collectedTuples.size()); for (Object o : sortSink.collectedTuples) { logger.debug(o.toString()); } logger.debug("Done testing round\n"); }
From source file:com.google.appengine.tools.mapreduce.SerializationUtil.java
License:Apache License
/**
 * Serializes {@code toSerialize} into a fresh in-memory stream using the
 * serializer that {@code SerializationFactory} provides for the object's
 * runtime class.
 *
 * @param conf configuration used to build the serialization factory
 * @param toSerialize the object to serialize
 * @return the stream holding the serialized bytes
 * @throws RuntimeException wrapping any {@link IOException} raised while
 *                          opening the serializer or serializing
 */
@SuppressWarnings("unchecked")
private static ByteArrayOutputStream serializeToByteArrayOutputStream(Configuration conf,
    Object toSerialize) {
  final Serializer objectSerializer = new SerializationFactory(conf).getSerializer(toSerialize.getClass());
  final ByteArrayOutputStream sink = new ByteArrayOutputStream();
  try {
    objectSerializer.open(sink);
    objectSerializer.serialize(toSerialize);
  } catch (IOException ioe) {
    throw new RuntimeException("Got an IOException from a ByteArrayOutputStream. This should never happen.",
        ioe);
  }
  return sink;
}
From source file:com.ikanow.aleph2.analytics.hadoop.assets.Aleph2MultiInputSplit.java
License:Apache License
@SuppressWarnings("unchecked") @Override// w w w . ja v a 2 s . co m public void write(DataOutput out) throws IOException { Text.writeString(out, _name); Text.writeString(out, _input_split.getName()); Text.writeString(out, _input_format.getName()); Text.writeString(out, _mapper.getName()); final SerializationFactory factory = new SerializationFactory(_conf); final Serializer serializer = factory.getSerializer(_input_split); serializer.open((DataOutputStream) out); serializer.serialize(_delegate); }