List of usage examples for the org.apache.hadoop.io.DataOutputBuffer constructor
public DataOutputBuffer()
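All of the examples below share one idiom: serialize binary data into an in-memory, growable buffer, then hand the valid bytes to a consumer via getData() and getLength(). Before the project-specific examples, here is a minimal, self-contained sketch of that round trip (the IntWritable value is arbitrary and illustrative, not taken from any project below):

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;

public class DataOutputBufferExample {
    public static void main(String[] args) throws Exception {
        // Serialize a Writable into the in-memory buffer.
        DataOutputBuffer out = new DataOutputBuffer();
        new IntWritable(42).write(out);

        // getData() exposes the internal byte array; only the first
        // getLength() bytes are valid serialized data.
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());

        IntWritable copy = new IntWritable();
        copy.readFields(in);
        System.out.println(copy.get()); // prints 42
    }
}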
From source file:com.asakusafw.dmdl.thundergate.driver.ModelInputDriverTest.java
License:Apache License
/**
 * all primitive types.
 * @throws Exception if the test fails
 */
@SuppressWarnings("unchecked")
@Test
public void primitives() throws Exception {
    ModelLoader loader = generateJava("primitives");
    Class<?> type = loader.modelType("Primitives");

    assertThat(type.isAnnotationPresent(ModelInputLocation.class), is(true));
    assertThat(type.isAnnotationPresent(ModelOutputLocation.class), is(true));

    ModelWrapper object = loader.newModel("Primitives");
    object.set("type_boolean", true);
    object.set("type_byte", (byte) 64);
    object.set("type_short", (short) 256);
    object.set("type_int", 100);
    object.set("type_long", 200L);
    object.set("type_float", 300.f);
    object.set("type_double", 400.d);
    object.set("type_decimal", new BigDecimal("1234.567"));
    object.set("type_text", new Text("Hello, world!"));
    object.set("type_date", new Date(2011, 3, 31));
    object.set("type_datetime", new DateTime(2011, 3, 31, 23, 30, 1));

    DataOutputBuffer output = new DataOutputBuffer();
    try (ModelOutput<Object> modelOut = (ModelOutput<Object>) type.getAnnotation(ModelOutputLocation.class)
            .value().getDeclaredConstructor(RecordEmitter.class)
            .newInstance(new TsvEmitter(new OutputStreamWriter(output, "UTF-8")))) {
        modelOut.write(object.unwrap());
        modelOut.write(object.unwrap());
        modelOut.write(object.unwrap());
    }

    DataInputBuffer input = new DataInputBuffer();
    input.reset(output.getData(), output.getLength());
    try (ModelInput<Object> modelIn = (ModelInput<Object>) type.getAnnotation(ModelInputLocation.class).value()
            .getDeclaredConstructor(RecordParser.class)
            .newInstance(new TsvParser(new InputStreamReader(input, "UTF-8")))) {
        ModelWrapper copy = loader.newModel("Primitives");
        modelIn.readTo(copy.unwrap());
        assertThat(object.unwrap(), equalTo(copy.unwrap()));
        assertThat(input.read(), is(-1));
    }
}
From source file:com.asakusafw.runtime.io.util.WritableTestRoot.java
License:Apache License
static byte[] ser(Writable writable) throws IOException {
    DataOutputBuffer out = new DataOutputBuffer();
    writable.write(out);
    byte[] results = Arrays.copyOfRange(out.getData(), 0, out.getLength());
    return results;
}
From source file:com.asakusafw.runtime.io.util.WritableTestRoot.java
License:Apache License
static byte[] ser(WritableRawComparable writable) throws IOException {
    DataOutputBuffer out = new DataOutputBuffer();
    writable.write(out);
    assertThat(writable.getSizeInBytes(out.getData(), 0), is(out.getLength()));
    byte[] results = Arrays.copyOfRange(out.getData(), 0, out.getLength());
    return results;
}
From source file:com.asakusafw.runtime.stage.collector.SortableSlotTest.java
License:Apache License
static byte[] write(Writable writable) {
    DataOutputBuffer buffer = new DataOutputBuffer();
    buffer.reset();
    try {
        writable.write(buffer);
    } catch (IOException e) {
        throw new AssertionError(e);
    }
    return Arrays.copyOf(buffer.getData(), buffer.getLength());
}
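A note on the three helpers above: getData() returns DataOutputBuffer's entire backing array, which typically grows beyond the number of bytes actually written, so only the first getLength() bytes are valid. That is why each helper trims with Arrays.copyOf or Arrays.copyOfRange rather than returning getData() directly. A self-contained sketch of the difference (the Text value is arbitrary):

import java.util.Arrays;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

public class TrimExample {
    public static void main(String[] args) throws Exception {
        DataOutputBuffer out = new DataOutputBuffer();
        new Text("hello").write(out);
        // The backing array may be larger than the data actually written...
        byte[] raw = out.getData();
        // ...so trim to getLength() before handing the bytes to anyone else.
        byte[] valid = Arrays.copyOf(raw, out.getLength());
        System.out.println(raw.length + " backing bytes, " + valid.length + " valid bytes");
    }
}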
From source file:com.bigjob.ApplicationMaster.java
License:Apache License
/**
 * Main run function for the application master.
 *
 * @throws YarnException
 * @throws IOException
 */
@SuppressWarnings({ "unchecked" })
public boolean run() throws YarnException, IOException {
    LOG.info("Starting ApplicationMaster");

    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    credentials.writeTokenStorageToStream(dob);
    // Now remove the AM->RM token so that containers cannot access it.
    Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
    while (iter.hasNext()) {
        Token<?> token = iter.next();
        if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
            iter.remove();
        }
    }
    allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());

    AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();
    amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
    amRMClient.init(conf);
    amRMClient.start();

    containerListener = createNMCallbackHandler();
    nmClientAsync = new NMClientAsyncImpl(containerListener);
    nmClientAsync.init(conf);
    nmClientAsync.start();

    // Setup local RPC Server to accept status requests directly from clients.
    // TODO need to setup a protocol for clients to be able to communicate to the RPC server
    // TODO use the rpc port info to register with the RM for the client to
    // send requests to this app master

    // Register self with ResourceManager. This will start heartbeating to the RM.
    appMasterHostname = NetUtils.getHostname();
    RegisterApplicationMasterResponse response = amRMClient.registerApplicationMaster(appMasterHostname,
            appMasterRpcPort, appMasterTrackingUrl);

    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = response.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);
    int maxVCores = response.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max vcores capability of resources in this cluster " + maxVCores);

    // A resource ask cannot exceed the max.
    if (containerMemory > maxMem) {
        LOG.info("Container memory specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerMemory + ", max=" + maxMem);
        containerMemory = maxMem;
    }
    if (containerVirtualCores > maxVCores) {
        LOG.info("Container virtual cores specified above max threshold of cluster." + " Using max value."
                + ", specified=" + containerVirtualCores + ", max=" + maxVCores);
        containerVirtualCores = maxVCores;
    }

    // Setup ask for containers from RM and send the requests.
    // Until we get our fully allocated quota, we keep on polling RM for containers.
    // Keep looping until all the containers are launched and the shell script has
    // executed on them (regardless of success/failure).
    for (int i = 0; i < numTotalContainers; ++i) {
        ContainerRequest containerAsk = setupContainerAskForRM();
        amRMClient.addContainerRequest(containerAsk);
    }
    numRequestedContainers.set(numTotalContainers);

    while (!done && (numCompletedContainers.get() != numTotalContainers)) {
        try {
            Thread.sleep(200);
        } catch (InterruptedException ex) {
        }
    }
    finish();
    return success;
}
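The token-handling idiom above (and in the Client example that follows) is the standard YARN pattern: serialize a Credentials object through a DataOutputBuffer, then wrap the valid region in a ByteBuffer for the container launch context. Distilled to its core as a sketch (the helper method name is a placeholder; the Hadoop calls are real):

import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;

public class TokenBufferExample {
    // Serializes the current user's tokens into a ByteBuffer suitable for
    // ContainerLaunchContext.setTokens(). Wrapping getData() with an explicit
    // length avoids shipping the unused tail of the backing array.
    static ByteBuffer currentUserTokens() throws IOException {
        Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        return ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
    }
}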
From source file:com.bigjob.Client.java
License:Apache License
/**
 * Main run function for the client.
 * @return true if the application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {
    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM (RM)" + ", numNodeManagers="
            + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed.
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master.
    // The memory ask has to be a multiple of min and less than max.

    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capability of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // Set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();
    appContext.setApplicationName(appName);

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // Set local resources for the application master:
    // local files or archives as needed.
    // In this scenario, the jar file for the application master is part of the local resources.
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem and
    // create a local resource to point to the destination jar path.
    // if (dfsUrl != null && dfsUrl.equals("") == false) {
    //     conf.set("fs.defaultFS", dfsUrl);
    // }
    FileSystem fs = FileSystem.get(conf);
    addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.getId(), localResources, null);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        addToLocalResources(fs, log4jPropFile, log4jPath, appId.getId(), localResources, null);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed. To do this, we first copy it into the
    // filesystem that is visible to the yarn framework. We do not need to
    // set this as a local resource for the application master, as the
    // application master does not need it.
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
        Path shellSrc = new Path(shellScriptPath);
        String shellPathSuffix = appName + "/" + appId.getId() + "/"
                + (Shell.WINDOWS ? windowBatPath : linuxShellPath);
        Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
        fs.copyFromLocalFile(false, true, shellSrc, shellDst);
        hdfsShellScriptLocation = shellDst.toUri().toString();
        FileStatus shellFileStatus = fs.getFileStatus(shellDst);
        hdfsShellScriptLen = shellFileStatus.getLen();
        hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    if (!shellCommand.isEmpty()) {
        addToLocalResources(fs, null, shellCommandPath, appId.getId(), localResources, shellCommand);
    }

    if (shellArgs.length > 0) {
        addToLocalResources(fs, null, shellArgsPath, appId.getId(), localResources,
                StringUtils.join(shellArgs, " "));
    }

    // Set local resource info into app master container launch context
    amContainer.setLocalResources(localResources);

    // Set the necessary security tokens as needed
    // amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // Put the location of the shell script into the env. Using this info,
    // the application master will create the correct local resource for the
    // eventual containers that will be launched to execute the shell scripts.
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));

    // Add AppMaster.jar location to classpath. At some point we should not
    // be required to add the hadoop-specific classpaths to the env; they
    // should be provided out of the box. For now, set all required
    // classpaths, including the classpath to "." for the application jar.
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$()).append(File.pathSeparatorChar)
            .append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
        classPathEnv.append(File.pathSeparatorChar);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties");

    // Add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());
    amContainer.setEnvironment(env);

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--container_vcores " + String.valueOf(containerVirtualCores));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    vargs.add("--priority " + String.valueOf(shellCmdPriority));

    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());
    amContainer.setCommands(commands);

    // Set up resource type requirements. For now, both memory and vcores
    // are supported, so we set memory and vcores requirements.
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(amMemory);
    capability.setVirtualCores(amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application.
    // Not needed in this scenario.
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Records.newRecord(Priority.class);
    // TODO - what is the range for priority? how to decide?
    pri.setPriority(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager.
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response, as either a valid response object is returned on
    // success or an exception is thrown to denote some form of failure.
    LOG.info("Submitting application to ASM");
    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    // return monitorApplication(appId);
    System.out.println("ApplicationId:" + appId);
    return true;
}
From source file:com.chinamobile.bcbsp.client.BSPJobClient.java
License:Apache License
/**
 * Write splits.
 * @param job BSPJob
 * @param submitSplitFile Path
 * @param <T> org.apache.hadoop.mapreduce.InputSplit
 * @return splitNum the count of splits
 */
@SuppressWarnings("unchecked")
private <T extends org.apache.hadoop.mapreduce.InputSplit> int writeSplits(BSPJob job, Path submitSplitFile)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration confs = job.getConf();
    com.chinamobile.bcbsp.io.InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(),
            confs);
    input.initialize(job.getConf());
    List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(job);
    int maxSplits = job.getNumPartition();
    int splitNum = splits.size();
    double factor = splitNum / (float) maxSplits;
    if (factor > 1.0) {
        job.setInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, (int) Math.ceil(factor));
        LOG.info("[Split Adjust Factor] " + (int) Math.ceil(factor));
        LOG.info("[Partition Num] " + maxSplits);
        splits = input.getSplits(job);
        splitNum = splits.size();
    }
    T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);
    // Sort the splits into order based on size, so that the biggest go first.
    Arrays.sort(array, new NewSplitComparator());
    DataOutputStream out = writeSplitsFileHeader(confs, submitSplitFile, array.length);
    try {
        if (array.length != 0) {
            DataOutputBuffer buffer = new DataOutputBuffer();
            RawSplit rawSplit = new RawSplit();
            SerializationFactory factory = new SerializationFactory(confs);
            Serializer<T> serializer = factory.getSerializer((Class<T>) array[0].getClass());
            serializer.open(buffer);
            for (T split : array) {
                rawSplit.setClassName(split.getClass().getName());
                buffer.reset();
                serializer.serialize(split);
                rawSplit.setDataLength(split.getLength());
                rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
                rawSplit.setLocations(split.getLocations());
                rawSplit.write(out);
            }
            serializer.close();
        }
    } finally {
        out.close();
    }
    return splitNum;
}
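The writeSplits method above shows the buffer-reuse pattern that the partition writers below also rely on: open a Serializer over a single DataOutputBuffer once, then call reset() before each record so the write position rewinds without reallocating. A minimal, self-contained sketch of that loop (IntWritable stands in for the real record type; the class name is a placeholder):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;

public class ReuseBufferExample {
    public static void main(String[] args) throws IOException {
        SerializationFactory factory = new SerializationFactory(new Configuration());
        Serializer<IntWritable> serializer = factory.getSerializer(IntWritable.class);
        DataOutputBuffer buffer = new DataOutputBuffer();
        serializer.open(buffer); // bind the serializer to the buffer once
        for (int i = 0; i < 3; i++) {
            buffer.reset();      // rewind instead of allocating a new buffer
            serializer.serialize(new IntWritable(i));
            // buffer.getData()[0..buffer.getLength()) now holds record i only
            System.out.println("record " + i + " is " + buffer.getLength() + " bytes");
        }
        serializer.close();
    }
}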
From source file:com.chinamobile.bcbsp.ml.HashMLWritePartition.java
License:Apache License
/**
 * This method is used to partition graph vertices, writing each vertex to
 * the corresponding partition. It calls the recordParse method to create a
 * HeadNode object, then calls the partitioner's getPartitionID method to
 * compute the partition ID that the HeadNode belongs to. If the HeadNode
 * belongs to the local partition it is written there; otherwise it is sent
 * to the appropriate partition.
 * @param recordReader the record reader of the split.
 * @throws IOException if an I/O error occurs
 * @throws InterruptedException if the thread is interrupted
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
    int headNodeNum = 0;
    int local = 0;
    int send = 0;
    int lost = 0;
    ThreadPool tpool = new ThreadPool(this.sendThreadNum);
    int bufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
            / (this.staff.getStaffNum() + this.sendThreadNum);
    byte[][] buffer = new byte[this.staff.getStaffNum()][bufferSize];
    int[] bufindex = new int[this.staff.getStaffNum()];
    BytesWritable kbytes = new BytesWritable();
    int ksize = 0;
    BytesWritable vbytes = new BytesWritable();
    int vsize = 0;
    DataOutputBuffer bb = new DataOutputBuffer();
    try {
        this.keyserializer.open(bb);
        this.valueserializer.open(bb);
    } catch (IOException e) {
        throw e;
    }
    try {
        while (recordReader != null && recordReader.nextKeyValue()) {
            headNodeNum++;
            Text key = new Text(recordReader.getCurrentKey().toString());
            Text value = new Text(recordReader.getCurrentValue().toString());
            int pid = -1;
            if (key != null) {
                pid = this.partitioner.getPartitionID(key);
            } else {
                lost++;
                continue;
            }
            if (pid == this.staff.getPartition()) {
                local++;
                KeyValuePair pair = (KeyValuePair) this.recordParse.recordParse(key.toString(),
                        value.toString());
                if (pair == null) {
                    lost++;
                    continue;
                }
                staff.getGraphData().addForAll(pair);
            } else {
                send++;
                bb.reset();
                this.keyserializer.serialize(key);
                kbytes.set(bb.getData(), 0, bb.getLength());
                ksize = kbytes.getLength();
                bb.reset();
                this.valueserializer.serialize(value);
                vbytes.set(bb.getData(), 0, bb.getLength());
                vsize = vbytes.getLength();
                if ((buffer[pid].length - bufindex[pid]) > (ksize + vsize)) {
                    System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
                    bufindex[pid] += ksize;
                    System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
                    bufindex[pid] += vsize;
                } else if (buffer[pid].length < (ksize + vsize)) {
                    ThreadSignle t = tpool.getThread();
                    while (t == null) {
                        t = tpool.getThread();
                    }
                    t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
                    t.setJobId(staff.getJobID());
                    t.setTaskId(staff.getStaffID());
                    t.setBelongPartition(pid);
                    BytesWritable data = new BytesWritable();
                    byte[] tmp = new byte[vsize + ksize];
                    System.arraycopy(kbytes.getBytes(), 0, tmp, 0, ksize);
                    System.arraycopy(vbytes.getBytes(), 0, tmp, ksize, vsize);
                    data.set(tmp, 0, (ksize + vsize));
                    t.setData(data);
                    tmp = null;
                    LOG.info("Using Thread is: " + t.getThreadNumber());
                    LOG.info("this is a super record");
                    t.setStatus(true);
                } else {
                    ThreadSignle t = tpool.getThread();
                    while (t == null) {
                        t = tpool.getThread();
                    }
                    t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
                    t.setJobId(staff.getJobID());
                    t.setTaskId(staff.getStaffID());
                    t.setBelongPartition(pid);
                    BytesWritable data = new BytesWritable();
                    data.set(buffer[pid], 0, bufindex[pid]);
                    t.setData(data);
                    LOG.info("Using Thread is: " + t.getThreadNumber());
                    t.setStatus(true);
                    bufindex[pid] = 0;
                    // store data
                    System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
                    bufindex[pid] += ksize;
                    System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
                    bufindex[pid] += vsize;
                }
            }
        }
        for (int i = 0; i < this.staff.getStaffNum(); i++) {
            if (bufindex[i] != 0) {
                ThreadSignle t = tpool.getThread();
                while (t == null) {
                    t = tpool.getThread();
                }
                t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
                t.setJobId(staff.getJobID());
                t.setTaskId(staff.getStaffID());
                t.setBelongPartition(i);
                BytesWritable data = new BytesWritable();
                data.set(buffer[i], 0, bufindex[i]);
                t.setData(data);
                LOG.info("Using Thread is: " + t.getThreadNumber());
                t.setStatus(true);
            }
        }
        tpool.cleanup();
        tpool = null;
        buffer = null;
        bufindex = null;
        LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
        LOG.info("The number of vertices that were put into the partition: " + local);
        LOG.info("The number of vertices that were sent to other partitions: " + send);
        LOG.info("The number of vertices in the partition that could not be parsed: " + lost);
    } catch (IOException e) {
        throw e;
    } catch (InterruptedException e) {
        throw e;
    }
}
From source file:com.chinamobile.bcbsp.partition.HashWithBalancerWritePartition.java
License:Apache License
/**
 * This method is used to partition graph vertices, writing each vertex to
 * the corresponding partition. It calls the recordParse method to create a
 * HeadNode object, then calls the partitioner's getPartitionID method to
 * compute the partition ID that the HeadNode belongs to. If the HeadNode
 * belongs to the local partition it is written there; otherwise it is sent
 * to the appropriate partition.
 * @param recordReader the record reader of the split.
 * @throws IOException if an I/O error occurs
 * @throws InterruptedException if the thread is interrupted
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
    int headNodeNum = 0;
    int local = 0;
    int send = 0;
    int lost = 0;
    ThreadPool tpool = new ThreadPool(this.sendThreadNum);
    int staffNum = this.staff.getStaffNum();
    BytesWritable kbytes = new BytesWritable();
    int ksize = 0;
    BytesWritable vbytes = new BytesWritable();
    int vsize = 0;
    DataOutputBuffer bb = new DataOutputBuffer();
    int bufferSize = (int) ((this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER) * PART);
    int dataBufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
            / (this.staff.getStaffNum() + this.sendThreadNum);
    byte[] buffer = new byte[bufferSize];
    int bufindex = 0;
    SerializationFactory sFactory = new SerializationFactory(new Configuration());
    Serializer<IntWritable> psserializer = sFactory.getSerializer(IntWritable.class);
    byte[] pidandsize = new byte[TIME * CONTAINERNUMBER * CONTAINERNUMBER];
    int psindex = 0;
    BytesWritable pidbytes = new BytesWritable();
    int psize = 0;
    BytesWritable sizebytes = new BytesWritable();
    int ssize = 0;
    try {
        this.keyserializer.open(bb);
        this.valueserializer.open(bb);
        psserializer.open(bb);
    } catch (IOException e) {
        throw e;
    }
    String path = "/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID();
    File dir = new File("/tmp/bcbsp/" + this.staff.getJobID());
    dir.mkdir();
    dir = new File("/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID());
    dir.mkdir();
    ArrayList<File> files = new ArrayList<File>();
    try {
        File file = new File(path + "/" + "data" + ".txt");
        files.add(file);
        DataOutputStream dataWriter = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(path + "/" + "data" + ".txt", true)));
        DataInputStream dataReader = new DataInputStream(
                new BufferedInputStream(new FileInputStream(path + "/" + "data" + ".txt")));
        File filet = new File(path + "/" + "pidandsize" + ".txt");
        files.add(filet);
        DataOutputStream psWriter = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(path + "/" + "pidandsize" + ".txt", true)));
        DataInputStream psReader = new DataInputStream(
                new BufferedInputStream(new FileInputStream(path + "/" + "pidandsize" + ".txt")));
        while (recordReader != null && recordReader.nextKeyValue()) {
            headNodeNum++;
            Text key = new Text(recordReader.getCurrentKey().toString());
            Text value = new Text(recordReader.getCurrentValue().toString());
            int pid = -1;
            Text vertexID = this.recordParse.getVertexID(key);
            if (vertexID != null) {
                pid = this.partitioner.getPartitionID(vertexID);
            } else {
                lost++;
                continue;
            }
            if (this.counter.containsKey(pid)) {
                this.counter.put(pid, (this.counter.get(pid) + 1));
            } else {
                this.counter.put(pid, 1);
            }
            bb.reset();
            this.keyserializer.serialize(key);
            kbytes.set(bb.getData(), 0, bb.getLength());
            ksize = kbytes.getLength();
            bb.reset();
            this.valueserializer.serialize(value);
            vbytes.set(bb.getData(), 0, bb.getLength());
            vsize = vbytes.getLength();
            bb.reset();
            psserializer.serialize(new IntWritable(ksize + vsize));
            sizebytes.set(bb.getData(), 0, bb.getLength());
            ssize = sizebytes.getLength();
            bb.reset();
            psserializer.serialize(new IntWritable(pid));
            pidbytes.set(bb.getData(), 0, bb.getLength());
            psize = pidbytes.getLength();
            if ((pidandsize.length - psindex) > (ssize + psize)) {
                System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize);
                psindex += ssize;
                System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize);
                psindex += psize;
            } else {
                psWriter.write(pidandsize, 0, psindex);
                psindex = 0;
                System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize);
                psindex += ssize;
                System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize);
                psindex += psize;
            }
            if ((buffer.length - bufindex) > (ksize + vsize)) {
                System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize);
                bufindex += ksize;
                System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize);
                bufindex += vsize;
            } else if (buffer.length < (ksize + vsize)) {
                dataWriter.write(buffer, 0, bufindex);
                bufindex = 0;
                LOG.info("This is a super record");
                dataWriter.write(kbytes.getBytes(), 0, ksize);
                dataWriter.write(vbytes.getBytes(), 0, vsize);
            } else {
                dataWriter.write(buffer, 0, bufindex);
                bufindex = 0;
                System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize);
                bufindex += ksize;
                System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize);
                bufindex += vsize;
            }
        }
        if (psindex != 0) {
            psWriter.write(pidandsize, 0, psindex);
        }
        if (bufindex != 0) {
            dataWriter.write(buffer, 0, bufindex);
            bufindex = 0;
        }
        dataWriter.close();
        dataWriter = null;
        psWriter.close();
        psWriter = null;
        buffer = null;
        pidandsize = null;
        this.ssrc.setDirFlag(new String[] { "3" });
        this.ssrc.setCounter(this.counter);
        HashMap<Integer, Integer> hashBucketToPartition = this.sssc.loadDataInBalancerBarrier(ssrc,
                Constants.PARTITION_TYPE.HASH);
        this.staff.setHashBucketToPartition(hashBucketToPartition);
        byte[][] databuf = new byte[staffNum][dataBufferSize];
        int[] databufindex = new int[staffNum];
        try {
            IntWritable pid = new IntWritable();
            IntWritable size = new IntWritable();
            int belongPid = 0;
            while (true) {
                size.readFields(psReader);
                pid.readFields(psReader);
                belongPid = hashBucketToPartition.get(pid.get());
                if (belongPid != this.staff.getPartition()) {
                    send++;
                } else {
                    local++;
                }
                if ((databuf[belongPid].length - databufindex[belongPid]) > size.get()) {
                    dataReader.read(databuf[belongPid], databufindex[belongPid], size.get());
                    databufindex[belongPid] += size.get();
                } else if (databuf[belongPid].length < size.get()) {
                    LOG.info("This is a super record");
                    byte[] tmp = new byte[size.get()];
                    dataReader.read(tmp, 0, size.get());
                    if (belongPid == this.staff.getPartition()) {
                        DataInputStream reader = new DataInputStream(
                                new BufferedInputStream(new ByteArrayInputStream(tmp)));
                        try {
                            boolean stop = true;
                            while (stop) {
                                Text key = new Text();
                                key.readFields(reader);
                                Text value = new Text();
                                value.readFields(reader);
                                if (key.getLength() > 0 && value.getLength() > 0) {
                                    Vertex vertex = this.recordParse.recordParse(key.toString(),
                                            value.toString());
                                    if (vertex == null) {
                                        lost++;
                                        continue;
                                    }
                                    this.staff.getGraphData().addForAll(vertex);
                                } else {
                                    stop = false;
                                }
                            }
                        } catch (IOException e) {
                            LOG.info("IO exception: " + e.getStackTrace());
                        }
                    } else {
                        ThreadSignle t = tpool.getThread();
                        while (t == null) {
                            t = tpool.getThread();
                        }
                        t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(),
                                belongPid));
                        t.setJobId(staff.getJobID());
                        t.setTaskId(staff.getStaffID());
                        t.setBelongPartition(belongPid);
                        BytesWritable data = new BytesWritable();
                        data.set(tmp, 0, size.get());
                        t.setData(data);
                        LOG.info("Using Thread is: " + t.getThreadNumber());
                        t.setStatus(true);
                    }
                    tmp = null;
                } else {
                    if (belongPid == this.staff.getPartition()) {
                        DataInputStream reader = new DataInputStream(new BufferedInputStream(
                                new ByteArrayInputStream(databuf[belongPid], 0, databufindex[belongPid])));
                        try {
                            boolean stop = true;
                            while (stop) {
                                Text key = new Text();
                                key.readFields(reader);
                                Text value = new Text();
                                value.readFields(reader);
                                if (key.getLength() > 0 && value.getLength() > 0) {
                                    Vertex vertex = this.recordParse.recordParse(key.toString(),
                                            value.toString());
                                    if (vertex == null) {
                                        lost++;
                                        continue;
                                    }
                                    this.staff.getGraphData().addForAll(vertex);
                                } else {
                                    stop = false;
                                }
                            }
                        } catch (IOException e) {
                            LOG.info("IO exception: " + e.getStackTrace());
                        }
                    } else {
                        ThreadSignle t = tpool.getThread();
                        while (t == null) {
                            t = tpool.getThread();
                        }
                        t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(),
                                belongPid));
                        t.setJobId(staff.getJobID());
                        t.setTaskId(staff.getStaffID());
                        t.setBelongPartition(belongPid);
                        BytesWritable data = new BytesWritable();
                        data.set(databuf[belongPid], 0, databufindex[belongPid]);
                        t.setData(data);
                        LOG.info("Using Thread is: " + t.getThreadNumber());
                        t.setStatus(true);
                    }
                    databufindex[belongPid] = 0;
                    dataReader.read(databuf[belongPid], databufindex[belongPid], size.get());
                    databufindex[belongPid] += size.get();
                }
            }
        } catch (EOFException ex) {
            LOG.error("[write]", ex);
        }
        for (int i = 0; i < staffNum; i++) {
            if (databufindex[i] != 0) {
                if (i == this.staff.getPartition()) {
                    DataInputStream reader = new DataInputStream(
                            new BufferedInputStream(new ByteArrayInputStream(databuf[i], 0, databufindex[i])));
                    try {
                        boolean stop = true;
                        while (stop) {
                            Text key = new Text();
                            key.readFields(reader);
                            Text value = new Text();
                            value.readFields(reader);
                            if (key.getLength() > 0 && value.getLength() > 0) {
                                Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                                if (vertex == null) {
                                    lost++;
                                    continue;
                                }
                                this.staff.getGraphData().addForAll(vertex);
                            } else {
                                stop = false;
                            }
                        }
                    } catch (IOException e) {
                        LOG.info("IO exception: " + e.getStackTrace());
                    }
                } else {
                    ThreadSignle t = tpool.getThread();
                    while (t == null) {
                        t = tpool.getThread();
                    }
                    t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
                    t.setJobId(staff.getJobID());
                    t.setTaskId(staff.getStaffID());
                    t.setBelongPartition(i);
                    BytesWritable data = new BytesWritable();
                    data.set(databuf[i], 0, databufindex[i]);
                    t.setData(data);
                    LOG.info("Using Thread is: " + t.getThreadNumber());
                    t.setStatus(true);
                }
            }
        }
        dataReader.close();
        dataReader = null;
        psReader.close();
        psReader = null;
        for (File f : files) {
            f.delete();
        }
        dir.delete();
        dir = new File(path.substring(0, path.lastIndexOf('/')));
        dir.delete();
        tpool.cleanup();
        tpool = null;
        databuf = null;
        databufindex = null;
        this.counter = null;
        LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
        LOG.info("The number of vertices that were put into the partition: " + local);
        LOG.info("The number of vertices that were sent to other partitions: " + send);
        LOG.info("The number of vertices in the partition that could not be parsed: " + lost);
    } catch (IOException e) {
        throw e;
    } catch (InterruptedException e) {
        throw e;
    } finally {
        for (File f : files) {
            f.delete();
        }
        dir.delete();
        dir = new File(path.substring(0, path.lastIndexOf('/')));
        dir.delete();
    }
}
From source file:com.chinamobile.bcbsp.partition.HashWritePartition.java
License:Apache License
/**
 * This method is used to partition graph vertices, writing each vertex to
 * the corresponding partition. It calls the recordParse method to create a
 * HeadNode object, then calls the partitioner's getPartitionID method to
 * compute the partition ID that the HeadNode belongs to. If the HeadNode
 * belongs to the local partition it is written there; otherwise it is sent
 * to the appropriate partition.
 * @param recordReader the record reader of the split.
 * @throws IOException if an I/O error occurs
 * @throws InterruptedException if the thread is interrupted
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
    int headNodeNum = 0;
    int local = 0;
    int send = 0;
    int lost = 0;
    ThreadPool tpool = new ThreadPool(this.sendThreadNum);
    int bufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
            / (this.staff.getStaffNum() + this.sendThreadNum);
    byte[][] buffer = new byte[this.staff.getStaffNum()][bufferSize];
    int[] bufindex = new int[this.staff.getStaffNum()];
    BytesWritable kbytes = new BytesWritable();
    int ksize = 0;
    BytesWritable vbytes = new BytesWritable();
    int vsize = 0;
    DataOutputBuffer bb = new DataOutputBuffer();
    try {
        this.keyserializer.open(bb);
        this.valueserializer.open(bb);
    } catch (IOException e) {
        throw e;
    }
    try {
        while (recordReader != null && recordReader.nextKeyValue()) {
            headNodeNum++;
            Text key = new Text(recordReader.getCurrentKey().toString());
            Text value = new Text(recordReader.getCurrentValue().toString());
            int pid = -1;
            Text vertexID = this.recordParse.getVertexID(key);
            if (vertexID != null) {
                pid = this.partitioner.getPartitionID(vertexID);
            } else {
                lost++;
                continue;
            }
            if (pid == this.staff.getPartition()) {
                local++;
                Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                if (vertex == null) {
                    lost++;
                    continue;
                }
                staff.getGraphData().addForAll(vertex);
            } else {
                send++;
                bb.reset();
                this.keyserializer.serialize(key);
                kbytes.set(bb.getData(), 0, bb.getLength());
                ksize = kbytes.getLength();
                bb.reset();
                this.valueserializer.serialize(value);
                vbytes.set(bb.getData(), 0, bb.getLength());
                vsize = vbytes.getLength();
                if ((buffer[pid].length - bufindex[pid]) > (ksize + vsize)) {
                    System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
                    bufindex[pid] += ksize;
                    System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
                    bufindex[pid] += vsize;
                } else if (buffer[pid].length < (ksize + vsize)) {
                    ThreadSignle t = tpool.getThread();
                    while (t == null) {
                        t = tpool.getThread();
                    }
                    t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
                    t.setJobId(staff.getJobID());
                    t.setTaskId(staff.getStaffID());
                    t.setBelongPartition(pid);
                    BytesWritable data = new BytesWritable();
                    byte[] tmp = new byte[vsize + ksize];
                    System.arraycopy(kbytes.getBytes(), 0, tmp, 0, ksize);
                    System.arraycopy(vbytes.getBytes(), 0, tmp, ksize, vsize);
                    data.set(tmp, 0, (ksize + vsize));
                    t.setData(data);
                    tmp = null;
                    LOG.info("Using Thread is: " + t.getThreadNumber());
                    LOG.info("this is a super record");
                    t.setStatus(true);
                } else {
                    ThreadSignle t = tpool.getThread();
                    while (t == null) {
                        t = tpool.getThread();
                    }
                    t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
                    t.setJobId(staff.getJobID());
                    t.setTaskId(staff.getStaffID());
                    t.setBelongPartition(pid);
                    BytesWritable data = new BytesWritable();
                    data.set(buffer[pid], 0, bufindex[pid]);
                    t.setData(data);
                    LOG.info("Using Thread is: " + t.getThreadNumber());
                    t.setStatus(true);
                    bufindex[pid] = 0;
                    // store data
                    System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
                    bufindex[pid] += ksize;
                    System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
                    bufindex[pid] += vsize;
                }
            }
        }
        for (int i = 0; i < this.staff.getStaffNum(); i++) {
            if (bufindex[i] != 0) {
                ThreadSignle t = tpool.getThread();
                while (t == null) {
                    t = tpool.getThread();
                }
                t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
                t.setJobId(staff.getJobID());
                t.setTaskId(staff.getStaffID());
                t.setBelongPartition(i);
                BytesWritable data = new BytesWritable();
                data.set(buffer[i], 0, bufindex[i]);
                t.setData(data);
                LOG.info("Using Thread is: " + t.getThreadNumber());
                t.setStatus(true);
            }
        }
        tpool.cleanup();
        tpool = null;
        buffer = null;
        bufindex = null;
        LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
        LOG.info("The number of vertices that were put into the partition: " + local);
        LOG.info("The number of vertices that were sent to other partitions: " + send);
        LOG.info("The number of vertices in the partition that could not be parsed: " + lost);
    } catch (IOException e) {
        throw e;
    } catch (InterruptedException e) {
        throw e;
    }
}