List of usage examples for org.apache.hadoop.io Text toString
@Override
public String toString()
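Before the collected examples, here is a minimal, self-contained sketch of what Text.toString() does: it decodes the Text object's UTF-8 backing bytes into a java.lang.String. Only standard Hadoop API calls (the Text(String) constructor, set(), getLength(), toString()) are used; the sample values are illustrative only.

import org.apache.hadoop.io.Text;

public class TextToStringDemo {
  public static void main(String[] args) {
    // Text stores its contents as UTF-8 bytes; toString() decodes them
    // back into a java.lang.String.
    Text text = new Text("hello:world");
    String decoded = text.toString();
    System.out.println(decoded);            // -> hello:world
    System.out.println(text.getLength());   // number of UTF-8 bytes, here 11

    // A Text instance is mutable: set() replaces the contents in place,
    // which is why the examples below often copy the value out via
    // toString() before reusing the object.
    text.set("42");
    System.out.println(text.toString());    // -> 42
  }
}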
From source file:com.cg.mapreduce.myfpgrowth.ParallelFPGrowthMapper.java
License:Apache License
@Override
protected void map(LongWritable offset, Text input, Context context)
    throws IOException, InterruptedException {
  String[] items = splitter.split(input.toString());
  List<String> record = FpGrow.sortByF1(Arrays.asList(items), fList);
  OpenIntHashSet groups = new OpenIntHashSet();
  for (int j = record.size() - 1; j >= 0; j--) {
    // generate group dependent shards
    String item = record.get(j);
    int groupID = fMap.get(item) / maxPerGroup;
    if (!groups.contains(groupID)) {
      ArrayList<String> tempItems = Lists.newArrayList();
      for (int i = 0; i <= j; i++) {
        tempItems.add(record.get(i));
      }
      wGroupID.set(groupID);
      if (tempItems.size() > 1) {
        // System.out.println(groupID + " " + tempItems);
        context.write(wGroupID, tempItems);
      }
    }
    groups.add(groupID);
  }
}
From source file:com.chinamobile.bcbsp.bspcontroller.BSPController.java
License:Apache License
/**
 * Kill the staffs of a job that is still in the schedule process, i.e. whose
 * staffs have not finished scheduling yet.
 * @param queueManager
 *        queueManager that handles the jobs in the wait queues
 * @throws IOException
 *         exceptions that happened while handling the HDFS file.
 */
public void killStaffInScheduleing(QueueManager queueManager) throws IOException {
  // String WAIT_QUEUE = "waitQueue";
  // Queue<JobInProgress> waitQueue = queueManager.findQueue(WAIT_QUEUE);
  Collection<JobInProgress> jobsInWaitQueue = queueManager.getJobs();
  if (this.haLogOperator.isExist(conf.get(Constants.BC_BSP_HA_LOG_DIR)
      + Constants.BC_BSP_HA_SCHEDULE_LOG)) {
    // FSDataInputStream in = this.haLogOperator.readFile(conf
    //     .get(Constants.BC_BSP_HA_LOG_DIR)
    //     + Constants.BC_BSP_HA_SCHEDULE_LOG);
    BSPFSDataInputStream bspin = new BSPFSDataInputStreamImpl(haLogOperator,
        conf.get(Constants.BC_BSP_HA_LOG_DIR) + Constants.BC_BSP_HA_SCHEDULE_LOG);
    if (bspin != null) {
      String jobid = bspin.readUTF();
      bspin = null;
      for (JobInProgress jip : jobsInWaitQueue) {
        if (jip.getJobID().equals(new BSPJobID().forName(jobid))) {
          ArrayList<WorkerManagerStatus> wmsl = new ArrayList<WorkerManagerStatus>();
          // in = this.haLogOperator.readFile(conf
          //     .get(Constants.BC_BSP_HA_LOG_DIR)
          //     + jip.getJobID().toString());
          BSPFSDataInputStream bspIn = new BSPFSDataInputStreamImpl(haLogOperator,
              conf.get(Constants.BC_BSP_HA_LOG_DIR) + jip.getJobID().toString());
          Text loaFactor = new Text();
          loaFactor.readFields(bspIn.getIn());
          while (bspIn != null) {
            try {
              WorkerManagerStatus wmStatus = new WorkerManagerStatus();
              wmStatus.readFields(bspIn.getIn());
              wmsl.add(wmStatus);
            } catch (EOFException e) {
              bspIn = null;
            }
          }
          // recover the jobInProgress state
          StaffInProgress[] staffs = jip.getStaffInProgress();
          for (int i = 0; i < staffs.length; i++) {
            if (!staffs[i].isRunning() && !staffs[i].isComplete()) {
              Staff t = jip.obtainNewStaff(wmsl, i,
                  Double.parseDouble(loaFactor.toString()));
              WorkerManagerStatus wmss = staffs[i].getWorkerManagerStatus();
              jip.updateStaffStatus(staffs[i],
                  new StaffStatus(jip.getJobID(), staffs[i].getStaffID(), 0,
                      StaffStatus.State.UNASSIGNED, "running",
                      wmss.getWorkerManagerName(), StaffStatus.Phase.STARTING));
              // update the WorkerManagerStatus cache
              wmss.setRunningStaffsCount(wmss.getRunningStaffsCount() + 1);
              LOG.info("debug: kill staffs of the job in schedule process"
                  + " whose staffs haven't finished scheduling");
              this.updateWhiteWorkerManagersKey(wmss, wmss);
              LOG.info(t.getStaffAttemptId() + " is divided to the "
                  + wmss.getWorkerManagerName());
            }
          }
          this.killJob(jip);
        }
      }
    }
  }
}
From source file:com.chinamobile.bcbsp.bspcontroller.BSPController.java
License:Apache License
/**
 * Start all of the jobs that are already in the running queue.
 * @param queueManager
 *        queueManager that handles the processing queue.
 * @throws IOException
 *         exceptions while handling the HDFS log.
 */
public void startAllRunningJob(QueueManager queueManager) throws IOException {
  String PROCESSING_QUEUE = "processingQueue";
  Queue<JobInProgress> processingQueue = queueManager.findQueue(PROCESSING_QUEUE);
  Collection<JobInProgress> jobs = processingQueue.getJobs();
  for (JobInProgress jip : jobs) {
    Collection<WorkerManagerStatus> wmlist = null;
    ArrayList<WorkerManagerStatus> wmsl = new ArrayList<WorkerManagerStatus>();
    // FSDataInputStream in = this.haLogOperator.readFile(conf
    //     .get(Constants.BC_BSP_HA_LOG_DIR)
    //     + jip.getJobID().toString());
    BSPFSDataInputStream bspin = new BSPFSDataInputStreamImpl(haLogOperator,
        conf.get(Constants.BC_BSP_HA_LOG_DIR) + jip.getJobID().toString());
    Text loaFactor = new Text();
    loaFactor.readFields(bspin.getIn());
    while (bspin != null) {
      try {
        WorkerManagerStatus wmStatus = new WorkerManagerStatus();
        wmStatus.readFields(bspin.getIn());
        wmsl.add(wmStatus);
      } catch (EOFException e) {
        bspin = null;
      }
    }
    wmlist = wmsl;
    // LOG.info("wmlist size=" + wmsl.size());
    // recover the jobInProgress state
    StaffInProgress[] staffs = jip.getStaffInProgress();
    for (int i = 0; i < staffs.length; i++) {
      if (!staffs[i].isRunning() && !staffs[i].isComplete()) {
        Staff t = jip.obtainNewStaff(wmlist, i, Double.parseDouble(loaFactor.toString()));
        WorkerManagerStatus wmss = staffs[i].getWorkerManagerStatus();
        jip.updateStaffStatus(staffs[i],
            new StaffStatus(jip.getJobID(), staffs[i].getStaffID(), 0,
                StaffStatus.State.UNASSIGNED, "running", wmss.getWorkerManagerName(),
                StaffStatus.Phase.STARTING));
        // update the WorkerManagerStatus cache
        wmss.setRunningStaffsCount(wmss.getRunningStaffsCount() + 1);
        // LOG.info("debug: start all the running job");
        this.updateWhiteWorkerManagersKey(wmss, wmss);
        LOG.info(t.getStaffAttemptId() + " is divided to the " + wmss.getWorkerManagerName());
      }
    }
    jip.getGssc().setCurrentSuperStep();
    // LOG.info("before jip.getGssc().start(); ");
    jip.getGssc().setCheckNumBase();
    jip.getGssc().start();
  }
}
From source file:com.chinamobile.bcbsp.examples.simrank.SRRecordParse.java
License:Apache License
/**
 * This method is used to parse a record and obtain its VertexID.
 * @param key The key of the vertex record
 * @return the vertex id
 */
public Text getVertexID(Text key) {
  try {
    StringTokenizer str = new StringTokenizer(key.toString(), ":");
    if (str.countTokens() != 3) {
      return null;
    }
    return new Text(str.nextToken());
  } catch (Exception e) {
    return null;
  }
}
From source file:com.chinamobile.bcbsp.io.db.TableRecordWriter.java
License:Apache License
/**
 * Writes a key/value pair into the table.
 *
 * @param key
 *        The key.
 * @param value
 *        The value.
 * @throws IOException
 *         When writing fails.
 * @see com.chinamobile.bcbsp.io.RecordWriter#write(java.lang.Object,
 *      java.lang.Object)
 */
@Override
public void write(Text key, Text value) throws IOException {
  Log.info("key = " + key.toString());
  Log.info("value = " + value.toString());
  // String key = "";
  // String value1 = "";
  // LOG.info("keyValue = " + keyValue);
  // StringTokenizer str = new StringTokenizer(keyValue.toString(), "\t");
  // if (str.hasMoreElements()) {
  //   key = str.nextToken();
  //   LOG.info("key = " + key);
  // }
  // if (str.hasMoreElements()) {
  //   // value.set(str.nextToken());
  //   value1 = str.nextToken();
  //   LOG.info("Value = " + value1);
  // }
  // Put put = new Put(key.toString().getBytes());
  // // BSPHBPut put = new BSPHBPutImpl(key.toString().getBytes());
  // put.add("BorderNode".getBytes(), "nodeData".getBytes(), value1.getBytes());
  Put put = new Put(key.toString().getBytes());
  put.add("BorderNode".getBytes(), "nodeData".getBytes(), value.toString().getBytes());
  this.table.put(put);
}
From source file:com.chinamobile.bcbsp.io.db.TableRecordWriter.java
License:Apache License
@Override
public void write(Text keyValue) throws IOException, InterruptedException {
  String key = "";
  String value = "";
  StringTokenizer str = new StringTokenizer(keyValue.toString(), "\t");
  if (str.hasMoreElements()) {
    key = str.nextToken();
  }
  Put put = new Put(key.toString().getBytes());
  if (str.hasMoreElements()) {
    // value.set(str.nextToken());
    value = str.nextToken();
  }
  put.add("BorderNode".getBytes(), "nodeData".getBytes(), value.toString().getBytes());
  this.table.put(put);
}
From source file:com.chinamobile.bcbsp.io.titan.TitanRecordWriter.java
License:Apache License
@Override
public void write(Text key, Text value) throws IOException, InterruptedException {
  if (key == null) {
    return;
  }
  String[] vertexInfo = key.toString().split(":");
  String vertexID = vertexInfo[0];
  String vertexValue = vertexInfo[1];
  if (value == null) {
    try {
      if (!hasVertex(vertexID)) {
        client.execute("g.addVertex([vertexID:'" + vertexID + "', value:'"
            + vertexValue + "'])");
      } else {
        client.execute("g.V('vertexID','" + vertexID + "').sideEffect{it.value = '"
            + vertexValue + "'}");
      }
    } catch (RexProException e) {
      LOG.error("Can not write record to database!");
      return;
    }
    return;
  }
  String[] strs = value.toString().split(" ");
  String[] outgoingVertexIDs = new String[strs.length];
  String[] weights = new String[strs.length];
  for (int i = 0; i < strs.length; i++) {
    String[] str = strs[i].split(":");
    outgoingVertexIDs[i] = str[0];
    weights[i] = str[1];
  }
  try {
    if (!hasVertex(vertexID)) {
      client.execute("g.addVertex([vertexID:'" + vertexID + "', value:'"
          + vertexValue + "'])");
    } else {
      client.execute("g.V('vertexID','" + vertexID + "').sideEffect{it.value = '"
          + vertexValue + "'}");
    }
    for (int i = 0; i < outgoingVertexIDs.length; i++) {
      if (!hasVertex(outgoingVertexIDs[i])) {
        client.execute("g.addVertex([vertexID:'" + outgoingVertexIDs[i] + "', value:''])");
      }
      /*
       * else { client.execute("g.V('vertexID','" + outgoingVertexIDs[i] + "')"); }
       */
      client.execute("g.addEdge(g.V('vertexID','" + vertexID + "').next(), g.V('vertexID','"
          + outgoingVertexIDs[i] + "').next(), 'outgoing', [weight:" + weights[i] + "])");
    }
  } catch (RexProException e) {
    LOG.error("Can not write record to database!");
    return;
  }
}
From source file:com.chinamobile.bcbsp.io.titan.TitanRecordWriter.java
License:Apache License
@Override
public void write(Text keyValue) throws IOException, InterruptedException {
  Text key = new Text();
  Text value = new Text();
  StringTokenizer str1 = new StringTokenizer(keyValue.toString(), "\t");
  if (str1.hasMoreElements()) {
    key.set(str1.nextToken());
  }
  if (str1.hasMoreElements()) {
    value.set(str1.nextToken());
  }
  // An empty key means there is nothing to write. (A comparison such as
  // "key == new Text()" is always false because it compares references,
  // so the length is checked instead.)
  if (key.getLength() == 0) {
    return;
  }
  String[] vertexInfo = key.toString().split(":");
  String vertexID = vertexInfo[0];
  String vertexValue = vertexInfo[1];
  if (value.getLength() == 0) {
    try {
      if (!hasVertex(vertexID)) {
        client.execute("g.addVertex([vertexID:'" + vertexID + "', value:'"
            + vertexValue + "'])");
      } else {
        client.execute("g.V('vertexID','" + vertexID + "').sideEffect{it.value = '"
            + vertexValue + "'}");
      }
    } catch (RexProException e) {
      LOG.error("Can not write record to database!");
      return;
    }
    return;
  }
  String[] strs = value.toString().split(" ");
  String[] outgoingVertexIDs = new String[strs.length];
  String[] weights = new String[strs.length];
  for (int i = 0; i < strs.length; i++) {
    String[] str = strs[i].split(":");
    outgoingVertexIDs[i] = str[0];
    weights[i] = str[1];
  }
  try {
    if (!hasVertex(vertexID)) {
      client.execute("g.addVertex([vertexID:'" + vertexID + "', value:'"
          + vertexValue + "'])");
    } else {
      client.execute("g.V('vertexID','" + vertexID + "').sideEffect{it.value = '"
          + vertexValue + "'}");
    }
    for (int i = 0; i < outgoingVertexIDs.length; i++) {
      if (!hasVertex(outgoingVertexIDs[i])) {
        client.execute("g.addVertex([vertexID:'" + outgoingVertexIDs[i] + "', value:''])");
      }
      /*
       * else { client.execute("g.V('vertexID','" + outgoingVertexIDs[i] + "')"); }
       */
      client.execute("g.addEdge(g.V('vertexID','" + vertexID + "').next(), g.V('vertexID','"
          + outgoingVertexIDs[i] + "').next(), 'outgoing', [weight:" + weights[i] + "])");
    }
  } catch (RexProException e) {
    LOG.error("Can not write record to database!");
    return;
  }
}
From source file:com.chinamobile.bcbsp.ml.HashMLWritePartition.java
License:Apache License
/**
 * This method is used to partition graph vertices, writing each vertex to the
 * corresponding partition. It calls the recordParse method to create a
 * HeadNode object, then calls the partitioner's getPartitionID method to
 * compute the id of the partition the HeadNode belongs to. If the HeadNode
 * belongs to the local partition it is written locally; otherwise it is sent
 * to the appropriate partition.
 * @param recordReader The recordreader of the split.
 * @throws IOException The io exception
 * @throws InterruptedException The Interrupted Exception
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
  int headNodeNum = 0;
  int local = 0;
  int send = 0;
  int lost = 0;
  ThreadPool tpool = new ThreadPool(this.sendThreadNum);
  int bufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
      / (this.staff.getStaffNum() + this.sendThreadNum);
  byte[][] buffer = new byte[this.staff.getStaffNum()][bufferSize];
  int[] bufindex = new int[this.staff.getStaffNum()];
  BytesWritable kbytes = new BytesWritable();
  int ksize = 0;
  BytesWritable vbytes = new BytesWritable();
  int vsize = 0;
  DataOutputBuffer bb = new DataOutputBuffer();
  try {
    this.keyserializer.open(bb);
    this.valueserializer.open(bb);
  } catch (IOException e) {
    throw e;
  }
  try {
    while (recordReader != null && recordReader.nextKeyValue()) {
      headNodeNum++;
      Text key = new Text(recordReader.getCurrentKey().toString());
      Text value = new Text(recordReader.getCurrentValue().toString());
      int pid = -1;
      if (key != null) {
        pid = this.partitioner.getPartitionID(key);
      } else {
        lost++;
        continue;
      }
      if (pid == this.staff.getPartition()) {
        local++;
        KeyValuePair pair = (KeyValuePair) this.recordParse.recordParse(key.toString(),
            value.toString());
        if (pair == null) {
          lost++;
          continue;
        }
        staff.getGraphData().addForAll(pair);
      } else {
        send++;
        bb.reset();
        this.keyserializer.serialize(key);
        kbytes.set(bb.getData(), 0, bb.getLength());
        ksize = kbytes.getLength();
        bb.reset();
        this.valueserializer.serialize(value);
        vbytes.set(bb.getData(), 0, bb.getLength());
        vsize = vbytes.getLength();
        if ((buffer[pid].length - bufindex[pid]) > (ksize + vsize)) {
          System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
          bufindex[pid] += ksize;
          System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
          bufindex[pid] += vsize;
        } else if (buffer[pid].length < (ksize + vsize)) {
          ThreadSignle t = tpool.getThread();
          while (t == null) {
            t = tpool.getThread();
          }
          t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
          t.setJobId(staff.getJobID());
          t.setTaskId(staff.getStaffID());
          t.setBelongPartition(pid);
          BytesWritable data = new BytesWritable();
          byte[] tmp = new byte[vsize + ksize];
          System.arraycopy(kbytes.getBytes(), 0, tmp, 0, ksize);
          System.arraycopy(vbytes.getBytes(), 0, tmp, ksize, vsize);
          data.set(tmp, 0, (ksize + vsize));
          t.setData(data);
          tmp = null;
          LOG.info("Using Thread is: " + t.getThreadNumber());
          LOG.info("this is a super record");
          t.setStatus(true);
        } else {
          ThreadSignle t = tpool.getThread();
          while (t == null) {
            t = tpool.getThread();
          }
          t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
          t.setJobId(staff.getJobID());
          t.setTaskId(staff.getStaffID());
          t.setBelongPartition(pid);
          BytesWritable data = new BytesWritable();
          data.set(buffer[pid], 0, bufindex[pid]);
          t.setData(data);
          LOG.info("Using Thread is: " + t.getThreadNumber());
          t.setStatus(true);
          bufindex[pid] = 0;
          // store data
          System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
          bufindex[pid] += ksize;
          System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
          bufindex[pid] += vsize;
        }
      }
    }
    for (int i = 0; i < this.staff.getStaffNum(); i++) {
      if (bufindex[i] != 0) {
        ThreadSignle t = tpool.getThread();
        while (t == null) {
          t = tpool.getThread();
        }
        t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
        t.setJobId(staff.getJobID());
        t.setTaskId(staff.getStaffID());
        t.setBelongPartition(i);
        BytesWritable data = new BytesWritable();
        data.set(buffer[i], 0, bufindex[i]);
        t.setData(data);
        LOG.info("Using Thread is: " + t.getThreadNumber());
        t.setStatus(true);
      }
    }
    tpool.cleanup();
    tpool = null;
    buffer = null;
    bufindex = null;
    LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
    LOG.info("The number of vertices that were put into the partition: " + local);
    LOG.info("The number of vertices that were sent to other partitions: " + send);
    LOG.info("The number of vertices in the partition that could not be parsed: " + lost);
  } catch (IOException e) {
    throw e;
  } catch (InterruptedException e) {
    throw e;
  }
}
From source file:com.chinamobile.bcbsp.ml.RecordParseML.java
License:Apache License
/**
 * This method is used to parse a record and obtain its VertexID.
 * @param key The key of the vertex record
 * @return the vertex id
 */
@Override
public Text getVertexID(Text key) {
  try {
    StringTokenizer str = new StringTokenizer(key.toString(), Constants.SPLIT_FLAG);
    if (str.countTokens() < 1) {
      return null;
    }
    return new Text(str.nextToken());
  } catch (Exception e) {
    return null;
  }
}