List of usage examples for org.apache.hadoop.io Text toString
@Override
public String toString()
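Before the collected examples, here is a minimal, self-contained sketch of what Text.toString() does: it decodes the Text object's UTF-8 backing bytes into a java.lang.String. Only standard Hadoop API calls (the Text(String) constructor, set(), getLength(), toString()) are used; the sample values are illustrative only.

import org.apache.hadoop.io.Text;

public class TextToStringDemo {
  public static void main(String[] args) {
    // Text stores its contents as UTF-8 bytes; toString() decodes them
    // back into a java.lang.String.
    Text text = new Text("hello:world");
    String decoded = text.toString();
    System.out.println(decoded);            // -> hello:world
    System.out.println(text.getLength());   // number of UTF-8 bytes, here 11

    // A Text instance is mutable: set() replaces the contents in place,
    // which is why the examples below often copy the value out via
    // toString() before reusing the object.
    text.set("42");
    System.out.println(text.toString());    // -> 42
  }
}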
From source file:com.cg.mapreduce.myfpgrowth.ParallelFPGrowthMapper.java
License:Apache License
@Override
protected void map(LongWritable offset, Text input, Context context)
    throws IOException, InterruptedException {
  String[] items = splitter.split(input.toString());
  List<String> record = FpGrow.sortByF1(Arrays.asList(items), fList);
  OpenIntHashSet groups = new OpenIntHashSet();
  for (int j = record.size() - 1; j >= 0; j--) {
    // generate group dependent shards
    String item = record.get(j);
    int groupID = fMap.get(item) / maxPerGroup;
    if (!groups.contains(groupID)) {
      ArrayList<String> tempItems = Lists.newArrayList();
      for (int i = 0; i <= j; i++) {
        tempItems.add(record.get(i));
      }
      wGroupID.set(groupID);
      if (tempItems.size() > 1) {
        // System.out.println(groupID + " " + tempItems);
        context.write(wGroupID, tempItems);
      }
    }
    groups.add(groupID);
  }
}
From source file:com.chinamobile.bcbsp.bspcontroller.BSPController.java
License:Apache License
/**
 * Kill the staffs of a job that is still in the schedule process, i.e. whose
 * staffs have not finished scheduling yet.
 * @param queueManager
 *        queueManager that handles the jobs in the wait queues
 * @throws IOException
 *         exceptions that happened while handling the HDFS file.
 */
public void killStaffInScheduleing(QueueManager queueManager) throws IOException {
  // String WAIT_QUEUE = "waitQueue";
  // Queue<JobInProgress> waitQueue = queueManager.findQueue(WAIT_QUEUE);
  Collection<JobInProgress> jobsInWaitQueue = queueManager.getJobs();
  if (this.haLogOperator.isExist(conf.get(Constants.BC_BSP_HA_LOG_DIR)
      + Constants.BC_BSP_HA_SCHEDULE_LOG)) {
    // FSDataInputStream in = this.haLogOperator.readFile(conf
    //     .get(Constants.BC_BSP_HA_LOG_DIR)
    //     + Constants.BC_BSP_HA_SCHEDULE_LOG);
    BSPFSDataInputStream bspin = new BSPFSDataInputStreamImpl(haLogOperator,
        conf.get(Constants.BC_BSP_HA_LOG_DIR) + Constants.BC_BSP_HA_SCHEDULE_LOG);
    if (bspin != null) {
      String jobid = bspin.readUTF();
      bspin = null;
      for (JobInProgress jip : jobsInWaitQueue) {
        if (jip.getJobID().equals(new BSPJobID().forName(jobid))) {
          ArrayList<WorkerManagerStatus> wmsl = new ArrayList<WorkerManagerStatus>();
          // in = this.haLogOperator.readFile(conf
          //     .get(Constants.BC_BSP_HA_LOG_DIR)
          //     + jip.getJobID().toString());
          BSPFSDataInputStream bspIn = new BSPFSDataInputStreamImpl(haLogOperator,
              conf.get(Constants.BC_BSP_HA_LOG_DIR) + jip.getJobID().toString());
          Text loaFactor = new Text();
          loaFactor.readFields(bspIn.getIn());
          while (bspIn != null) {
            try {
              WorkerManagerStatus wmStatus = new WorkerManagerStatus();
              wmStatus.readFields(bspIn.getIn());
              wmsl.add(wmStatus);
            } catch (EOFException e) {
              bspIn = null;
            }
          }
          // recover the jobInProgress state
          StaffInProgress[] staffs = jip.getStaffInProgress();
          for (int i = 0; i < staffs.length; i++) {
            if (!staffs[i].isRunning() && !staffs[i].isComplete()) {
              Staff t = jip.obtainNewStaff(wmsl, i,
                  Double.parseDouble(loaFactor.toString()));
              WorkerManagerStatus wmss = staffs[i].getWorkerManagerStatus();
              jip.updateStaffStatus(staffs[i],
                  new StaffStatus(jip.getJobID(), staffs[i].getStaffID(), 0,
                      StaffStatus.State.UNASSIGNED, "running",
                      wmss.getWorkerManagerName(), StaffStatus.Phase.STARTING));
              // update the WorkerManagerStatus cache
              wmss.setRunningStaffsCount(wmss.getRunningStaffsCount() + 1);
              LOG.info("debug: kill staffs of the job in schedule process"
                  + " whose staffs haven't finished scheduling");
              this.updateWhiteWorkerManagersKey(wmss, wmss);
              LOG.info(t.getStaffAttemptId() + " is divided to the "
                  + wmss.getWorkerManagerName());
            }
          }
          this.killJob(jip);
        }
      }
    }
  }
}
From source file:com.chinamobile.bcbsp.bspcontroller.BSPController.java
License:Apache License
/**
 * Start all of the jobs that are already in the running queue.
 * @param queueManager
 *        queueManager that handles the processing queue.
 * @throws IOException
 *         exceptions while handling the HDFS log.
 */
public void startAllRunningJob(QueueManager queueManager) throws IOException {
  String PROCESSING_QUEUE = "processingQueue";
  Queue<JobInProgress> processingQueue = queueManager.findQueue(PROCESSING_QUEUE);
  Collection<JobInProgress> jobs = processingQueue.getJobs();
  for (JobInProgress jip : jobs) {
    Collection<WorkerManagerStatus> wmlist = null;
    ArrayList<WorkerManagerStatus> wmsl = new ArrayList<WorkerManagerStatus>();
    // FSDataInputStream in = this.haLogOperator.readFile(conf
    //     .get(Constants.BC_BSP_HA_LOG_DIR)
    //     + jip.getJobID().toString());
    BSPFSDataInputStream bspin = new BSPFSDataInputStreamImpl(haLogOperator,
        conf.get(Constants.BC_BSP_HA_LOG_DIR) + jip.getJobID().toString());
    Text loaFactor = new Text();
    loaFactor.readFields(bspin.getIn());
    while (bspin != null) {
      try {
        WorkerManagerStatus wmStatus = new WorkerManagerStatus();
        wmStatus.readFields(bspin.getIn());
        wmsl.add(wmStatus);
      } catch (EOFException e) {
        bspin = null;
      }
    }
    wmlist = wmsl;
    // LOG.info("wmlist size=" + wmsl.size());
    // recover the jobInProgress state
    StaffInProgress[] staffs = jip.getStaffInProgress();
    for (int i = 0; i < staffs.length; i++) {
      if (!staffs[i].isRunning() && !staffs[i].isComplete()) {
        Staff t = jip.obtainNewStaff(wmlist, i, Double.parseDouble(loaFactor.toString()));
        WorkerManagerStatus wmss = staffs[i].getWorkerManagerStatus();
        jip.updateStaffStatus(staffs[i],
            new StaffStatus(jip.getJobID(), staffs[i].getStaffID(), 0,
                StaffStatus.State.UNASSIGNED, "running", wmss.getWorkerManagerName(),
                StaffStatus.Phase.STARTING));
        // update the WorkerManagerStatus cache
        wmss.setRunningStaffsCount(wmss.getRunningStaffsCount() + 1);
        // LOG.info("debug: start all the running job");
        this.updateWhiteWorkerManagersKey(wmss, wmss);
        LOG.info(t.getStaffAttemptId() + " is divided to the " + wmss.getWorkerManagerName());
      }
    }
    jip.getGssc().setCurrentSuperStep();
    // LOG.info("before jip.getGssc().start(); ");
    jip.getGssc().setCheckNumBase();
    jip.getGssc().start();
  }
}
From source file:com.chinamobile.bcbsp.examples.simrank.SRRecordParse.java
License:Apache License
/**
 * This method is used to parse a record and obtain its VertexID.
 * @param key The key of the vertex record
 * @return the vertex id
 */
public Text getVertexID(Text key) {
  try {
    StringTokenizer str = new StringTokenizer(key.toString(), ":");
    if (str.countTokens() != 3) {
      return null;
    }
    return new Text(str.nextToken());
  } catch (Exception e) {
    return null;
  }
}
From source file:com.chinamobile.bcbsp.io.db.TableRecordWriter.java
License:Apache License
/**
 * Writes a key/value pair into the table.
 *
 * @param key
 *        The key.
 * @param value
 *        The value.
 * @throws IOException
 *         When writing fails.
 * @see com.chinamobile.bcbsp.io.RecordWriter#write(java.lang.Object,
 *      java.lang.Object)
 */
@Override
public void write(Text key, Text value) throws IOException {
  Log.info("key = " + key.toString());
  Log.info("value = " + value.toString());
  // String key = "";
  // String value1 = "";
  // LOG.info("keyValue = " + keyValue);
  // StringTokenizer str = new StringTokenizer(keyValue.toString(), "\t");
  // if (str.hasMoreElements()) {
  //   key = str.nextToken();
  //   LOG.info("key = " + key);
  // }
  // if (str.hasMoreElements()) {
  //   // value.set(str.nextToken());
  //   value1 = str.nextToken();
  //   LOG.info("Value = " + value1);
  // }
  // Put put = new Put(key.toString().getBytes());
  // // BSPHBPut put = new BSPHBPutImpl(key.toString().getBytes());
  // put.add("BorderNode".getBytes(), "nodeData".getBytes(), value1.getBytes());
  Put put = new Put(key.toString().getBytes());
  put.add("BorderNode".getBytes(), "nodeData".getBytes(), value.toString().getBytes());
  this.table.put(put);
}
From source file:com.chinamobile.bcbsp.io.db.TableRecordWriter.java
License:Apache License
@Override
public void write(Text keyValue) throws IOException, InterruptedException {
  String key = "";
  String value = "";
  StringTokenizer str = new StringTokenizer(keyValue.toString(), "\t");
  if (str.hasMoreElements()) {
    key = str.nextToken();
  }
  Put put = new Put(key.toString().getBytes());
  if (str.hasMoreElements()) {
    // value.set(str.nextToken());
    value = str.nextToken();
  }
  put.add("BorderNode".getBytes(), "nodeData".getBytes(), value.toString().getBytes());
  this.table.put(put);
}
From source file:com.chinamobile.bcbsp.io.titan.TitanRecordWriter.java
License:Apache License
@Override
public void write(Text key, Text value) throws IOException, InterruptedException {
  if (key == null) {
    return;
  }
  String[] vertexInfo = key.toString().split(":");
  String vertexID = vertexInfo[0];
  String vertexValue = vertexInfo[1];
  if (value == null) {
    try {
      if (!hasVertex(vertexID)) {
        client.execute("g.addVertex([vertexID:'" + vertexID + "', value:'"
            + vertexValue + "'])");
      } else {
        client.execute("g.V('vertexID','" + vertexID + "').sideEffect{it.value = '"
            + vertexValue + "'}");
      }
    } catch (RexProException e) {
      LOG.error("Can not write record to database!");
      return;
    }
    return;
  }
  String[] strs = value.toString().split(" ");
  String[] outgoingVertexIDs = new String[strs.length];
  String[] weights = new String[strs.length];
  for (int i = 0; i < strs.length; i++) {
    String[] str = strs[i].split(":");
    outgoingVertexIDs[i] = str[0];
    weights[i] = str[1];
  }
  try {
    if (!hasVertex(vertexID)) {
      client.execute("g.addVertex([vertexID:'" + vertexID + "', value:'"
          + vertexValue + "'])");
    } else {
      client.execute("g.V('vertexID','" + vertexID + "').sideEffect{it.value = '"
          + vertexValue + "'}");
    }
    for (int i = 0; i < outgoingVertexIDs.length; i++) {
      if (!hasVertex(outgoingVertexIDs[i])) {
        client.execute("g.addVertex([vertexID:'" + outgoingVertexIDs[i] + "', value:''])");
      }
      /*
       * else { client.execute("g.V('vertexID','" + outgoingVertexIDs[i] + "')"); }
       */
      client.execute("g.addEdge(g.V('vertexID','" + vertexID + "').next(), g.V('vertexID','"
          + outgoingVertexIDs[i] + "').next(), 'outgoing', [weight:" + weights[i] + "])");
    }
  } catch (RexProException e) {
    LOG.error("Can not write record to database!");
    return;
  }
}
From source file:com.chinamobile.bcbsp.io.titan.TitanRecordWriter.java
License:Apache License
@Override
public void write(Text keyValue) throws IOException, InterruptedException {
  Text key = new Text();
  Text value = new Text();
  StringTokenizer str1 = new StringTokenizer(keyValue.toString(), "\t");
  if (str1.hasMoreElements()) {
    key.set(str1.nextToken());
  }
  if (str1.hasMoreElements()) {
    value.set(str1.nextToken());
  }
  // An empty key means there is nothing to write. (A comparison such as
  // "key == new Text()" is always false because it compares references,
  // so the length is checked instead.)
  if (key.getLength() == 0) {
    return;
  }
  String[] vertexInfo = key.toString().split(":");
  String vertexID = vertexInfo[0];
  String vertexValue = vertexInfo[1];
  if (value.getLength() == 0) {
    try {
      if (!hasVertex(vertexID)) {
        client.execute("g.addVertex([vertexID:'" + vertexID + "', value:'"
            + vertexValue + "'])");
      } else {
        client.execute("g.V('vertexID','" + vertexID + "').sideEffect{it.value = '"
            + vertexValue + "'}");
      }
    } catch (RexProException e) {
      LOG.error("Can not write record to database!");
      return;
    }
    return;
  }
  String[] strs = value.toString().split(" ");
  String[] outgoingVertexIDs = new String[strs.length];
  String[] weights = new String[strs.length];
  for (int i = 0; i < strs.length; i++) {
    String[] str = strs[i].split(":");
    outgoingVertexIDs[i] = str[0];
    weights[i] = str[1];
  }
  try {
    if (!hasVertex(vertexID)) {
      client.execute("g.addVertex([vertexID:'" + vertexID + "', value:'"
          + vertexValue + "'])");
    } else {
      client.execute("g.V('vertexID','" + vertexID + "').sideEffect{it.value = '"
          + vertexValue + "'}");
    }
    for (int i = 0; i < outgoingVertexIDs.length; i++) {
      if (!hasVertex(outgoingVertexIDs[i])) {
        client.execute("g.addVertex([vertexID:'" + outgoingVertexIDs[i] + "', value:''])");
      }
      /*
       * else { client.execute("g.V('vertexID','" + outgoingVertexIDs[i] + "')"); }
       */
      client.execute("g.addEdge(g.V('vertexID','" + vertexID + "').next(), g.V('vertexID','"
          + outgoingVertexIDs[i] + "').next(), 'outgoing', [weight:" + weights[i] + "])");
    }
  } catch (RexProException e) {
    LOG.error("Can not write record to database!");
    return;
  }
}
From source file:com.chinamobile.bcbsp.ml.HashMLWritePartition.java
License:Apache License
/**
 * This method is used to partition graph vertices, writing each vertex to the
 * corresponding partition. It calls the recordParse method to create a
 * HeadNode object, then calls the partitioner's getPartitionID method to
 * compute the id of the partition the HeadNode belongs to. If the HeadNode
 * belongs to the local partition it is written locally; otherwise it is sent
 * to the appropriate partition.
 * @param recordReader The recordreader of the split.
 * @throws IOException The io exception
 * @throws InterruptedException The Interrupted Exception
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
  int headNodeNum = 0;
  int local = 0;
  int send = 0;
  int lost = 0;
  ThreadPool tpool = new ThreadPool(this.sendThreadNum);
  int bufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
      / (this.staff.getStaffNum() + this.sendThreadNum);
  byte[][] buffer = new byte[this.staff.getStaffNum()][bufferSize];
  int[] bufindex = new int[this.staff.getStaffNum()];
  BytesWritable kbytes = new BytesWritable();
  int ksize = 0;
  BytesWritable vbytes = new BytesWritable();
  int vsize = 0;
  DataOutputBuffer bb = new DataOutputBuffer();
  try {
    this.keyserializer.open(bb);
    this.valueserializer.open(bb);
  } catch (IOException e) {
    throw e;
  }
  try {
    while (recordReader != null && recordReader.nextKeyValue()) {
      headNodeNum++;
      Text key = new Text(recordReader.getCurrentKey().toString());
      Text value = new Text(recordReader.getCurrentValue().toString());
      int pid = -1;
      if (key != null) {
        pid = this.partitioner.getPartitionID(key);
      } else {
        lost++;
        continue;
      }
      if (pid == this.staff.getPartition()) {
        local++;
        KeyValuePair pair = (KeyValuePair) this.recordParse.recordParse(key.toString(),
            value.toString());
        if (pair == null) {
          lost++;
          continue;
        }
        staff.getGraphData().addForAll(pair);
      } else {
        send++;
        bb.reset();
        this.keyserializer.serialize(key);
        kbytes.set(bb.getData(), 0, bb.getLength());
        ksize = kbytes.getLength();
        bb.reset();
        this.valueserializer.serialize(value);
        vbytes.set(bb.getData(), 0, bb.getLength());
        vsize = vbytes.getLength();
        if ((buffer[pid].length - bufindex[pid]) > (ksize + vsize)) {
          System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
          bufindex[pid] += ksize;
          System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
          bufindex[pid] += vsize;
        } else if (buffer[pid].length < (ksize + vsize)) {
          ThreadSignle t = tpool.getThread();
          while (t == null) {
            t = tpool.getThread();
          }
          t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
          t.setJobId(staff.getJobID());
          t.setTaskId(staff.getStaffID());
          t.setBelongPartition(pid);
          BytesWritable data = new BytesWritable();
          byte[] tmp = new byte[vsize + ksize];
          System.arraycopy(kbytes.getBytes(), 0, tmp, 0, ksize);
          System.arraycopy(vbytes.getBytes(), 0, tmp, ksize, vsize);
          data.set(tmp, 0, (ksize + vsize));
          t.setData(data);
          tmp = null;
          LOG.info("Using Thread is: " + t.getThreadNumber());
          LOG.info("this is a super record");
          t.setStatus(true);
        } else {
          ThreadSignle t = tpool.getThread();
          while (t == null) {
            t = tpool.getThread();
          }
          t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
          t.setJobId(staff.getJobID());
          t.setTaskId(staff.getStaffID());
          t.setBelongPartition(pid);
          BytesWritable data = new BytesWritable();
          data.set(buffer[pid], 0, bufindex[pid]);
          t.setData(data);
          LOG.info("Using Thread is: " + t.getThreadNumber());
          t.setStatus(true);
          bufindex[pid] = 0;
          // store data
          System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
          bufindex[pid] += ksize;
          System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
          bufindex[pid] += vsize;
        }
      }
    }
    for (int i = 0; i < this.staff.getStaffNum(); i++) {
      if (bufindex[i] != 0) {
        ThreadSignle t = tpool.getThread();
        while (t == null) {
          t = tpool.getThread();
        }
        t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
        t.setJobId(staff.getJobID());
        t.setTaskId(staff.getStaffID());
        t.setBelongPartition(i);
        BytesWritable data = new BytesWritable();
        data.set(buffer[i], 0, bufindex[i]);
        t.setData(data);
        LOG.info("Using Thread is: " + t.getThreadNumber());
        t.setStatus(true);
      }
    }
    tpool.cleanup();
    tpool = null;
    buffer = null;
    bufindex = null;
    LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
    LOG.info("The number of vertices that were put into the partition: " + local);
    LOG.info("The number of vertices that were sent to other partitions: " + send);
    LOG.info("The number of vertices in the partition that could not be parsed: " + lost);
  } catch (IOException e) {
    throw e;
  } catch (InterruptedException e) {
    throw e;
  }
}
From source file:com.chinamobile.bcbsp.ml.RecordParseML.java
License:Apache License
/**
 * This method is used to parse a record and obtain its VertexID.
 * @param key The key of the vertex record
 * @return the vertex id
 */
@Override
public Text getVertexID(Text key) {
  try {
    StringTokenizer str = new StringTokenizer(key.toString(), Constants.SPLIT_FLAG);
    if (str.countTokens() < 1) {
      return null;
    }
    return new Text(str.nextToken());
  } catch (Exception e) {
    return null;
  }
}