com.chinamobile.bcbsp.partition.HashWritePartition.java Source code

Java tutorial

Introduction

Here is the source code for com.chinamobile.bcbsp.partition.HashWritePartition.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.chinamobile.bcbsp.partition;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

import com.chinamobile.bcbsp.api.Vertex;
import com.chinamobile.bcbsp.io.RecordReader;
import com.chinamobile.bcbsp.util.ThreadPool;
import com.chinamobile.bcbsp.util.ThreadSignle;
import com.chinamobile.bcbsp.api.Partitioner;
import com.chinamobile.bcbsp.bspstaff.BSPStaff;
import com.chinamobile.bcbsp.bspstaff.BSPStaff.WorkerAgentForStaffInterface;

/**
 * HashWritePartition implements the hash-based partition method. The user must
 * provide a no-argument constructor.
 * @author
 * @version
 */
public class HashWritePartition extends WritePartition {
    /** The log of the HashWritePartition class. */
    public static final Log LOG = LogFactory.getLog(HashWritePartition.class);
    /**
     * Scale factor applied twice to {@code TotalCacheSize} when sizing the send
     * buffers (presumably converting megabytes to bytes; confirm against
     * WritePartition).
     */
    private static final int CONTAINERNUMBER = 1024;
    /**
     * Upper bound for one send buffer. Kept slightly below Integer.MAX_VALUE
     * because some JVMs refuse to allocate arrays of exactly that length.
     */
    private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8;

    /**
     * Creates an instance without collaborators; the inherited fields must be
     * set before {@link #write(RecordReader)} is called.
     */
    public HashWritePartition() {
    }

    /**
     * Creates an instance bound to the given collaborators.
     * @param aWorkerAgent The workerAgent of the staff.
     * @param bspstaff The staff that owns this write partition.
     * @param aPartitioner The partitioner used to map vertex ids to partitions.
     */
    public HashWritePartition(WorkerAgentForStaffInterface aWorkerAgent, BSPStaff bspstaff,
            Partitioner<Text> aPartitioner) {
        this.workerAgent = aWorkerAgent;
        this.staff = bspstaff;
        this.partitioner = aPartitioner;
    }

    /**
     * Reads every record from the reader and routes it to its partition.
     * <p>
     * For each record the vertex id is extracted from the key and handed to the
     * partitioner. Records that map to this staff's own partition are parsed
     * into a Vertex and added to the staff's graph data. All other records are
     * serialized (key followed by value) and appended to a per-partition byte
     * buffer; a buffer is handed to a sender thread when the next record no
     * longer fits, and all non-empty buffers are handed over once the input is
     * exhausted. A record larger than a whole buffer is sent on its own.
     * <p>
     * Records whose vertex id or vertex cannot be parsed are counted as lost
     * and skipped. The four counters are logged at the end and always add up:
     * read = local + sent + lost.
     * @param recordReader The record reader of the split; {@code null} is
     *        treated as empty input.
     * @throws IOException The io exception
     * @throws InterruptedException The Interrupted Exception
     */
    @Override
    public void write(RecordReader recordReader) throws IOException, InterruptedException {
        int headNodeNum = 0;
        int local = 0;
        int send = 0;
        int lost = 0;
        final int staffNum = this.staff.getStaffNum();
        byte[][] buffer = new byte[staffNum][computeBufferSize(staffNum)];
        int[] bufindex = new int[staffNum];
        BytesWritable kbytes = new BytesWritable();
        BytesWritable vbytes = new BytesWritable();
        // Both serializers write into the same scratch buffer, so each result
        // is copied out into kbytes/vbytes before the scratch buffer is reset.
        DataOutputBuffer bb = new DataOutputBuffer();
        this.keyserializer.open(bb);
        this.valueserializer.open(bb);
        // Created last so that a failure while allocating buffers or opening
        // the serializers does not leave a pool behind without cleanup().
        // NOTE(review): an exception thrown inside the loop below still skips
        // cleanup(); whether calling it on the error path is safe depends on
        // ThreadPool's implementation, which is not visible here.
        ThreadPool tpool = new ThreadPool(this.sendThreadNum);

        while (recordReader != null && recordReader.nextKeyValue()) {
            headNodeNum++;
            Text key = new Text(recordReader.getCurrentKey().toString());
            Text value = new Text(recordReader.getCurrentValue().toString());
            Text vertexID = this.recordParse.getVertexID(key);
            if (vertexID == null) {
                lost++;
                continue;
            }
            int pid = this.partitioner.getPartitionID(vertexID);
            if (pid == this.staff.getPartition()) {
                Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                if (vertex == null) {
                    lost++;
                    continue;
                }
                staff.getGraphData().addForAll(vertex);
                // Count only after the vertex has actually been stored, so an
                // unparsable record is not reported as both local and lost.
                local++;
                continue;
            }

            send++;
            bb.reset();
            this.keyserializer.serialize(key);
            kbytes.set(bb.getData(), 0, bb.getLength());
            int ksize = kbytes.getLength();
            bb.reset();
            this.valueserializer.serialize(value);
            vbytes.set(bb.getData(), 0, bb.getLength());
            int vsize = vbytes.getLength();
            int recordSize = ksize + vsize;

            if (recordSize > buffer[pid].length) {
                // The record can never fit into a buffer: send it by itself and
                // leave whatever is already buffered for this partition alone.
                byte[] tmp = new byte[recordSize];
                System.arraycopy(kbytes.getBytes(), 0, tmp, 0, ksize);
                System.arraycopy(vbytes.getBytes(), 0, tmp, ksize, vsize);
                LOG.info("this is a super record");
                dispatch(tpool, pid, tmp, recordSize);
                continue;
            }
            if (buffer[pid].length - bufindex[pid] < recordSize) {
                // Not enough room left: hand over the buffered bytes first.
                // bufindex[pid] is always > 0 here because the record fits
                // into an empty buffer, so no empty payload is ever sent.
                dispatch(tpool, pid, buffer[pid], bufindex[pid]);
                bufindex[pid] = 0;
            }
            System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
            bufindex[pid] += ksize;
            System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
            bufindex[pid] += vsize;
        }

        // Flush whatever is still buffered for each partition.
        for (int i = 0; i < staffNum; i++) {
            if (bufindex[i] != 0) {
                dispatch(tpool, i, buffer[i], bufindex[i]);
            }
        }
        tpool.cleanup();
        LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
        LOG.info("The number of vertices that were put into the partition: " + local);
        LOG.info("The number of vertices that were sent to other partitions: " + send);
        LOG.info("The number of verteices in the partition that cound not be " + "parsed:" + lost);
    }

    /**
     * Computes the length of one per-partition send buffer: the total cache
     * size scaled by CONTAINERNUMBER twice, divided evenly between the staff
     * buffers and the sender threads. The arithmetic is done in {@code long}
     * so that large cache sizes do not overflow {@code int}; the result is
     * capped at {@link #MAX_BUFFER_SIZE}.
     * @param staffNum The number of staffs (one buffer per staff).
     * @return The buffer length in bytes.
     */
    private int computeBufferSize(int staffNum) {
        long totalBytes = (long) this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER;
        long perBuffer = totalBytes / (staffNum + this.sendThreadNum);
        return (int) Math.min(perBuffer, (long) MAX_BUFFER_SIZE);
    }

    /**
     * Hands the first {@code length} bytes of {@code payload} to a sender
     * thread obtained from the pool, addressed to the given partition.
     * The bytes are copied into a fresh BytesWritable, so the caller may reuse
     * {@code payload} as soon as this method returns.
     * @param tpool The pool to take the sender thread from.
     * @param partitionId The id of the destination partition.
     * @param payload The array holding the serialized records.
     * @param length The number of valid bytes at the start of {@code payload}.
     * @throws IOException The io exception
     * @throws InterruptedException The Interrupted Exception
     */
    private void dispatch(ThreadPool tpool, int partitionId, byte[] payload, int length)
            throws IOException, InterruptedException {
        // getThread() may return null; keep asking until a thread is handed out.
        ThreadSignle t = tpool.getThread();
        while (t == null) {
            t = tpool.getThread();
        }
        t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), partitionId));
        t.setJobId(staff.getJobID());
        t.setTaskId(staff.getStaffID());
        t.setBelongPartition(partitionId);
        BytesWritable data = new BytesWritable();
        data.set(payload, 0, length);
        t.setData(data);
        LOG.info("Using Thread is: " + t.getThreadNumber());
        // Set the status flag last, after job, partition and data are in place.
        t.setStatus(true);
    }

    // Disabled overload of write(...) that takes an Application and a staff
    // count. It is commented out and therefore not compiled; it still shows the
    // earlier buffering logic and is retained exactly as it appeared.
    //   recordReader  The recordReader of the staff write.
    //   application   The application of the writepartition.
    //   staffNum      The staff number of the job.
    //  public void write(RecordReader recordReader, Application application,
    //      int staffNum) throws IOException, InterruptedException {
    //    LOG.info("in hashWritePartion write method");
    //    int headNodeNum = 0;
    //    int local = 0;
    //    int send = 0;
    //    int lost = 0;
    //    ThreadPool tpool = new ThreadPool(this.sendThreadNum);
    //    int bufferSize = (int) ((this.TotalCacheSize * CONTAINERNUMBER
    //        * CONTAINERNUMBER)
    //        / (this.staff.getStaffNum() + this.sendThreadNum));
    //    byte[][] buffer = new byte[this.staff.getStaffNum()][bufferSize];
    //    int[] bufindex = new int[this.staff.getStaffNum()];
    //    BytesWritable kbytes = new BytesWritable();
    //    int ksize = 0;
    //    BytesWritable vbytes = new BytesWritable();
    //    int vsize = 0;
    //    DataOutputBuffer bb = new DataOutputBuffer();
    //    try {
    //      this.keyserializer.open(bb);
    //      this.valueserializer.open(bb);
    //    } catch (IOException e) {
    //      throw e;
    //    }
    //    try {
    //      while (recordReader != null && recordReader.nextKeyValue()) {
    //        headNodeNum++;
    //        Text key = new Text(recordReader.getCurrentKey().toString());
    //        Text value = new Text(recordReader.getCurrentValue().toString());
    //        LOG.info("KEY:" + key.toString() + "\tVALUE:" + value.toString());
    //        int pid = -1;
    //        Text vertexID = this.recordParse.getVertexID(key);
    //        pid = this.partitioner.getPartitionID(vertexID);
    //        if (pid == -1) {
    //          lost++;
    //          continue;
    //        }
    //        if (pid == this.staff.getPartition()) {
    //          local++;
    //          LOG.info("send to local");
    //          application.getDownlink().sendKeyValue(key.toString(),
    //              value.toString());
    //        } else {
    //          send++;
    //          LOG.info("send to remote");
    //          bb.reset();
    //          this.keyserializer.serialize(key);
    //          kbytes.set(bb.getData(), 0, bb.getLength());
    //          ksize = kbytes.getLength();
    //          bb.reset();
    //          this.valueserializer.serialize(value);
    //          vbytes.set(bb.getData(), 0, bb.getLength());
    //          vsize = vbytes.getLength();
    //          if ((buffer[pid].length - bufindex[pid]) > (ksize + vsize)) {
    //            System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid],
    //                ksize);
    //            bufindex[pid] += ksize;
    //            System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid],
    //                vsize);
    //            bufindex[pid] += vsize;
    //          } else if (buffer[pid].length < (ksize + vsize)) {
    //            ThreadSignle t = tpool.getThread();
    //            while (t == null) {
    //              t = tpool.getThread();
    //            }
    //            t.setDataType(1);
    //            t.setWorker(this.workerAgent.getWorker(staff.getJobID(),
    //                staff.getStaffID(), pid));
    //            t.setJobId(staff.getJobID());
    //            t.setTaskId(staff.getStaffID());
    //            t.setBelongPartition(pid);
    //            BytesWritable data = new BytesWritable();
    //            byte[] tmp = new byte[vsize + ksize];
    //            System.arraycopy(kbytes.getBytes(), 0, tmp, 0, ksize);
    //            System.arraycopy(vbytes.getBytes(), 0, tmp, ksize, vsize);
    //            data.set(tmp, 0, (ksize + vsize));
    //            t.setData(data);
    //            tmp = null;
    //            LOG.info("Using Thread is: " + t.getThreadNumber());
    //            LOG.info("this is a super record");
    //            t.setStatus(true);
    //          } else {
    //            ThreadSignle t = tpool.getThread();
    //            while (t == null) {
    //              t = tpool.getThread();
    //            }
    //            t.setDataType(1);
    //            t.setWorker(this.workerAgent.getWorker(staff.getJobID(),
    //                staff.getStaffID(), pid));
    //            t.setJobId(staff.getJobID());
    //            t.setTaskId(staff.getStaffID());
    //            t.setBelongPartition(pid);
    //            BytesWritable data = new BytesWritable();
    //            data.set(buffer[pid], 0, bufindex[pid]);
    //            t.setData(data);
    //            LOG.info("Using Thread is: " + t.getThreadNumber());
    //            t.setStatus(true);
    //            bufindex[pid] = 0;
    //            System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid],
    //                ksize);
    //            bufindex[pid] += ksize;
    //            System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid],
    //                vsize);
    //            bufindex[pid] += vsize;
    //          }
    //        }
    //      }
    //      for (int i = 0; i < this.staff.getStaffNum(); i++) {
    //        if (bufindex[i] != 0) {
    //          ThreadSignle t = tpool.getThread();
    //          while (t == null) {
    //            t = tpool.getThread();
    //          }
    //          t.setWorker(this.workerAgent.getWorker(staff.getJobID(),
    //              staff.getStaffID(), i));
    //          t.setJobId(staff.getJobID());
    //          t.setTaskId(staff.getStaffID());
    //          t.setDataType(1);
    //          t.setBelongPartition(i);
    //          BytesWritable data = new BytesWritable();
    //          data.set(buffer[i], 0, bufindex[i]);
    //          t.setData(data);
    //          LOG.info("Using Thread is: " + t.getThreadNumber());
    //          t.setStatus(true);
    //        }
    //      }
    //      tpool.cleanup();
    //      tpool = null;
    //      buffer = null;
    //      bufindex = null;
    //      LOG.info("The number of vertices that were read from the input file: "
    //          + headNodeNum);
    //      LOG.info("The number of vertices that were put into the partition: "
    //          + local);
    //      LOG.info("The number of vertices that were sent to other partitions: "
    //          + send);
    //      LOG.info("The number of verteices in the partition that cound not be "
    //          + "parsed:" + lost);
    //    } catch (IOException e) {
    //      throw e;
    //    } catch (InterruptedException e) {
    //      throw e;
    //    }
    //  }
}