Example usage for org.apache.hadoop.io DataOutputBuffer reset

Introduction

This page presents example usages of the org.apache.hadoop.io.DataOutputBuffer method reset(), collected from open-source projects.

Prototype

public DataOutputBuffer reset() 

Document

Resets the buffer to empty.
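
Before the project examples below, a minimal self-contained sketch of the common pattern may help: one DataOutputBuffer is reused across records by calling reset() before each serialization, after which only the first getLength() bytes of getData() are valid. The class name and record values here are made up for illustration and are not taken from any of the listed projects.

import java.io.IOException;

import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class ResetReuseSketch {
    public static void main(String[] args) throws IOException {
        DataOutputBuffer buffer = new DataOutputBuffer();
        Writable[] records = { new Text("first record"), new IntWritable(42) };
        for (Writable record : records) {
            buffer.reset(); // discard previous contents, keep the backing array
            record.write(buffer); // serialize into the reused buffer
            // only the first getLength() bytes of getData() are valid
            byte[] copy = new byte[buffer.getLength()];
            System.arraycopy(buffer.getData(), 0, copy, 0, copy.length);
            System.out.println(record.getClass().getSimpleName() + " -> " + copy.length + " bytes");
        }
    }
}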

Usage

From source file:CompressionTest.java

License:Open Source License

public static void main(String[] args) throws IOException {
    DataOutputBuffer chunksBuffer = new DataOutputBuffer();
    DataOutputBuffer metasBuffer = new DataOutputBuffer();

    byte[] data = "alskjdflkajsldfkja;s".getBytes();
    chunksBuffer.write(data);
    System.out.println(chunksBuffer.size());
    System.out.println(chunksBuffer.getLength());
    chunksBuffer.reset();
    chunksBuffer.write(data, 0, 10);
    System.out.println(chunksBuffer.size());
    System.out.println(chunksBuffer.getLength());

}
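
A note on this example: DataOutputBuffer extends java.io.DataOutputStream, so it exposes both the inherited size() counter and its own getLength(); printing both around reset(), as above, shows which of the two follows the reusable buffer. In any case it is getLength(), together with getData(), that the remaining examples on this page use to read back the serialized bytes.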

From source file:StreamWikiDumpInputFormat.java

License:Apache License

private static void offsetWrite(DataOutputBuffer to, int fromOffset, DataOutputBuffer from) throws IOException {
    if (from.getLength() <= fromOffset || fromOffset < 0) {
        throw new IllegalArgumentException(
                String.format("invalid offset: offset=%d length=%d", fromOffset, from.getLength()));
    }
    byte[] bytes = new byte[from.getLength() - fromOffset];
    System.arraycopy(from.getData(), fromOffset, bytes, 0, bytes.length);
    to.reset();
    to.write(bytes);
}

From source file:com.asakusafw.runtime.stage.collector.SortableSlotTest.java

License:Apache License

static byte[] write(Writable writable) {
    DataOutputBuffer buffer = new DataOutputBuffer();
    buffer.reset();
    try {
        writable.write(buffer);
    } catch (IOException e) {
        throw new AssertionError(e);
    }
    return Arrays.copyOf(buffer.getData(), buffer.getLength());
}

From source file:com.chinamobile.bcbsp.client.BSPJobClient.java

License:Apache License

/**
 * Write splits.
 * @param job BSPJob
 * @param submitSplitFile Path
 * @param <T> org.apache.hadoop.mapreduce.InputSplit
 * @return splitNum the count of split
 */
@SuppressWarnings("unchecked")
private <T extends org.apache.hadoop.mapreduce.InputSplit> int writeSplits(BSPJob job, Path submitSplitFile)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration confs = job.getConf();
    com.chinamobile.bcbsp.io.InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(),
            confs);
    input.initialize(job.getConf());
    List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(job);
    int maxSplits = job.getNumPartition();
    int splitNum = splits.size();
    double factor = splitNum / (float) maxSplits;
    if (factor > 1.0) {
        job.setInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, (int) Math.ceil(factor));
        LOG.info("[Split Adjust Factor] " + (int) Math.ceil(factor));
        LOG.info("[Partition Num] " + maxSplits);
        splits = input.getSplits(job);
        splitNum = splits.size();
    }
    T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);
    // sort the splits into order based on size, so that the biggest
    // go first
    Arrays.sort(array, new NewSplitComparator());
    DataOutputStream out = writeSplitsFileHeader(confs, submitSplitFile, array.length);
    try {
        if (array.length != 0) {
            DataOutputBuffer buffer = new DataOutputBuffer();
            RawSplit rawSplit = new RawSplit();
            SerializationFactory factory = new SerializationFactory(confs);
            Serializer<T> serializer = factory.getSerializer((Class<T>) array[0].getClass());
            serializer.open(buffer);
            for (T split : array) {
                rawSplit.setClassName(split.getClass().getName());
                buffer.reset();
                serializer.serialize(split);
                rawSplit.setDataLength(split.getLength());
                rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
                rawSplit.setLocations(split.getLocations());
                rawSplit.write(out);
            }
            serializer.close();
        }
    } finally {
        out.close();
    }
    return splitNum;
}

From source file:com.chinamobile.bcbsp.ml.HashMLWritePartition.java

License:Apache License

/**
 * This method partitions the graph vertices, writing each vertex to its
 * corresponding partition. It calls the recordParse method to create a
 * HeadNode object, then calls the partitioner's getPartitionID method to
 * compute the ID of the partition that the HeadNode belongs to. If the
 * HeadNode belongs to the local partition it is written locally; otherwise
 * it is sent to the appropriate partition.
 * @param recordReader The recordreader of the split.
 * @throws IOException The io exception
 * @throws InterruptedException The Interrupted Exception
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
    int headNodeNum = 0;
    int local = 0;
    int send = 0;
    int lost = 0;
    ThreadPool tpool = new ThreadPool(this.sendThreadNum);
    int bufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
            / (this.staff.getStaffNum() + this.sendThreadNum);
    byte[][] buffer = new byte[this.staff.getStaffNum()][bufferSize];
    int[] bufindex = new int[this.staff.getStaffNum()];
    BytesWritable kbytes = new BytesWritable();
    int ksize = 0;
    BytesWritable vbytes = new BytesWritable();
    int vsize = 0;
    DataOutputBuffer bb = new DataOutputBuffer();
    try {
        this.keyserializer.open(bb);
        this.valueserializer.open(bb);
    } catch (IOException e) {
        throw e;
    }
    try {
        while (recordReader != null && recordReader.nextKeyValue()) {
            headNodeNum++;
            Text key = new Text(recordReader.getCurrentKey().toString());
            Text value = new Text(recordReader.getCurrentValue().toString());
            int pid = -1;
            if (key != null) {
                pid = this.partitioner.getPartitionID(key);
            } else {
                lost++;
                continue;
            }
            if (pid == this.staff.getPartition()) {
                local++;

                KeyValuePair pair = (KeyValuePair) this.recordParse.recordParse(key.toString(),
                        value.toString());

                if (pair == null) {
                    lost++;
                    continue;
                }
                staff.getGraphData().addForAll(pair);
            } else {
                send++;
                bb.reset();
                this.keyserializer.serialize(key);
                kbytes.set(bb.getData(), 0, bb.getLength());
                ksize = kbytes.getLength();
                bb.reset();
                this.valueserializer.serialize(value);
                vbytes.set(bb.getData(), 0, bb.getLength());
                vsize = vbytes.getLength();
                if ((buffer[pid].length - bufindex[pid]) > (ksize + vsize)) {
                    System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
                    bufindex[pid] += ksize;
                    System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
                    bufindex[pid] += vsize;
                } else if (buffer[pid].length < (ksize + vsize)) {
                    ThreadSignle t = tpool.getThread();
                    while (t == null) {
                        t = tpool.getThread();
                    }
                    t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
                    t.setJobId(staff.getJobID());
                    t.setTaskId(staff.getStaffID());
                    t.setBelongPartition(pid);
                    BytesWritable data = new BytesWritable();
                    byte[] tmp = new byte[vsize + ksize];
                    System.arraycopy(kbytes.getBytes(), 0, tmp, 0, ksize);
                    System.arraycopy(vbytes.getBytes(), 0, tmp, ksize, vsize);
                    data.set(tmp, 0, (ksize + vsize));
                    t.setData(data);
                    tmp = null;
                    LOG.info("Using Thread is: " + t.getThreadNumber());
                    LOG.info("this is a super record");
                    t.setStatus(true);
                } else {
                    ThreadSignle t = tpool.getThread();
                    while (t == null) {
                        t = tpool.getThread();
                    }
                    t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
                    t.setJobId(staff.getJobID());
                    t.setTaskId(staff.getStaffID());
                    t.setBelongPartition(pid);
                    BytesWritable data = new BytesWritable();
                    data.set(buffer[pid], 0, bufindex[pid]);
                    t.setData(data);
                    LOG.info("Using Thread is: " + t.getThreadNumber());
                    t.setStatus(true);
                    bufindex[pid] = 0;
                    // store data
                    System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
                    bufindex[pid] += ksize;
                    System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
                    bufindex[pid] += vsize;
                }
            }
        }
        for (int i = 0; i < this.staff.getStaffNum(); i++) {
            if (bufindex[i] != 0) {
                ThreadSignle t = tpool.getThread();
                while (t == null) {
                    t = tpool.getThread();
                }
                t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
                t.setJobId(staff.getJobID());
                t.setTaskId(staff.getStaffID());
                t.setBelongPartition(i);
                BytesWritable data = new BytesWritable();
                data.set(buffer[i], 0, bufindex[i]);
                t.setData(data);
                LOG.info("Using Thread is: " + t.getThreadNumber());
                t.setStatus(true);
            }
        }
        tpool.cleanup();
        tpool = null;
        buffer = null;
        bufindex = null;
        LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
        LOG.info("The number of vertices that were put into the partition: " + local);
        LOG.info("The number of vertices that were sent to other partitions: " + send);
        LOG.info("The number of verteices in the partition that cound not be " + "parsed:" + lost);
    } catch (IOException e) {
        throw e;
    } catch (InterruptedException e) {
        throw e;
    }
}

From source file:com.chinamobile.bcbsp.partition.HashWithBalancerWritePartition.java

License:Apache License

/**
 * This method partitions the graph vertices, writing each vertex to its
 * corresponding partition. It calls the recordParse method to create a
 * HeadNode object, then calls the partitioner's getPartitionID method to
 * compute the ID of the partition that the HeadNode belongs to. If the
 * HeadNode belongs to the local partition it is written locally; otherwise
 * it is sent to the appropriate partition.
 * @param recordReader The recordreader of the split.
 * @throws IOException The io exception
 * @throws InterruptedException The Interrupted Exception
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
    int headNodeNum = 0;
    int local = 0;
    int send = 0;
    int lost = 0;
    ThreadPool tpool = new ThreadPool(this.sendThreadNum);
    int staffNum = this.staff.getStaffNum();
    BytesWritable kbytes = new BytesWritable();
    int ksize = 0;
    BytesWritable vbytes = new BytesWritable();
    int vsize = 0;
    DataOutputBuffer bb = new DataOutputBuffer();
    int bufferSize = (int) ((this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER) * PART);
    int dataBufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
            / (this.staff.getStaffNum() + this.sendThreadNum);
    byte[] buffer = new byte[bufferSize];
    int bufindex = 0;
    SerializationFactory sFactory = new SerializationFactory(new Configuration());
    Serializer<IntWritable> psserializer = sFactory.getSerializer(IntWritable.class);
    byte[] pidandsize = new byte[TIME * CONTAINERNUMBER * CONTAINERNUMBER];
    int psindex = 0;
    BytesWritable pidbytes = new BytesWritable();
    int psize = 0;
    BytesWritable sizebytes = new BytesWritable();
    int ssize = 0;
    try {
        this.keyserializer.open(bb);
        this.valueserializer.open(bb);
        psserializer.open(bb);
    } catch (IOException e) {
        throw e;
    }
    String path = "/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID();
    File dir = new File("/tmp/bcbsp/" + this.staff.getJobID());
    dir.mkdir();
    dir = new File("/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID());
    dir.mkdir();
    ArrayList<File> files = new ArrayList<File>();
    try {
        File file = new File(path + "/" + "data" + ".txt");
        files.add(file);
        DataOutputStream dataWriter = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(path + "/" + "data" + ".txt", true)));
        DataInputStream dataReader = new DataInputStream(
                new BufferedInputStream(new FileInputStream(path + "/" + "data" + ".txt")));
        File filet = new File(path + "/" + "pidandsize" + ".txt");
        files.add(filet);
        DataOutputStream psWriter = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(path + "/" + "pidandsize" + ".txt", true)));
        DataInputStream psReader = new DataInputStream(
                new BufferedInputStream(new FileInputStream(path + "/" + "pidandsize" + ".txt")));
        while (recordReader != null && recordReader.nextKeyValue()) {
            headNodeNum++;
            Text key = new Text(recordReader.getCurrentKey().toString());
            Text value = new Text(recordReader.getCurrentValue().toString());
            int pid = -1;
            Text vertexID = this.recordParse.getVertexID(key);
            if (vertexID != null) {
                pid = this.partitioner.getPartitionID(vertexID);
            } else {
                lost++;
                continue;
            }
            if (this.counter.containsKey(pid)) {
                this.counter.put(pid, (this.counter.get(pid) + 1));
            } else {
                this.counter.put(pid, 1);
            }
            bb.reset();
            this.keyserializer.serialize(key);
            kbytes.set(bb.getData(), 0, bb.getLength());
            ksize = kbytes.getLength();
            bb.reset();
            this.valueserializer.serialize(value);
            vbytes.set(bb.getData(), 0, bb.getLength());
            vsize = vbytes.getLength();
            bb.reset();
            psserializer.serialize(new IntWritable(ksize + vsize));
            sizebytes.set(bb.getData(), 0, bb.getLength());
            ssize = sizebytes.getLength();
            bb.reset();
            psserializer.serialize(new IntWritable(pid));
            pidbytes.set(bb.getData(), 0, bb.getLength());
            psize = pidbytes.getLength();
            if ((pidandsize.length - psindex) > (ssize + psize)) {
                System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize);
                psindex += ssize;
                System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize);
                psindex += psize;
            } else {
                psWriter.write(pidandsize, 0, psindex);
                psindex = 0;
                System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize);
                psindex += ssize;
                System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize);
                psindex += psize;
            }
            if ((buffer.length - bufindex) > (ksize + vsize)) {
                System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize);
                bufindex += ksize;
                System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize);
                bufindex += vsize;
            } else if (buffer.length < (ksize + vsize)) {
                dataWriter.write(buffer, 0, bufindex);
                bufindex = 0;
                LOG.info("This is a super record");
                dataWriter.write(kbytes.getBytes(), 0, ksize);
                dataWriter.write(vbytes.getBytes(), 0, vsize);
            } else {
                dataWriter.write(buffer, 0, bufindex);
                bufindex = 0;
                System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize);
                bufindex += ksize;
                System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize);
                bufindex += vsize;
            }
        }
        if (psindex != 0) {
            psWriter.write(pidandsize, 0, psindex);
        }
        if (bufindex != 0) {
            dataWriter.write(buffer, 0, bufindex);
            bufindex = 0;
        }
        dataWriter.close();
        dataWriter = null;
        psWriter.close();
        psWriter = null;
        buffer = null;
        pidandsize = null;
        this.ssrc.setDirFlag(new String[] { "3" });
        this.ssrc.setCounter(this.counter);
        HashMap<Integer, Integer> hashBucketToPartition = this.sssc.loadDataInBalancerBarrier(ssrc,
                Constants.PARTITION_TYPE.HASH);
        this.staff.setHashBucketToPartition(hashBucketToPartition);
        byte[][] databuf = new byte[staffNum][dataBufferSize];
        int[] databufindex = new int[staffNum];
        try {
            IntWritable pid = new IntWritable();
            IntWritable size = new IntWritable();
            int belongPid = 0;
            while (true) {
                size.readFields(psReader);
                pid.readFields(psReader);
                belongPid = hashBucketToPartition.get(pid.get());
                if (belongPid != this.staff.getPartition()) {
                    send++;
                } else {
                    local++;
                }
                if ((databuf[belongPid].length - databufindex[belongPid]) > size.get()) {
                    dataReader.read(databuf[belongPid], databufindex[belongPid], size.get());
                    databufindex[belongPid] += size.get();
                } else if (databuf[belongPid].length < size.get()) {
                    LOG.info("This is a super record");
                    byte[] tmp = new byte[size.get()];
                    dataReader.read(tmp, 0, size.get());
                    if (belongPid == this.staff.getPartition()) {
                        DataInputStream reader = new DataInputStream(
                                new BufferedInputStream(new ByteArrayInputStream(tmp)));
                        try {
                            boolean stop = true;
                            while (stop) {
                                Text key = new Text();
                                key.readFields(reader);
                                Text value = new Text();
                                value.readFields(reader);
                                if (key.getLength() > 0 && value.getLength() > 0) {
                                    Vertex vertex = this.recordParse.recordParse(key.toString(),
                                            value.toString());
                                    if (vertex == null) {
                                        lost++;
                                        continue;
                                    }
                                    this.staff.getGraphData().addForAll(vertex);
                                } else {
                                    stop = false;
                                }
                            }
                        } catch (IOException e) {
                            LOG.info("IO exception: " + e.getStackTrace());
                        }
                    } else {
                        ThreadSignle t = tpool.getThread();
                        while (t == null) {
                            t = tpool.getThread();
                        }
                        t.setWorker(
                                this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid));
                        t.setJobId(staff.getJobID());
                        t.setTaskId(staff.getStaffID());
                        t.setBelongPartition(belongPid);
                        BytesWritable data = new BytesWritable();
                        data.set(tmp, 0, size.get());
                        t.setData(data);
                        LOG.info("Using Thread is: " + t.getThreadNumber());
                        t.setStatus(true);
                    }
                    tmp = null;
                } else {
                    if (belongPid == this.staff.getPartition()) {
                        DataInputStream reader = new DataInputStream(new BufferedInputStream(
                                new ByteArrayInputStream(databuf[belongPid], 0, databufindex[belongPid])));
                        try {
                            boolean stop = true;
                            while (stop) {
                                Text key = new Text();
                                key.readFields(reader);
                                Text value = new Text();
                                value.readFields(reader);
                                if (key.getLength() > 0 && value.getLength() > 0) {
                                    Vertex vertex = this.recordParse.recordParse(key.toString(),
                                            value.toString());
                                    if (vertex == null) {
                                        lost++;
                                        continue;
                                    }
                                    this.staff.getGraphData().addForAll(vertex);
                                } else {
                                    stop = false;
                                }
                            }
                        } catch (IOException e) {
                            LOG.info("IO exception: " + e.getStackTrace());
                        }
                    } else {
                        ThreadSignle t = tpool.getThread();
                        while (t == null) {
                            t = tpool.getThread();
                        }
                        t.setWorker(
                                this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid));
                        t.setJobId(staff.getJobID());
                        t.setTaskId(staff.getStaffID());
                        t.setBelongPartition(belongPid);
                        BytesWritable data = new BytesWritable();
                        data.set(databuf[belongPid], 0, databufindex[belongPid]);
                        t.setData(data);
                        LOG.info("Using Thread is: " + t.getThreadNumber());
                        t.setStatus(true);
                    }
                    databufindex[belongPid] = 0;
                    dataReader.read(databuf[belongPid], databufindex[belongPid], size.get());
                    databufindex[belongPid] += size.get();
                }
            }
        } catch (EOFException ex) {
            LOG.error("[write]", ex);
        }
        for (int i = 0; i < staffNum; i++) {
            if (databufindex[i] != 0) {
                if (i == this.staff.getPartition()) {
                    DataInputStream reader = new DataInputStream(
                            new BufferedInputStream(new ByteArrayInputStream(databuf[i], 0, databufindex[i])));
                    try {
                        boolean stop = true;
                        while (stop) {
                            Text key = new Text();
                            key.readFields(reader);
                            Text value = new Text();
                            value.readFields(reader);
                            if (key.getLength() > 0 && value.getLength() > 0) {
                                Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                                if (vertex == null) {
                                    lost++;
                                    continue;
                                }
                                this.staff.getGraphData().addForAll(vertex);
                            } else {
                                stop = false;
                            }
                        }
                    } catch (IOException e) {
                        LOG.info("IO exception: " + e.getStackTrace());
                    }
                } else {
                    ThreadSignle t = tpool.getThread();
                    while (t == null) {
                        t = tpool.getThread();
                    }
                    t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
                    t.setJobId(staff.getJobID());
                    t.setTaskId(staff.getStaffID());
                    t.setBelongPartition(i);
                    BytesWritable data = new BytesWritable();
                    data.set(databuf[i], 0, databufindex[i]);
                    t.setData(data);
                    LOG.info("Using Thread is: " + t.getThreadNumber());
                    t.setStatus(true);
                }
            }
        }
        dataReader.close();
        dataReader = null;
        psReader.close();
        psReader = null;
        for (File f : files) {
            f.delete();
        }
        dir.delete();
        dir = new File(path.substring(0, path.lastIndexOf('/')));
        dir.delete();
        tpool.cleanup();
        tpool = null;
        databuf = null;
        databufindex = null;
        this.counter = null;
        LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
        LOG.info("The number of vertices that were put into the partition: " + local);
        LOG.info("The number of vertices that were sent to other partitions: " + send);
        LOG.info("The number of verteices in the partition that cound not be " + "parsed:" + lost);
    } catch (IOException e) {
        throw e;
    } catch (InterruptedException e) {
        throw e;
    } finally {
        for (File f : files) {
            f.delete();
        }
        dir.delete();
        dir = new File(path.substring(0, path.lastIndexOf('/')));
        dir.delete();
    }
}

From source file:com.chinamobile.bcbsp.partition.HashWritePartition.java

License:Apache License

/**
 * This method partitions the graph vertices, writing each vertex to its
 * corresponding partition. It calls the recordParse method to create a
 * HeadNode object, then calls the partitioner's getPartitionID method to
 * compute the ID of the partition that the HeadNode belongs to. If the
 * HeadNode belongs to the local partition it is written locally; otherwise
 * it is sent to the appropriate partition.
 * @param recordReader The recordreader of the split.
 * @throws IOException The io exception
 * @throws InterruptedException The Interrupted Exception
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
    int headNodeNum = 0;
    int local = 0;
    int send = 0;
    int lost = 0;
    ThreadPool tpool = new ThreadPool(this.sendThreadNum);
    int bufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
            / (this.staff.getStaffNum() + this.sendThreadNum);
    byte[][] buffer = new byte[this.staff.getStaffNum()][bufferSize];
    int[] bufindex = new int[this.staff.getStaffNum()];
    BytesWritable kbytes = new BytesWritable();
    int ksize = 0;
    BytesWritable vbytes = new BytesWritable();
    int vsize = 0;
    DataOutputBuffer bb = new DataOutputBuffer();
    try {
        this.keyserializer.open(bb);
        this.valueserializer.open(bb);
    } catch (IOException e) {
        throw e;
    }
    try {
        while (recordReader != null && recordReader.nextKeyValue()) {
            headNodeNum++;
            Text key = new Text(recordReader.getCurrentKey().toString());
            Text value = new Text(recordReader.getCurrentValue().toString());
            int pid = -1;
            Text vertexID = this.recordParse.getVertexID(key);
            if (vertexID != null) {
                pid = this.partitioner.getPartitionID(vertexID);
            } else {
                lost++;
                continue;
            }
            if (pid == this.staff.getPartition()) {
                local++;
                Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                if (vertex == null) {
                    lost++;
                    continue;
                }
                staff.getGraphData().addForAll(vertex);
            } else {
                send++;
                bb.reset();
                this.keyserializer.serialize(key);
                kbytes.set(bb.getData(), 0, bb.getLength());
                ksize = kbytes.getLength();
                bb.reset();
                this.valueserializer.serialize(value);
                vbytes.set(bb.getData(), 0, bb.getLength());
                vsize = vbytes.getLength();
                if ((buffer[pid].length - bufindex[pid]) > (ksize + vsize)) {
                    System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
                    bufindex[pid] += ksize;
                    System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
                    bufindex[pid] += vsize;
                } else if (buffer[pid].length < (ksize + vsize)) {
                    ThreadSignle t = tpool.getThread();
                    while (t == null) {
                        t = tpool.getThread();
                    }
                    t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
                    t.setJobId(staff.getJobID());
                    t.setTaskId(staff.getStaffID());
                    t.setBelongPartition(pid);
                    BytesWritable data = new BytesWritable();
                    byte[] tmp = new byte[vsize + ksize];
                    System.arraycopy(kbytes.getBytes(), 0, tmp, 0, ksize);
                    System.arraycopy(vbytes.getBytes(), 0, tmp, ksize, vsize);
                    data.set(tmp, 0, (ksize + vsize));
                    t.setData(data);
                    tmp = null;
                    LOG.info("Using Thread is: " + t.getThreadNumber());
                    LOG.info("this is a super record");
                    t.setStatus(true);
                } else {
                    ThreadSignle t = tpool.getThread();
                    while (t == null) {
                        t = tpool.getThread();
                    }
                    t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
                    t.setJobId(staff.getJobID());
                    t.setTaskId(staff.getStaffID());
                    t.setBelongPartition(pid);
                    BytesWritable data = new BytesWritable();
                    data.set(buffer[pid], 0, bufindex[pid]);
                    t.setData(data);
                    LOG.info("Using Thread is: " + t.getThreadNumber());
                    t.setStatus(true);
                    bufindex[pid] = 0;
                    // store data
                    System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
                    bufindex[pid] += ksize;
                    System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
                    bufindex[pid] += vsize;
                }
            }
        }
        for (int i = 0; i < this.staff.getStaffNum(); i++) {
            if (bufindex[i] != 0) {
                ThreadSignle t = tpool.getThread();
                while (t == null) {
                    t = tpool.getThread();
                }
                t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
                t.setJobId(staff.getJobID());
                t.setTaskId(staff.getStaffID());
                t.setBelongPartition(i);
                BytesWritable data = new BytesWritable();
                data.set(buffer[i], 0, bufindex[i]);
                t.setData(data);
                LOG.info("Using Thread is: " + t.getThreadNumber());
                t.setStatus(true);
            }
        }
        tpool.cleanup();
        tpool = null;
        buffer = null;
        bufindex = null;
        LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
        LOG.info("The number of vertices that were put into the partition: " + local);
        LOG.info("The number of vertices that were sent to other partitions: " + send);
        LOG.info("The number of verteices in the partition that cound not be " + "parsed:" + lost);
    } catch (IOException e) {
        throw e;
    } catch (InterruptedException e) {
        throw e;
    }
}

From source file:com.datasalt.pangool.utils.TupleToAvroRecordConverter.java

License:Apache License

/**
 * Moves data between a Tuple and an Avro Record
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public Record toRecord(ITuple tuple, Record reuse) throws IOException {
    Record record = reuse;
    if (record == null) {
        record = new Record(avroSchema);
    }
    if (schemaValidation && !tuple.getSchema().equals(pangoolSchema)) {
        throw new IOException("Tuple '" + tuple + "' " + "contains schema not expected." + "Expected schema '"
                + pangoolSchema + " and actual: " + tuple.getSchema());
    }
    for (int i = 0; i < pangoolSchema.getFields().size(); i++) {
        Object obj = tuple.get(i);
        Field field = pangoolSchema.getField(i);
        if (obj == null) {
            throw new IOException("Field '" + field.getName() + "' can't be null in tuple:" + tuple);
        }

        switch (field.getType()) {
        case INT:
        case LONG:
        case FLOAT:
        case BOOLEAN:
        case DOUBLE:
        case BYTES:
            record.put(i, obj); //optimistic
            break;
        case OBJECT:
            Serializer customSer = customSerializers[i];
            DataOutputBuffer buffer = buffers[i];
            buffer.reset();
            if (customSer != null) {
                customSer.open(buffer);
                customSer.serialize(obj);
                customSer.close(); //TODO is this safe ?
            } else {
                hadoopSer.ser(obj, buffer);
            }
            //TODO this byteBuffer instances should be cached and reused
            ByteBuffer byteBuffer = ByteBuffer.wrap(buffer.getData(), 0, buffer.getLength());
            record.put(i, byteBuffer);
            break;
        case ENUM:
            record.put(i, obj.toString());
            break;
        case STRING:
            record.put(i, new Utf8(obj.toString())); //could be directly String ?
            break;
        default:
            throw new IOException("Not correspondence to Avro type from Pangool type " + field.getType());
        }
    }
    return record;
}

From source file:com.ibm.jaql.lang.expr.hadoop.ChainedMapFn.java

License:Apache License

public JsonValue eval(final Context context) throws Exception {
    JsonRecord args = baseSetup(context);

    JsonValue state = args.getRequired(new JsonString("init"));
    Function mapFn = (Function) args.getRequired(new JsonString("map"));
    JsonValue schema = args.get(new JsonString("schema"));

    JaqlUtil.enforceNonNull(mapFn);

    conf.setNumReduceTasks(0);
    conf.setMapRunnerClass(MapEval.class);

    // setup serialization
    setupSerialization(false);
    if (schema != null) {
        conf.set(SCHEMA_NAME, schema.toString());
    }

    prepareFunction("map", 2, mapFn, 0);

    InputSplit[] splits = conf.getInputFormat().getSplits(conf, conf.getNumMapTasks());

    // Override the input format to select one partition
    int targetSplits = conf.getNumMapTasks();
    String oldFormat = conf.get("mapred.input.format.class");
    conf.set(SelectSplitInputFormat.INPUT_FORMAT, oldFormat);
    // It would be nice to know how many splits we are generating to avoid 
    // using an exception to quit...
    // int numSplits = oldFormat.getSplits(conf, ??);
    // This parameter is avoided in the new API
    conf.setInputFormat(SelectSplitInputFormat.class);
    conf.setNumMapTasks(1);

    DataOutputBuffer buffer = new DataOutputBuffer();
    for (int i = 0; i < splits.length; i++) {
        // TODO: we should move the model around using hdfs files instead of serializing
        conf.setClass(SelectSplitInputFormat.SPLIT_CLASS, splits[i].getClass(), InputSplit.class);
        conf.set(SelectSplitInputFormat.STATE, state.toString());
        buffer.reset();
        splits[i].write(buffer);
        ConfUtil.writeBinary(conf, SelectSplitInputFormat.SPLIT, buffer.getData(), 0, buffer.getLength());
        conf.setJobName("chainedMap " + (i + 1) + "/" + splits.length);

        // This causes the output file to be deleted.
        HadoopOutputAdapter outAdapter = (HadoopOutputAdapter) JaqlUtil.getAdapterStore().output
                .getAdapter(outArgs);
        outAdapter.setParallel(conf);

        try {
            JobClient.runJob(conf);
        } catch (EOFException ex) {
            // Thrown when we've processed all of the splits
            break;
        }

        // Read the new state
        final InputAdapter adapter = (InputAdapter) JaqlUtil.getAdapterStore().input.getAdapter(outArgs);
        adapter.open();
        ClosableJsonIterator reader = adapter.iter();
        state = null;
        if (reader.moveNext()) {
            state = reader.current();
        }
        reader.close();
    }

    return state;
}

From source file:com.ibm.jaql.lang.expr.index.HashtableServer.java

License:Apache License

@Override
public void run() {
    JsonValue readKey = null;
    JsonValue[] keys = new JsonValue[0];

    try {
        while (true) {
            byte command = in.readByte();
            switch (command) {
            // GET Key -> FOUND Value | NOT_FOUND
            case GET_CMD: {
                readKey = table.keySerializer.read(in, readKey);
                byte[] value = table.table.get(readKey);
                if (value == null) {
                    out.write(NOT_FOUND_CMD);
                } else {
                    out.write(FOUND_CMD);
                    out.write(value);
                }
                break;
            }
            // GETN n, [Key]*n -> OK n [FOUND Value | NOT_FOUND]*n  OK
            case GETN_CMD: {
                int n = BaseUtil.readVUInt(in);
                if (n > keys.length || // bigger array required
                        3 * n < keys.length) // array is way too big
                {
                    keys = new JsonValue[n];
                }
                for (int i = 0; i < n; i++) {
                    keys[i] = table.keySerializer.read(in, keys[i]);
                }
                out.write(OK_CMD);
                BaseUtil.writeVUInt(out, n);
                for (int i = 0; i < n; i++) {
                    byte[] value = table.table.get(keys[i]);
                    if (value == null) {
                        out.write(NOT_FOUND_CMD);
                    } else {
                        out.write(FOUND_CMD);
                        out.write(value);
                    }
                }
                out.write(OK_CMD);
                break;
            }
            // USE tableId string, age msec, lease msec
            //   -> OK lease, schema [ Key, Value ], 
            //    | BUILD 
            case USE_CMD: {
                if (table != null) {
                    HashMapCache.instance.release(table);
                    table = null;
                }
                JsonString tableId = (JsonString) defaultSerializer.read(in, null);
                long ageMS = BaseUtil.readVSLong(in);
                long leaseMS = BaseUtil.readVSLong(in);

                table = HashMapCache.instance.get(tableId.toString(), ageMS, leaseMS);
                if (table.isBuilt()) // The table is good to go
                {
                    out.write(OK_CMD);
                    BaseUtil.writeVSLong(out, 0); // TODO: implement leases
                    defaultSerializer.write(out, table.schema);
                } else // We need to build the table
                {
                    out.write(BUILD_CMD);
                    out.flush();

                    // SCHEMA schema [Key,Value] (PUT key, value)* OK -> OK
                    command = in.readByte();
                    if (command == RELEASE_CMD) {
                        // The client couldn't build the table, so just release it
                        HashMapCache.instance.release(table);
                        break;
                    }
                    if (command != SCHEMA_CMD) {
                        throw new ProtocolException("expected SCHEMA");
                    }
                    table.setSchema((JsonSchema) defaultSerializer.read(in, null));
                    DataOutputBuffer buf = new DataOutputBuffer();

                    System.err.println("building hashtable " + table.tableId);

                    while ((command = in.readByte()) == PUT_CMD) {
                        // TODO: we need to use a spilling hashtable to avoid memory overflows...
                        // TODO: we could at least pack the values more tightly 
                        buf.reset();
                        JsonValue key = table.keySerializer.read(in, null); // Be sure NOT to reuse the key here!
                        table.valueSerializer.copy(in, buf);
                        byte[] val = new byte[buf.getLength()];
                        System.arraycopy(buf.getData(), 0, val, 0, val.length);
                        table.table.put(key, val);
                    }
                    if (command != OK_CMD) {
                        throw new ProtocolException("expected OK");
                    }
                    HashMapCache.instance.doneBuilding(table);
                    out.write(OK_CMD);
                    System.err.println("built hashtable " + table.tableId);
                }
                break;
            }
            // RELEASE -> OK
            case RELEASE_CMD: {
                if (table != null) {
                    HashMapCache.instance.release(table);
                    table = null;
                }
                out.write(OK_CMD);
                break;
            }
            // LIST_TABLES -> (FOUND tableId built age lease schema numEntries)* OK
            // GET_ALL -> (FOUND key value)* OK
            // UNDEFINE tableId -> OK | NOT_FOUND
            // UNDEFINE_ALL -> OK
            default:
                throw new ProtocolException("invalid command code");
            }
            out.flush();
        }
    } catch (EOFException e) {
        // ignored
    } catch (Exception e) {
        // log and exit thread
        e.printStackTrace();
    } finally {
        if (table != null) {
            HashMapCache.instance.release(table);
        }
        try {
            socket.close();
        } catch (Exception e) {
            // log and exit thread
            e.printStackTrace();
        }
    }
}