List of usage examples for org.apache.hadoop.io BytesWritable set
public void set(byte[] newData, int offset, int length)
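Before the examples: set(byte[] newData, int offset, int length) copies length bytes of newData, starting at offset, into the writable's own backing buffer, replacing any previous contents. A minimal sketch of that behavior (class and variable names here are illustrative, not taken from the examples below):

import org.apache.hadoop.io.BytesWritable;

public class BytesWritableSetExample {
  public static void main(String[] args) {
    BytesWritable bw = new BytesWritable();
    byte[] payload = { 10, 20, 30, 40, 50 };
    // Copy bytes 1..3 (20, 30, 40) into the writable's internal buffer.
    bw.set(payload, 1, 3);
    System.out.println(bw.getLength()); // 3
    // getBytes() returns the backing array, which may be larger than getLength().
    byte[] backing = bw.getBytes();
    System.out.println(backing[0]); // 20
  }
}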
From source file:cascading.avro.CascadingToAvroTest.java
License:Apache License
@Test
public void testToAvroFixedUsesValidRangeOfBytesWritable() {
  Schema fieldSchema = schema.getField("aFixed").schema();
  BytesWritable bytes = new BytesWritable();
  byte[] old_buffer_value = { 0, 1, 2, 3 };
  bytes.set(old_buffer_value, 0, old_buffer_value.length);
  byte[] buffer_value = { 4, 5, 6 };
  bytes.set(buffer_value, 0, buffer_value.length);
  byte[] outBytes = ((Fixed) CascadingToAvro.toAvroFixed(bytes, fieldSchema)).bytes();
  assertThat(outBytes, is(buffer_value));
}
From source file:cascading.avro.CascadingToAvroTest.java
License:Apache License
@Test
public void testToAvroBytesUsesValidRangeOfBytesWritable() {
  Schema fieldSchema = schema.getField("aBytes").schema();
  BytesWritable bytes = (BytesWritable) tupleEntry.getObject("aBytes");
  byte[] old_buffer_value = { 0, 1, 2, 3 };
  bytes.set(old_buffer_value, 0, old_buffer_value.length);
  byte[] buffer_value = { 4, 5, 6 };
  ByteBuffer result = ByteBuffer.wrap(buffer_value);
  bytes.set(buffer_value, 0, buffer_value.length);
  ByteBuffer outBytes = (ByteBuffer) CascadingToAvro.toAvro(bytes, fieldSchema);
  assertThat(outBytes, is(result));
}
From source file:co.cask.cdap.data.stream.decoder.BytesStreamEventDecoder.java
License:Apache License
private BytesWritable getEventBody(StreamEvent event, BytesWritable result) {
  ByteBuffer body = event.getBody();
  if (body.hasArray()) {
    // If the ByteBuffer is backed by an array that is exactly the region the ByteBuffer
    // exposes, simply use the backing array. No copying is needed (the read from
    // stream to mapper is synchronous). Creating a new BytesWritable is more efficient,
    // as it avoids the array copy that BytesWritable.set() performs.
    if (body.array().length == body.remaining()) {
      return new BytesWritable(body.array());
    }
    // Otherwise the byte[] must be copied, which BytesWritable.set() does.
    result.set(body.array(), body.arrayOffset() + body.position(), body.remaining());
    return result;
  }
  // Not array-backed: copy into a new array.
  byte[] copy = new byte[body.remaining()];
  body.mark();
  body.get(copy);
  body.reset();
  return new BytesWritable(copy);
}
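The snippet above leans on the fact that the BytesWritable(byte[]) constructor wraps the given array without copying, while set() always copies into the writable's own buffer. A small sketch of that difference (a hypothetical demo class, not part of the CDAP code):

import org.apache.hadoop.io.BytesWritable;

public class WrapVsCopy {
  public static void main(String[] args) {
    byte[] data = { 1, 2, 3 };

    BytesWritable wrapped = new BytesWritable(data); // uses the array directly, no copy
    BytesWritable copied = new BytesWritable();
    copied.set(data, 0, data.length);                // allocates its own buffer and copies

    data[0] = 42;
    System.out.println(wrapped.getBytes()[0]); // 42 - sees the mutation
    System.out.println(copied.getBytes()[0]);  // 1  - independent copy
  }
}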
From source file:com.bixolabs.cascading.avro.AvroScheme.java
License:Apache License
private Object convertFromAvroPrimitive(Object inObj) {
  if (inObj == null) {
    return null;
  } else if (inObj instanceof Utf8) {
    return ((Utf8) inObj).toString();
  } else if (inObj instanceof BytesWritable) {
    // TODO KKr - this is very inefficient, since we make a copy of the ByteBuffer array
    // in the call to BytesWritable.set(buffer, pos, length). A potentially small win is
    // to check if buffer.position() == 0, and if so then do
    // result = new BytesWritable(buffer.array()), followed by result.setSize(buffer.limit())
    ByteBuffer buffer = (ByteBuffer) inObj;
    BytesWritable result = new BytesWritable();
    // Note: when buffer.position() > 0, the length argument here should arguably be
    // buffer.remaining() rather than buffer.limit().
    result.set(buffer.array(), buffer.position(), buffer.limit());
    return result;
  } else if (inObj instanceof Enum) {
    return inObj.toString();
  } else {
    return inObj;
  }
}
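For illustration, here is a hedged sketch of the copy-avoiding path the TODO above proposes, assuming an array-backed buffer whose data starts at position 0; toBytesWritable is a hypothetical helper, not part of AvroScheme:

import java.nio.ByteBuffer;
import org.apache.hadoop.io.BytesWritable;

public class ByteBufferToWritable {
  // Hypothetical helper sketching the TODO's suggestion.
  static BytesWritable toBytesWritable(ByteBuffer buffer) {
    if (buffer.hasArray() && buffer.arrayOffset() == 0 && buffer.position() == 0) {
      // Wrap the backing array without copying, then trim the logical size to the limit.
      BytesWritable result = new BytesWritable(buffer.array());
      result.setSize(buffer.limit());
      return result;
    }
    // Fallback: copy the remaining bytes (also handles direct or sliced buffers).
    byte[] copy = new byte[buffer.remaining()];
    buffer.duplicate().get(copy); // duplicate() so the source position is untouched
    BytesWritable result = new BytesWritable();
    result.set(copy, 0, copy.length);
    return result;
  }
}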
From source file:com.blackberry.logdriver.mapred.avro.AvroBlockRecordReader.java
License:Apache License
@Override
public boolean next(AvroFileHeader key, BytesWritable value) throws IOException {
  while (pos >= end) {
    if (in != null) {
      in.close();
    }
    currentFile++;
    if (split.getNumPaths() > currentFile) {
      try {
        initCurrentFile();
      } catch (IOException e) {
        LOG.error("Error reading Boom file header. Skipping this file.", e);
        return false;
      }
    } else {
      return false;
    }
  }

  key.set(header);

  // Get the number of entries in the next block
  int entries = AvroUtils.readInt(in);
  byte[] block = null;
  try {
    block = AvroUtils.readBytes(in);
  } catch (IOException e) {
    LOG.error("Error reading block from file. Skipping the rest of this file.", e);
    return false;
  }

  // Check that the sync marker is what we expect
  LOG.trace("Verifying sync marker");
  byte[] syncMarker = null;
  try {
    syncMarker = AvroUtils.readBytes(in, DataFileConstants.SYNC_SIZE);
  } catch (IOException e) {
    LOG.error("Error reading sync marker from file. Skipping the rest of this file.", e);
    return false;
  }
  if (!Arrays.equals(syncMarker, header.getSyncMarker())) {
    LOG.error("Sync marker does not match");
    return false;
  }

  // Now, pack it all back into a byte[], and set the value of value
  {
    ByteBuffer bb = ByteBuffer.allocate(10 + 10 + block.length);
    bb.put(AvroUtils.encodeLong(entries));
    bb.put(AvroUtils.encodeLong(block.length));
    bb.put(block);
    byte[] result = new byte[bb.position()];
    bb.rewind();
    bb.get(result);
    value.set(result, 0, result.length);
    pos = in.getPos();
  }
  return true;
}
From source file:com.chinamobile.bcbsp.ml.HashMLWritePartition.java
License:Apache License
/**
 * This method partitions graph vertices, writing each vertex to the corresponding
 * partition. It calls the recordParse method to create a HeadNode object, then the
 * partitioner's getPartitionID method to compute the id of the partition the HeadNode
 * belongs to. If the HeadNode belongs to the local partition it is written there;
 * otherwise it is sent to the appropriate partition.
 * @param recordReader The record reader of the split.
 * @throws IOException The io exception
 * @throws InterruptedException The Interrupted Exception
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
  int headNodeNum = 0;
  int local = 0;
  int send = 0;
  int lost = 0;
  ThreadPool tpool = new ThreadPool(this.sendThreadNum);
  int bufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
      / (this.staff.getStaffNum() + this.sendThreadNum);
  byte[][] buffer = new byte[this.staff.getStaffNum()][bufferSize];
  int[] bufindex = new int[this.staff.getStaffNum()];
  BytesWritable kbytes = new BytesWritable();
  int ksize = 0;
  BytesWritable vbytes = new BytesWritable();
  int vsize = 0;
  DataOutputBuffer bb = new DataOutputBuffer();
  try {
    this.keyserializer.open(bb);
    this.valueserializer.open(bb);
  } catch (IOException e) {
    throw e;
  }
  try {
    while (recordReader != null && recordReader.nextKeyValue()) {
      headNodeNum++;
      Text key = new Text(recordReader.getCurrentKey().toString());
      Text value = new Text(recordReader.getCurrentValue().toString());
      int pid = -1;
      if (key != null) {
        pid = this.partitioner.getPartitionID(key);
      } else {
        lost++;
        continue;
      }
      if (pid == this.staff.getPartition()) {
        local++;
        KeyValuePair pair = (KeyValuePair) this.recordParse.recordParse(key.toString(), value.toString());
        if (pair == null) {
          lost++;
          continue;
        }
        staff.getGraphData().addForAll(pair);
      } else {
        send++;
        bb.reset();
        this.keyserializer.serialize(key);
        kbytes.set(bb.getData(), 0, bb.getLength());
        ksize = kbytes.getLength();
        bb.reset();
        this.valueserializer.serialize(value);
        vbytes.set(bb.getData(), 0, bb.getLength());
        vsize = vbytes.getLength();
        if ((buffer[pid].length - bufindex[pid]) > (ksize + vsize)) {
          System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
          bufindex[pid] += ksize;
          System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
          bufindex[pid] += vsize;
        } else if (buffer[pid].length < (ksize + vsize)) {
          ThreadSignle t = tpool.getThread();
          while (t == null) {
            t = tpool.getThread();
          }
          t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
          t.setJobId(staff.getJobID());
          t.setTaskId(staff.getStaffID());
          t.setBelongPartition(pid);
          BytesWritable data = new BytesWritable();
          byte[] tmp = new byte[vsize + ksize];
          System.arraycopy(kbytes.getBytes(), 0, tmp, 0, ksize);
          System.arraycopy(vbytes.getBytes(), 0, tmp, ksize, vsize);
          data.set(tmp, 0, (ksize + vsize));
          t.setData(data);
          tmp = null;
          LOG.info("Using Thread is: " + t.getThreadNumber());
          LOG.info("this is a super record");
          t.setStatus(true);
        } else {
          ThreadSignle t = tpool.getThread();
          while (t == null) {
            t = tpool.getThread();
          }
          t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
          t.setJobId(staff.getJobID());
          t.setTaskId(staff.getStaffID());
          t.setBelongPartition(pid);
          BytesWritable data = new BytesWritable();
          data.set(buffer[pid], 0, bufindex[pid]);
          t.setData(data);
          LOG.info("Using Thread is: " + t.getThreadNumber());
          t.setStatus(true);
          bufindex[pid] = 0;
          // store data
          System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
          bufindex[pid] += ksize;
          System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
          bufindex[pid] += vsize;
        }
      }
    }
    for (int i = 0; i < this.staff.getStaffNum(); i++) {
      if (bufindex[i] != 0) {
        ThreadSignle t = tpool.getThread();
        while (t == null) {
          t = tpool.getThread();
        }
        t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
        t.setJobId(staff.getJobID());
        t.setTaskId(staff.getStaffID());
        t.setBelongPartition(i);
        BytesWritable data = new BytesWritable();
        data.set(buffer[i], 0, bufindex[i]);
        t.setData(data);
        LOG.info("Using Thread is: " + t.getThreadNumber());
        t.setStatus(true);
      }
    }
    tpool.cleanup();
    tpool = null;
    buffer = null;
    bufindex = null;
    LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
    LOG.info("The number of vertices that were put into the partition: " + local);
    LOG.info("The number of vertices that were sent to other partitions: " + send);
    LOG.info("The number of vertices in the partition that could not be parsed: " + lost);
  } catch (IOException e) {
    throw e;
  } catch (InterruptedException e) {
    throw e;
  }
}
From source file:com.chinamobile.bcbsp.partition.HashWithBalancerWritePartition.java
License:Apache License
/**
 * This method partitions graph vertices, writing each vertex to the corresponding
 * partition. It calls the recordParse method to create a HeadNode object, then the
 * partitioner's getPartitionID method to compute the id of the partition the HeadNode
 * belongs to. If the HeadNode belongs to the local partition it is written there;
 * otherwise it is sent to the appropriate partition.
 * @param recordReader The record reader of the split.
 * @throws IOException The io exception
 * @throws InterruptedException The Interrupted Exception
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
  int headNodeNum = 0;
  int local = 0;
  int send = 0;
  int lost = 0;
  ThreadPool tpool = new ThreadPool(this.sendThreadNum);
  int staffNum = this.staff.getStaffNum();
  BytesWritable kbytes = new BytesWritable();
  int ksize = 0;
  BytesWritable vbytes = new BytesWritable();
  int vsize = 0;
  DataOutputBuffer bb = new DataOutputBuffer();
  int bufferSize = (int) ((this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER) * PART);
  int dataBufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
      / (this.staff.getStaffNum() + this.sendThreadNum);
  byte[] buffer = new byte[bufferSize];
  int bufindex = 0;
  SerializationFactory sFactory = new SerializationFactory(new Configuration());
  Serializer<IntWritable> psserializer = sFactory.getSerializer(IntWritable.class);
  byte[] pidandsize = new byte[TIME * CONTAINERNUMBER * CONTAINERNUMBER];
  int psindex = 0;
  BytesWritable pidbytes = new BytesWritable();
  int psize = 0;
  BytesWritable sizebytes = new BytesWritable();
  int ssize = 0;
  try {
    this.keyserializer.open(bb);
    this.valueserializer.open(bb);
    psserializer.open(bb);
  } catch (IOException e) {
    throw e;
  }
  String path = "/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID();
  File dir = new File("/tmp/bcbsp/" + this.staff.getJobID());
  dir.mkdir();
  dir = new File("/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID());
  dir.mkdir();
  ArrayList<File> files = new ArrayList<File>();
  try {
    File file = new File(path + "/" + "data" + ".txt");
    files.add(file);
    DataOutputStream dataWriter = new DataOutputStream(
        new BufferedOutputStream(new FileOutputStream(path + "/" + "data" + ".txt", true)));
    DataInputStream dataReader = new DataInputStream(
        new BufferedInputStream(new FileInputStream(path + "/" + "data" + ".txt")));
    File filet = new File(path + "/" + "pidandsize" + ".txt");
    files.add(filet);
    DataOutputStream psWriter = new DataOutputStream(
        new BufferedOutputStream(new FileOutputStream(path + "/" + "pidandsize" + ".txt", true)));
    DataInputStream psReader = new DataInputStream(
        new BufferedInputStream(new FileInputStream(path + "/" + "pidandsize" + ".txt")));
    while (recordReader != null && recordReader.nextKeyValue()) {
      headNodeNum++;
      Text key = new Text(recordReader.getCurrentKey().toString());
      Text value = new Text(recordReader.getCurrentValue().toString());
      int pid = -1;
      Text vertexID = this.recordParse.getVertexID(key);
      if (vertexID != null) {
        pid = this.partitioner.getPartitionID(vertexID);
      } else {
        lost++;
        continue;
      }
      if (this.counter.containsKey(pid)) {
        this.counter.put(pid, (this.counter.get(pid) + 1));
      } else {
        this.counter.put(pid, 1);
      }
      bb.reset();
      this.keyserializer.serialize(key);
      kbytes.set(bb.getData(), 0, bb.getLength());
      ksize = kbytes.getLength();
      bb.reset();
      this.valueserializer.serialize(value);
      vbytes.set(bb.getData(), 0, bb.getLength());
      vsize = vbytes.getLength();
      bb.reset();
      psserializer.serialize(new IntWritable(ksize + vsize));
      sizebytes.set(bb.getData(), 0, bb.getLength());
      ssize = sizebytes.getLength();
      bb.reset();
      psserializer.serialize(new IntWritable(pid));
      pidbytes.set(bb.getData(), 0, bb.getLength());
      psize = pidbytes.getLength();
      if ((pidandsize.length - psindex) > (ssize + psize)) {
        System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize);
        psindex += ssize;
        System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize);
        psindex += psize;
      } else {
        psWriter.write(pidandsize, 0, psindex);
        psindex = 0;
        System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize);
        psindex += ssize;
        System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize);
        psindex += psize;
      }
      if ((buffer.length - bufindex) > (ksize + vsize)) {
        System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize);
        bufindex += ksize;
        System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize);
        bufindex += vsize;
      } else if (buffer.length < (ksize + vsize)) {
        dataWriter.write(buffer, 0, bufindex);
        bufindex = 0;
        LOG.info("This is a super record");
        dataWriter.write(kbytes.getBytes(), 0, ksize);
        dataWriter.write(vbytes.getBytes(), 0, vsize);
      } else {
        dataWriter.write(buffer, 0, bufindex);
        bufindex = 0;
        System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize);
        bufindex += ksize;
        System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize);
        bufindex += vsize;
      }
    }
    if (psindex != 0) {
      psWriter.write(pidandsize, 0, psindex);
    }
    if (bufindex != 0) {
      dataWriter.write(buffer, 0, bufindex);
      bufindex = 0;
    }
    dataWriter.close();
    dataWriter = null;
    psWriter.close();
    psWriter = null;
    buffer = null;
    pidandsize = null;
    this.ssrc.setDirFlag(new String[] { "3" });
    this.ssrc.setCounter(this.counter);
    HashMap<Integer, Integer> hashBucketToPartition = this.sssc.loadDataInBalancerBarrier(ssrc,
        Constants.PARTITION_TYPE.HASH);
    this.staff.setHashBucketToPartition(hashBucketToPartition);
    byte[][] databuf = new byte[staffNum][dataBufferSize];
    int[] databufindex = new int[staffNum];
    try {
      IntWritable pid = new IntWritable();
      IntWritable size = new IntWritable();
      int belongPid = 0;
      while (true) {
        size.readFields(psReader);
        pid.readFields(psReader);
        belongPid = hashBucketToPartition.get(pid.get());
        if (belongPid != this.staff.getPartition()) {
          send++;
        } else {
          local++;
        }
        if ((databuf[belongPid].length - databufindex[belongPid]) > size.get()) {
          dataReader.read(databuf[belongPid], databufindex[belongPid], size.get());
          databufindex[belongPid] += size.get();
        } else if (databuf[belongPid].length < size.get()) {
          LOG.info("This is a super record");
          byte[] tmp = new byte[size.get()];
          dataReader.read(tmp, 0, size.get());
          if (belongPid == this.staff.getPartition()) {
            DataInputStream reader = new DataInputStream(
                new BufferedInputStream(new ByteArrayInputStream(tmp)));
            try {
              boolean stop = true;
              while (stop) {
                Text key = new Text();
                key.readFields(reader);
                Text value = new Text();
                value.readFields(reader);
                if (key.getLength() > 0 && value.getLength() > 0) {
                  Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                  if (vertex == null) {
                    lost++;
                    continue;
                  }
                  this.staff.getGraphData().addForAll(vertex);
                } else {
                  stop = false;
                }
              }
            } catch (IOException e) {
              LOG.info("IO exception: " + e.getStackTrace());
            }
          } else {
            ThreadSignle t = tpool.getThread();
            while (t == null) {
              t = tpool.getThread();
            }
            t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid));
            t.setJobId(staff.getJobID());
            t.setTaskId(staff.getStaffID());
            t.setBelongPartition(belongPid);
            BytesWritable data = new BytesWritable();
            data.set(tmp, 0, size.get());
            t.setData(data);
            LOG.info("Using Thread is: " + t.getThreadNumber());
            t.setStatus(true);
          }
          tmp = null;
        } else {
          if (belongPid == this.staff.getPartition()) {
            DataInputStream reader = new DataInputStream(new BufferedInputStream(
                new ByteArrayInputStream(databuf[belongPid], 0, databufindex[belongPid])));
            try {
              boolean stop = true;
              while (stop) {
                Text key = new Text();
                key.readFields(reader);
                Text value = new Text();
                value.readFields(reader);
                if (key.getLength() > 0 && value.getLength() > 0) {
                  Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                  if (vertex == null) {
                    lost++;
                    continue;
                  }
                  this.staff.getGraphData().addForAll(vertex);
                } else {
                  stop = false;
                }
              }
            } catch (IOException e) {
              LOG.info("IO exception: " + e.getStackTrace());
            }
          } else {
            ThreadSignle t = tpool.getThread();
            while (t == null) {
              t = tpool.getThread();
            }
            t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid));
            t.setJobId(staff.getJobID());
            t.setTaskId(staff.getStaffID());
            t.setBelongPartition(belongPid);
            BytesWritable data = new BytesWritable();
            data.set(databuf[belongPid], 0, databufindex[belongPid]);
            t.setData(data);
            LOG.info("Using Thread is: " + t.getThreadNumber());
            t.setStatus(true);
          }
          databufindex[belongPid] = 0;
          dataReader.read(databuf[belongPid], databufindex[belongPid], size.get());
          databufindex[belongPid] += size.get();
        }
      }
    } catch (EOFException ex) {
      LOG.error("[write]", ex);
    }
    for (int i = 0; i < staffNum; i++) {
      if (databufindex[i] != 0) {
        if (i == this.staff.getPartition()) {
          DataInputStream reader = new DataInputStream(
              new BufferedInputStream(new ByteArrayInputStream(databuf[i], 0, databufindex[i])));
          try {
            boolean stop = true;
            while (stop) {
              Text key = new Text();
              key.readFields(reader);
              Text value = new Text();
              value.readFields(reader);
              if (key.getLength() > 0 && value.getLength() > 0) {
                Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                if (vertex == null) {
                  lost++;
                  continue;
                }
                this.staff.getGraphData().addForAll(vertex);
              } else {
                stop = false;
              }
            }
          } catch (IOException e) {
            LOG.info("IO exception: " + e.getStackTrace());
          }
        } else {
          ThreadSignle t = tpool.getThread();
          while (t == null) {
            t = tpool.getThread();
          }
          t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
          t.setJobId(staff.getJobID());
          t.setTaskId(staff.getStaffID());
          t.setBelongPartition(i);
          BytesWritable data = new BytesWritable();
          data.set(databuf[i], 0, databufindex[i]);
          t.setData(data);
          LOG.info("Using Thread is: " + t.getThreadNumber());
          t.setStatus(true);
        }
      }
    }
    dataReader.close();
    dataReader = null;
    psReader.close();
    psReader = null;
    for (File f : files) {
      f.delete();
    }
    dir.delete();
    dir = new File(path.substring(0, path.lastIndexOf('/')));
    dir.delete();
    tpool.cleanup();
    tpool = null;
    databuf = null;
    databufindex = null;
    this.counter = null;
    LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
    LOG.info("The number of vertices that were put into the partition: " + local);
    LOG.info("The number of vertices that were sent to other partitions: " + send);
    LOG.info("The number of vertices in the partition that could not be parsed: " + lost);
  } catch (IOException e) {
    throw e;
  } catch (InterruptedException e) {
    throw e;
  } finally {
    for (File f : files) {
      f.delete();
    }
    dir.delete();
    dir = new File(path.substring(0, path.lastIndexOf('/')));
    dir.delete();
  }
}
From source file:com.chinamobile.bcbsp.partition.HashWritePartition.java
License:Apache License
/**
 * This method partitions graph vertices, writing each vertex to the corresponding
 * partition. It calls the recordParse method to create a HeadNode object, then the
 * partitioner's getPartitionID method to compute the id of the partition the HeadNode
 * belongs to. If the HeadNode belongs to the local partition it is written there;
 * otherwise it is sent to the appropriate partition.
 * @param recordReader The record reader of the split.
 * @throws IOException The io exception
 * @throws InterruptedException The Interrupted Exception
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
  int headNodeNum = 0;
  int local = 0;
  int send = 0;
  int lost = 0;
  ThreadPool tpool = new ThreadPool(this.sendThreadNum);
  int bufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
      / (this.staff.getStaffNum() + this.sendThreadNum);
  byte[][] buffer = new byte[this.staff.getStaffNum()][bufferSize];
  int[] bufindex = new int[this.staff.getStaffNum()];
  BytesWritable kbytes = new BytesWritable();
  int ksize = 0;
  BytesWritable vbytes = new BytesWritable();
  int vsize = 0;
  DataOutputBuffer bb = new DataOutputBuffer();
  try {
    this.keyserializer.open(bb);
    this.valueserializer.open(bb);
  } catch (IOException e) {
    throw e;
  }
  try {
    while (recordReader != null && recordReader.nextKeyValue()) {
      headNodeNum++;
      Text key = new Text(recordReader.getCurrentKey().toString());
      Text value = new Text(recordReader.getCurrentValue().toString());
      int pid = -1;
      Text vertexID = this.recordParse.getVertexID(key);
      if (vertexID != null) {
        pid = this.partitioner.getPartitionID(vertexID);
      } else {
        lost++;
        continue;
      }
      if (pid == this.staff.getPartition()) {
        local++;
        Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
        if (vertex == null) {
          lost++;
          continue;
        }
        staff.getGraphData().addForAll(vertex);
      } else {
        send++;
        bb.reset();
        this.keyserializer.serialize(key);
        kbytes.set(bb.getData(), 0, bb.getLength());
        ksize = kbytes.getLength();
        bb.reset();
        this.valueserializer.serialize(value);
        vbytes.set(bb.getData(), 0, bb.getLength());
        vsize = vbytes.getLength();
        if ((buffer[pid].length - bufindex[pid]) > (ksize + vsize)) {
          System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
          bufindex[pid] += ksize;
          System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
          bufindex[pid] += vsize;
        } else if (buffer[pid].length < (ksize + vsize)) {
          ThreadSignle t = tpool.getThread();
          while (t == null) {
            t = tpool.getThread();
          }
          t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
          t.setJobId(staff.getJobID());
          t.setTaskId(staff.getStaffID());
          t.setBelongPartition(pid);
          BytesWritable data = new BytesWritable();
          byte[] tmp = new byte[vsize + ksize];
          System.arraycopy(kbytes.getBytes(), 0, tmp, 0, ksize);
          System.arraycopy(vbytes.getBytes(), 0, tmp, ksize, vsize);
          data.set(tmp, 0, (ksize + vsize));
          t.setData(data);
          tmp = null;
          LOG.info("Using Thread is: " + t.getThreadNumber());
          LOG.info("this is a super record");
          t.setStatus(true);
        } else {
          ThreadSignle t = tpool.getThread();
          while (t == null) {
            t = tpool.getThread();
          }
          t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
          t.setJobId(staff.getJobID());
          t.setTaskId(staff.getStaffID());
          t.setBelongPartition(pid);
          BytesWritable data = new BytesWritable();
          data.set(buffer[pid], 0, bufindex[pid]);
          t.setData(data);
          LOG.info("Using Thread is: " + t.getThreadNumber());
          t.setStatus(true);
          bufindex[pid] = 0;
          // store data
          System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
          bufindex[pid] += ksize;
          System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
          bufindex[pid] += vsize;
        }
      }
    }
    for (int i = 0; i < this.staff.getStaffNum(); i++) {
      if (bufindex[i] != 0) {
        ThreadSignle t = tpool.getThread();
        while (t == null) {
          t = tpool.getThread();
        }
        t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
        t.setJobId(staff.getJobID());
        t.setTaskId(staff.getStaffID());
        t.setBelongPartition(i);
        BytesWritable data = new BytesWritable();
        data.set(buffer[i], 0, bufindex[i]);
        t.setData(data);
        LOG.info("Using Thread is: " + t.getThreadNumber());
        t.setStatus(true);
      }
    }
    tpool.cleanup();
    tpool = null;
    buffer = null;
    bufindex = null;
    LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
    LOG.info("The number of vertices that were put into the partition: " + local);
    LOG.info("The number of vertices that were sent to other partitions: " + send);
    LOG.info("The number of vertices in the partition that could not be parsed: " + lost);
  } catch (IOException e) {
    throw e;
  } catch (InterruptedException e) {
    throw e;
  }
}
From source file:com.cloudera.recordservice.examples.terasort.TeraValidate.java
License:Apache License
private static String textifyBytes(Text t) {
  BytesWritable b = new BytesWritable();
  b.set(t.getBytes(), 0, t.getLength());
  return b.toString();
}
From source file:com.ebay.nest.io.sede.binarysortable.BinarySortableSerDe.java
License:Apache License
static Object deserialize(InputByteBuffer buffer, TypeInfo type, boolean invert, Object reuse)
    throws IOException {
  // Is this field a null?
  byte isNull = buffer.read(invert);
  if (isNull == 0) {
    return null;
  }
  assert (isNull == 1);
  switch (type.getCategory()) {
  case PRIMITIVE: {
    PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type;
    switch (ptype.getPrimitiveCategory()) {
    case VOID: {
      return null;
    }
    case BOOLEAN: {
      BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
      byte b = buffer.read(invert);
      assert (b == 1 || b == 2);
      r.set(b == 2);
      return r;
    }
    case BYTE: {
      ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
      r.set((byte) (buffer.read(invert) ^ 0x80));
      return r;
    }
    case SHORT: {
      ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
      int v = buffer.read(invert) ^ 0x80;
      v = (v << 8) + (buffer.read(invert) & 0xff);
      r.set((short) v);
      return r;
    }
    case INT: {
      IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
      r.set(deserializeInt(buffer, invert));
      return r;
    }
    case LONG: {
      LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
      long v = buffer.read(invert) ^ 0x80;
      for (int i = 0; i < 7; i++) {
        v = (v << 8) + (buffer.read(invert) & 0xff);
      }
      r.set(v);
      return r;
    }
    case FLOAT: {
      FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
      int v = 0;
      for (int i = 0; i < 4; i++) {
        v = (v << 8) + (buffer.read(invert) & 0xff);
      }
      if ((v & (1 << 31)) == 0) {
        // negative number, flip all bits
        v = ~v;
      } else {
        // positive number, flip the first bit
        v = v ^ (1 << 31);
      }
      r.set(Float.intBitsToFloat(v));
      return r;
    }
    case DOUBLE: {
      DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
      long v = 0;
      for (int i = 0; i < 8; i++) {
        v = (v << 8) + (buffer.read(invert) & 0xff);
      }
      if ((v & (1L << 63)) == 0) {
        // negative number, flip all bits
        v = ~v;
      } else {
        // positive number, flip the first bit
        v = v ^ (1L << 63);
      }
      r.set(Double.longBitsToDouble(v));
      return r;
    }
    case STRING: {
      Text r = reuse == null ? new Text() : (Text) reuse;
      return deserializeText(buffer, invert, r);
    }
    case VARCHAR: {
      HiveVarcharWritable r = reuse == null ? new HiveVarcharWritable() : (HiveVarcharWritable) reuse;
      // Use HiveVarchar's internal Text member to read the value.
      deserializeText(buffer, invert, r.getTextValue());
      // If we cache helper data for deserialization we could avoid having
      // to call getVarcharMaxLength() on every deserialize call.
      r.enforceMaxLength(getVarcharMaxLength(type));
      return r;
    }
    case BINARY: {
      BytesWritable bw = new BytesWritable();
      // Get the actual length first
      int start = buffer.tell();
      int length = 0;
      do {
        byte b = buffer.read(invert);
        if (b == 0) {
          // end of string
          break;
        }
        if (b == 1) {
          // the last char is an escape char. read the actual char
          buffer.read(invert);
        }
        length++;
      } while (true);
      if (length == buffer.tell() - start) {
        // No escaping happened, so we are already done.
        bw.set(buffer.getData(), start, length);
      } else {
        // Escaping happened, we need to copy byte-by-byte.
        // 1. Set the length first.
        bw.set(buffer.getData(), start, length);
        // 2. Reset the pointer.
        buffer.seek(start);
        // 3. Copy the data.
        byte[] rdata = bw.getBytes();
        for (int i = 0; i < length; i++) {
          byte b = buffer.read(invert);
          if (b == 1) {
            // The last char is an escape char, read the actual char.
            // The serialization format escapes \0 to \1, and \1 to \2,
            // to make sure the string is null-terminated.
            b = (byte) (buffer.read(invert) - 1);
          }
          rdata[i] = b;
        }
        // 4. Read the null terminator.
        byte b = buffer.read(invert);
        assert (b == 0);
      }
      return bw;
    }
    case DATE: {
      DateWritable d = reuse == null ? new DateWritable() : (DateWritable) reuse;
      d.set(deserializeInt(buffer, invert));
      return d;
    }
    case TIMESTAMP: {
      TimestampWritable t = (reuse == null ? new TimestampWritable() : (TimestampWritable) reuse);
      byte[] bytes = new byte[TimestampWritable.BINARY_SORTABLE_LENGTH];
      for (int i = 0; i < bytes.length; i++) {
        bytes[i] = buffer.read(invert);
      }
      t.setBinarySortable(bytes, 0);
      return t;
    }
    case DECIMAL: {
      // See serialization of decimal for explanation (below)
      HiveDecimalWritable bdw = (reuse == null ? new HiveDecimalWritable() : (HiveDecimalWritable) reuse);
      int b = buffer.read(invert) - 1;
      assert (b == 1 || b == -1 || b == 0);
      boolean positive = b != -1;
      int factor = buffer.read(invert) ^ 0x80;
      for (int i = 0; i < 3; i++) {
        factor = (factor << 8) + (buffer.read(invert) & 0xff);
      }
      if (!positive) {
        factor = -factor;
      }
      int start = buffer.tell();
      int length = 0;
      do {
        b = buffer.read(positive ? invert : !invert);
        assert (b != 1);
        if (b == 0) {
          // end of digits
          break;
        }
        length++;
      } while (true);
      if (decimalBuffer == null || decimalBuffer.length < length) {
        decimalBuffer = new byte[length];
      }
      buffer.seek(start);
      for (int i = 0; i < length; ++i) {
        decimalBuffer[i] = buffer.read(positive ? invert : !invert);
      }
      // read the null byte again
      buffer.read(positive ? invert : !invert);
      String digits = new String(decimalBuffer, 0, length, decimalCharSet);
      BigInteger bi = new BigInteger(digits);
      HiveDecimal bd = new HiveDecimal(bi).scaleByPowerOfTen(factor - length);
      if (!positive) {
        bd = bd.negate();
      }
      bdw.set(bd);
      return bdw;
    }
    default: {
      throw new RuntimeException("Unrecognized type: " + ptype.getPrimitiveCategory());
    }
    }
  }
  case LIST: {
    ListTypeInfo ltype = (ListTypeInfo) type;
    TypeInfo etype = ltype.getListElementTypeInfo();
    // Create the list if needed
    ArrayList<Object> r = reuse == null ? new ArrayList<Object>() : (ArrayList<Object>) reuse;
    // Read the list
    int size = 0;
    while (true) {
      int more = buffer.read(invert);
      if (more == 0) {
        // \0 to terminate
        break;
      }
      // \1 followed by each element
      assert (more == 1);
      if (size == r.size()) {
        r.add(null);
      }
      r.set(size, deserialize(buffer, etype, invert, r.get(size)));
      size++;
    }
    // Remove additional elements if the list is reused
    while (r.size() > size) {
      r.remove(r.size() - 1);
    }
    return r;
  }
  case MAP: {
    MapTypeInfo mtype = (MapTypeInfo) type;
    TypeInfo ktype = mtype.getMapKeyTypeInfo();
    TypeInfo vtype = mtype.getMapValueTypeInfo();
    // Create the map if needed
    Map<Object, Object> r;
    if (reuse == null) {
      r = new HashMap<Object, Object>();
    } else {
      r = (HashMap<Object, Object>) reuse;
      r.clear();
    }
    while (true) {
      int more = buffer.read(invert);
      if (more == 0) {
        // \0 to terminate
        break;
      }
      // \1 followed by each key and then each value
      assert (more == 1);
      Object k = deserialize(buffer, ktype, invert, null);
      Object v = deserialize(buffer, vtype, invert, null);
      r.put(k, v);
    }
    return r;
  }
  case STRUCT: {
    StructTypeInfo stype = (StructTypeInfo) type;
    List<TypeInfo> fieldTypes = stype.getAllStructFieldTypeInfos();
    int size = fieldTypes.size();
    // Create the struct if needed
    ArrayList<Object> r = reuse == null ? new ArrayList<Object>(size) : (ArrayList<Object>) reuse;
    assert (r.size() <= size);
    // Set the size of the struct
    while (r.size() < size) {
      r.add(null);
    }
    // Read one field by one field
    for (int eid = 0; eid < size; eid++) {
      r.set(eid, deserialize(buffer, fieldTypes.get(eid), invert, r.get(eid)));
    }
    return r;
  }
  case UNION: {
    UnionTypeInfo utype = (UnionTypeInfo) type;
    StandardUnion r = reuse == null ? new StandardUnion() : (StandardUnion) reuse;
    // Read the tag
    byte tag = buffer.read(invert);
    r.setTag(tag);
    r.setObject(deserialize(buffer, utype.getAllUnionObjectTypeInfos().get(tag), invert, null));
    return r;
  }
  default: {
    throw new RuntimeException("Unrecognized type: " + type.getCategory());
  }
  }
}