Example usage for org.apache.hadoop.io BytesWritable set

Introduction

On this page you can find example usage for org.apache.hadoop.io.BytesWritable.set.

Prototype

public void set(byte[] newData, int offset, int length) 

Document

Set the value to a copy of the given byte range.
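
Before the real-world examples, here is a minimal, self-contained sketch (not taken from any of the sources below) showing the copy semantics of set(byte[], int, int):

import org.apache.hadoop.io.BytesWritable;

public class BytesWritableSetDemo {
    public static void main(String[] args) {
        byte[] source = { 10, 20, 30, 40, 50 };
        BytesWritable bw = new BytesWritable();
        bw.set(source, 1, 3);                 // copies bytes 20, 30, 40
        System.out.println(bw.getLength());   // 3
        source[1] = 99;                       // mutating the source afterwards...
        System.out.println(bw.getBytes()[0]); // ...still prints 20: set() took a copy
    }
}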

Usage

From source file:cascading.avro.CascadingToAvroTest.java

License:Apache License

@Test
public void testToAvroFixedUsesValidRangeOfBytesWritable() {
    Schema fieldSchema = schema.getField("aFixed").schema();
    BytesWritable bytes = new BytesWritable();
    byte[] old_buffer_value = { 0, 1, 2, 3 };
    bytes.set(old_buffer_value, 0, old_buffer_value.length);

    byte[] buffer_value = { 4, 5, 6 };
    bytes.set(buffer_value, 0, buffer_value.length);
    byte[] outBytes = ((Fixed) CascadingToAvro.toAvroFixed(bytes, fieldSchema)).bytes();

    assertThat(outBytes, is(buffer_value));
}
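
This test passes because set() shrinks the valid range without shrinking the backing buffer, so consumers must honor getLength() rather than the full getBytes() array. A short sketch of that pitfall (hypothetical class name, not part of the test suite), using copyBytes() to obtain an exact-size copy:

import java.util.Arrays;
import org.apache.hadoop.io.BytesWritable;

public class ValidRangeDemo {
    public static void main(String[] args) {
        BytesWritable bytes = new BytesWritable();
        bytes.set(new byte[] { 0, 1, 2, 3 }, 0, 4); // capacity grows to hold 4 bytes
        bytes.set(new byte[] { 4, 5, 6 }, 0, 3);    // length shrinks to 3; capacity stays
        byte[] backing = bytes.getBytes();          // may be longer than getLength()
        byte[] exact = bytes.copyBytes();           // exactly { 4, 5, 6 }
        System.out.println(backing.length > bytes.getLength()); // true here
        System.out.println(Arrays.toString(exact));             // [4, 5, 6]
    }
}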

From source file:cascading.avro.CascadingToAvroTest.java

License:Apache License

@Test
public void testToAvroBytesUsesValidRangeOfBytesWritable() {
    Schema fieldSchema = schema.getField("aBytes").schema();
    BytesWritable bytes = (BytesWritable) tupleEntry.getObject("aBytes");
    byte[] old_buffer_value = { 0, 1, 2, 3 };
    bytes.set(old_buffer_value, 0, old_buffer_value.length);

    byte[] buffer_value = { 4, 5, 6 };
    ByteBuffer result = ByteBuffer.wrap(buffer_value);
    bytes.set(buffer_value, 0, buffer_value.length);
    ByteBuffer outBytes = (ByteBuffer) CascadingToAvro.toAvro(bytes, fieldSchema);

    assertThat(outBytes, is(result));
}

From source file:co.cask.cdap.data.stream.decoder.BytesStreamEventDecoder.java

License:Apache License

private BytesWritable getEventBody(StreamEvent event, BytesWritable result) {
    ByteBuffer body = event.getBody();
    if (body.hasArray()) {
        // If the ByteBuffer is backed by an array whose contents exactly match
        // the buffer's valid range, use the backing array directly. No copy is
        // needed (the read from stream to mapper is synchronous), and creating
        // a new BytesWritable avoids the array copy that BytesWritable.set() does.
        if (body.array().length == body.remaining()) {
            return new BytesWritable(body.array());
        }
        // Otherwise, need to copy the byte[], done by the BytesWritable.set() method
        result.set(body.array(), body.arrayOffset() + body.position(), body.remaining());
        return result;
    }

    // Otherwise, need to copy to a new array
    byte[] copy = new byte[body.remaining()];
    body.mark();
    body.get(copy);
    body.reset();
    return new BytesWritable(copy);
}

From source file:com.bixolabs.cascading.avro.AvroScheme.java

License:Apache License

private Object convertFromAvroPrimitive(Object inObj) {
    if (inObj == null) {
        return null;
    } else if (inObj instanceof Utf8) {
        String convertedObj = ((Utf8) inObj).toString();
        return convertedObj;
    } else if (inObj instanceof ByteBuffer) {
        // TODO KKr - this is very inefficient, since we make a copy of
        // the ByteBuffer array in the call to BytesWritable.set(buffer, pos, length).
        // A potentially small win is to check if buffer.position() == 0, and if
        // so then do result = new BytesWritable(buffer.array()), followed by
        // result.setSize(buffer.limit())
        ByteBuffer buffer = (ByteBuffer) inObj;
        BytesWritable result = new BytesWritable();
        result.set(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
        return result;
    } else if (inObj instanceof Enum) {
        return inObj.toString();
    } else {
        return inObj;
    }
}
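
The TODO above can be sketched out: the BytesWritable(byte[]) constructor wraps the array without copying, and setSize() trims the advertised length, so a position-zero fast path avoids the copy that set() performs. A hedged sketch of that optimization (hypothetical helper, not part of AvroScheme):

import java.nio.ByteBuffer;
import org.apache.hadoop.io.BytesWritable;

public class ByteBufferToWritable {
    // Hypothetical helper realizing the TODO above.
    static BytesWritable toBytesWritable(ByteBuffer buffer) {
        if (buffer.hasArray() && buffer.arrayOffset() == 0 && buffer.position() == 0) {
            BytesWritable result = new BytesWritable(buffer.array()); // wraps; no copy
            result.setSize(buffer.limit()); // expose only the valid range
            return result;
        }
        byte[] copy = new byte[buffer.remaining()];
        buffer.duplicate().get(copy); // copy without disturbing the buffer's position
        return new BytesWritable(copy);
    }
}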

From source file:com.blackberry.logdriver.mapred.avro.AvroBlockRecordReader.java

License:Apache License

@Override
public boolean next(AvroFileHeader key, BytesWritable value) throws IOException {
    while (pos >= end) {
        if (in != null) {
            in.close();
        }
        currentFile++;
        if (split.getNumPaths() > currentFile) {
            try {
                initCurrentFile();
            } catch (IOException e) {
                LOG.error("Error reading Boom file header.  Skipping this file.", e);
                return false;
            }
        } else {
            return false;
        }
    }

    key.set(header);

    // Get the number of entries in the next block
    int entries = AvroUtils.readInt(in);
    byte[] block = null;
    try {
        block = AvroUtils.readBytes(in);
    } catch (IOException e) {
        LOG.error("Error reading block from file.  Skipping the rest of this file.", e);
        return false;
    }

    // Check that the sync marker is what we expect
    LOG.trace("Verifying sync marker");
    byte[] syncMarker = null;
    try {
        syncMarker = AvroUtils.readBytes(in, DataFileConstants.SYNC_SIZE);
    } catch (IOException e) {
        LOG.error("Error reading sync marker from file.  Skipping the rest of this file.", e);
        return false;
    }
    if (!Arrays.equals(syncMarker, header.getSyncMarker())) {
        LOG.error("Sync marker does not match");
        return false;
    }

    // Now, pack it all back into a byte[], and set the value of value
    {
        ByteBuffer bb = ByteBuffer.allocate(10 + 10 + block.length);
        bb.put(AvroUtils.encodeLong(entries));
        bb.put(AvroUtils.encodeLong(block.length));
        bb.put(block);
        byte[] result = new byte[bb.position()];
        bb.rewind();
        bb.get(result);
        value.set(result, 0, result.length);

        pos = in.getPos();
    }

    return true;
}

From source file:com.chinamobile.bcbsp.ml.HashMLWritePartition.java

License:Apache License

/**
 * This method is used to partition graph vertices, writing each vertex to
 * its corresponding partition. It calls the recordParse method to create a
 * HeadNode object, then calls the partitioner's getPartitionID method to
 * compute the id of the partition the HeadNode belongs to. If the HeadNode
 * belongs to the local partition it is written locally; otherwise it is
 * sent to the appropriate partition.
 * @param recordReader The record reader of the split.
 * @throws IOException The IO exception
 * @throws InterruptedException The interrupted exception
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
    int headNodeNum = 0;
    int local = 0;
    int send = 0;
    int lost = 0;
    ThreadPool tpool = new ThreadPool(this.sendThreadNum);
    int bufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
            / (this.staff.getStaffNum() + this.sendThreadNum);
    byte[][] buffer = new byte[this.staff.getStaffNum()][bufferSize];
    int[] bufindex = new int[this.staff.getStaffNum()];
    BytesWritable kbytes = new BytesWritable();
    int ksize = 0;
    BytesWritable vbytes = new BytesWritable();
    int vsize = 0;
    DataOutputBuffer bb = new DataOutputBuffer();
    try {
        this.keyserializer.open(bb);
        this.valueserializer.open(bb);
    } catch (IOException e) {
        throw e;
    }
    try {
        while (recordReader != null && recordReader.nextKeyValue()) {
            headNodeNum++;
            Text key = new Text(recordReader.getCurrentKey().toString());
            Text value = new Text(recordReader.getCurrentValue().toString());
            int pid = -1;
            if (key != null) {
                pid = this.partitioner.getPartitionID(key);
            } else {
                lost++;
                continue;
            }
            if (pid == this.staff.getPartition()) {
                local++;

                KeyValuePair pair = (KeyValuePair) this.recordParse.recordParse(key.toString(),
                        value.toString());

                if (pair == null) {
                    lost++;
                    continue;
                }
                staff.getGraphData().addForAll(pair);
            } else {
                send++;
                bb.reset();
                this.keyserializer.serialize(key);
                kbytes.set(bb.getData(), 0, bb.getLength());
                ksize = kbytes.getLength();
                bb.reset();
                this.valueserializer.serialize(value);
                vbytes.set(bb.getData(), 0, bb.getLength());
                vsize = vbytes.getLength();
                if ((buffer[pid].length - bufindex[pid]) > (ksize + vsize)) {
                    System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
                    bufindex[pid] += ksize;
                    System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
                    bufindex[pid] += vsize;
                } else if (buffer[pid].length < (ksize + vsize)) {
                    ThreadSignle t = tpool.getThread();
                    while (t == null) {
                        t = tpool.getThread();
                    }
                    t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
                    t.setJobId(staff.getJobID());
                    t.setTaskId(staff.getStaffID());
                    t.setBelongPartition(pid);
                    BytesWritable data = new BytesWritable();
                    byte[] tmp = new byte[vsize + ksize];
                    System.arraycopy(kbytes.getBytes(), 0, tmp, 0, ksize);
                    System.arraycopy(vbytes.getBytes(), 0, tmp, ksize, vsize);
                    data.set(tmp, 0, (ksize + vsize));
                    t.setData(data);
                    tmp = null;
                    LOG.info("Using Thread is: " + t.getThreadNumber());
                    LOG.info("this is a super record");
                    t.setStatus(true);
                } else {
                    ThreadSignle t = tpool.getThread();
                    while (t == null) {
                        t = tpool.getThread();
                    }
                    t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
                    t.setJobId(staff.getJobID());
                    t.setTaskId(staff.getStaffID());
                    t.setBelongPartition(pid);
                    BytesWritable data = new BytesWritable();
                    data.set(buffer[pid], 0, bufindex[pid]);
                    t.setData(data);
                    LOG.info("Using Thread is: " + t.getThreadNumber());
                    t.setStatus(true);
                    bufindex[pid] = 0;
                    // store data
                    System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
                    bufindex[pid] += ksize;
                    System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
                    bufindex[pid] += vsize;
                }
            }
        }
        for (int i = 0; i < this.staff.getStaffNum(); i++) {
            if (bufindex[i] != 0) {
                ThreadSignle t = tpool.getThread();
                while (t == null) {
                    t = tpool.getThread();
                }
                t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
                t.setJobId(staff.getJobID());
                t.setTaskId(staff.getStaffID());
                t.setBelongPartition(i);
                BytesWritable data = new BytesWritable();
                data.set(buffer[i], 0, bufindex[i]);
                t.setData(data);
                LOG.info("Using Thread is: " + t.getThreadNumber());
                t.setStatus(true);
            }
        }
        tpool.cleanup();
        tpool = null;
        buffer = null;
        bufindex = null;
        LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
        LOG.info("The number of vertices that were put into the partition: " + local);
        LOG.info("The number of vertices that were sent to other partitions: " + send);
        LOG.info("The number of verteices in the partition that cound not be " + "parsed:" + lost);
    } catch (IOException e) {
        throw e;
    } catch (InterruptedException e) {
        throw e;
    }
}
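
The BytesWritable.set() pattern at the heart of this writer (and of the two variants that follow) is: rewind a reusable DataOutputBuffer, serialize the Writable into it, then copy only the valid range into a BytesWritable. A minimal sketch of that pattern, calling Writable.write() directly in place of the configured serializers:

import java.io.IOException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

public class SerializePatternDemo {
    public static void main(String[] args) throws IOException {
        DataOutputBuffer bb = new DataOutputBuffer();
        BytesWritable kbytes = new BytesWritable();
        Text key = new Text("vertex-1");

        bb.reset();                                  // rewind the reusable buffer
        key.write(bb);                               // Writable serialization
        kbytes.set(bb.getData(), 0, bb.getLength()); // copy only the valid bytes
        System.out.println(kbytes.getLength());      // serialized size of the key
    }
}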

From source file:com.chinamobile.bcbsp.partition.HashWithBalancerWritePartition.java

License:Apache License

/**
 * This method is used to partition graph vertices, writing each vertex to
 * its corresponding partition. It calls the recordParse method to create a
 * HeadNode object, then calls the partitioner's getPartitionID method to
 * compute the id of the partition the HeadNode belongs to. If the HeadNode
 * belongs to the local partition it is written locally; otherwise it is
 * sent to the appropriate partition.
 * @param recordReader The record reader of the split.
 * @throws IOException The IO exception
 * @throws InterruptedException The interrupted exception
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
    int headNodeNum = 0;
    int local = 0;
    int send = 0;
    int lost = 0;
    ThreadPool tpool = new ThreadPool(this.sendThreadNum);
    int staffNum = this.staff.getStaffNum();
    BytesWritable kbytes = new BytesWritable();
    int ksize = 0;
    BytesWritable vbytes = new BytesWritable();
    int vsize = 0;
    DataOutputBuffer bb = new DataOutputBuffer();
    int bufferSize = (int) ((this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER) * PART);
    int dataBufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
            / (this.staff.getStaffNum() + this.sendThreadNum);
    byte[] buffer = new byte[bufferSize];
    int bufindex = 0;
    SerializationFactory sFactory = new SerializationFactory(new Configuration());
    Serializer<IntWritable> psserializer = sFactory.getSerializer(IntWritable.class);
    byte[] pidandsize = new byte[TIME * CONTAINERNUMBER * CONTAINERNUMBER];
    int psindex = 0;
    BytesWritable pidbytes = new BytesWritable();
    int psize = 0;
    BytesWritable sizebytes = new BytesWritable();
    int ssize = 0;
    try {
        this.keyserializer.open(bb);
        this.valueserializer.open(bb);
        psserializer.open(bb);
    } catch (IOException e) {
        throw e;
    }
    String path = "/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID();
    File dir = new File("/tmp/bcbsp/" + this.staff.getJobID());
    dir.mkdir();
    dir = new File("/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID());
    dir.mkdir();
    ArrayList<File> files = new ArrayList<File>();
    try {
        File file = new File(path + "/" + "data" + ".txt");
        files.add(file);
        DataOutputStream dataWriter = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(path + "/" + "data" + ".txt", true)));
        DataInputStream dataReader = new DataInputStream(
                new BufferedInputStream(new FileInputStream(path + "/" + "data" + ".txt")));
        File filet = new File(path + "/" + "pidandsize" + ".txt");
        files.add(filet);
        DataOutputStream psWriter = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(path + "/" + "pidandsize" + ".txt", true)));
        DataInputStream psReader = new DataInputStream(
                new BufferedInputStream(new FileInputStream(path + "/" + "pidandsize" + ".txt")));
        while (recordReader != null && recordReader.nextKeyValue()) {
            headNodeNum++;
            Text key = new Text(recordReader.getCurrentKey().toString());
            Text value = new Text(recordReader.getCurrentValue().toString());
            int pid = -1;
            Text vertexID = this.recordParse.getVertexID(key);
            if (vertexID != null) {
                pid = this.partitioner.getPartitionID(vertexID);
            } else {
                lost++;
                continue;
            }
            if (this.counter.containsKey(pid)) {
                this.counter.put(pid, (this.counter.get(pid) + 1));
            } else {
                this.counter.put(pid, 1);
            }
            bb.reset();
            this.keyserializer.serialize(key);
            kbytes.set(bb.getData(), 0, bb.getLength());
            ksize = kbytes.getLength();
            bb.reset();
            this.valueserializer.serialize(value);
            vbytes.set(bb.getData(), 0, bb.getLength());
            vsize = vbytes.getLength();
            bb.reset();
            psserializer.serialize(new IntWritable(ksize + vsize));
            sizebytes.set(bb.getData(), 0, bb.getLength());
            ssize = sizebytes.getLength();
            bb.reset();
            psserializer.serialize(new IntWritable(pid));
            pidbytes.set(bb.getData(), 0, bb.getLength());
            psize = pidbytes.getLength();
            if ((pidandsize.length - psindex) > (ssize + psize)) {
                System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize);
                psindex += ssize;
                System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize);
                psindex += psize;
            } else {
                psWriter.write(pidandsize, 0, psindex);
                psindex = 0;
                System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize);
                psindex += ssize;
                System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize);
                psindex += psize;
            }
            if ((buffer.length - bufindex) > (ksize + vsize)) {
                System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize);
                bufindex += ksize;
                System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize);
                bufindex += vsize;
            } else if (buffer.length < (ksize + vsize)) {
                dataWriter.write(buffer, 0, bufindex);
                bufindex = 0;
                LOG.info("This is a super record");
                dataWriter.write(kbytes.getBytes(), 0, ksize);
                dataWriter.write(vbytes.getBytes(), 0, vsize);
            } else {
                dataWriter.write(buffer, 0, bufindex);
                bufindex = 0;
                System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize);
                bufindex += ksize;
                System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize);
                bufindex += vsize;
            }
        }
        if (psindex != 0) {
            psWriter.write(pidandsize, 0, psindex);
        }
        if (bufindex != 0) {
            dataWriter.write(buffer, 0, bufindex);
            bufindex = 0;
        }
        dataWriter.close();
        dataWriter = null;
        psWriter.close();
        psWriter = null;
        buffer = null;
        pidandsize = null;
        this.ssrc.setDirFlag(new String[] { "3" });
        this.ssrc.setCounter(this.counter);
        HashMap<Integer, Integer> hashBucketToPartition = this.sssc.loadDataInBalancerBarrier(ssrc,
                Constants.PARTITION_TYPE.HASH);
        this.staff.setHashBucketToPartition(hashBucketToPartition);
        byte[][] databuf = new byte[staffNum][dataBufferSize];
        int[] databufindex = new int[staffNum];
        try {
            IntWritable pid = new IntWritable();
            IntWritable size = new IntWritable();
            int belongPid = 0;
            while (true) {
                size.readFields(psReader);
                pid.readFields(psReader);
                belongPid = hashBucketToPartition.get(pid.get());
                if (belongPid != this.staff.getPartition()) {
                    send++;
                } else {
                    local++;
                }
                if ((databuf[belongPid].length - databufindex[belongPid]) > size.get()) {
                    dataReader.read(databuf[belongPid], databufindex[belongPid], size.get());
                    databufindex[belongPid] += size.get();
                } else if (databuf[belongPid].length < size.get()) {
                    LOG.info("This is a super record");
                    byte[] tmp = new byte[size.get()];
                    dataReader.read(tmp, 0, size.get());
                    if (belongPid == this.staff.getPartition()) {
                        DataInputStream reader = new DataInputStream(
                                new BufferedInputStream(new ByteArrayInputStream(tmp)));
                        try {
                            boolean stop = true;
                            while (stop) {
                                Text key = new Text();
                                key.readFields(reader);
                                Text value = new Text();
                                value.readFields(reader);
                                if (key.getLength() > 0 && value.getLength() > 0) {
                                    Vertex vertex = this.recordParse.recordParse(key.toString(),
                                            value.toString());
                                    if (vertex == null) {
                                        lost++;
                                        continue;
                                    }
                                    this.staff.getGraphData().addForAll(vertex);
                                } else {
                                    stop = false;
                                }
                            }
                        } catch (IOException e) {
                            LOG.info("IO exception: " + e.getStackTrace());
                        }
                    } else {
                        ThreadSignle t = tpool.getThread();
                        while (t == null) {
                            t = tpool.getThread();
                        }
                        t.setWorker(
                                this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid));
                        t.setJobId(staff.getJobID());
                        t.setTaskId(staff.getStaffID());
                        t.setBelongPartition(belongPid);
                        BytesWritable data = new BytesWritable();
                        data.set(tmp, 0, size.get());
                        t.setData(data);
                        LOG.info("Using Thread is: " + t.getThreadNumber());
                        t.setStatus(true);
                    }
                    tmp = null;
                } else {
                    if (belongPid == this.staff.getPartition()) {
                        DataInputStream reader = new DataInputStream(new BufferedInputStream(
                                new ByteArrayInputStream(databuf[belongPid], 0, databufindex[belongPid])));
                        try {
                            boolean stop = true;
                            while (stop) {
                                Text key = new Text();
                                key.readFields(reader);
                                Text value = new Text();
                                value.readFields(reader);
                                if (key.getLength() > 0 && value.getLength() > 0) {
                                    Vertex vertex = this.recordParse.recordParse(key.toString(),
                                            value.toString());
                                    if (vertex == null) {
                                        lost++;
                                        continue;
                                    }
                                    this.staff.getGraphData().addForAll(vertex);
                                } else {
                                    stop = false;
                                }
                            }
                        } catch (IOException e) {
                            LOG.info("IO exception: " + e.getStackTrace());
                        }
                    } else {
                        ThreadSignle t = tpool.getThread();
                        while (t == null) {
                            t = tpool.getThread();
                        }
                        t.setWorker(
                                this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid));
                        t.setJobId(staff.getJobID());
                        t.setTaskId(staff.getStaffID());
                        t.setBelongPartition(belongPid);
                        BytesWritable data = new BytesWritable();
                        data.set(databuf[belongPid], 0, databufindex[belongPid]);
                        t.setData(data);
                        LOG.info("Using Thread is: " + t.getThreadNumber());
                        t.setStatus(true);
                    }
                    databufindex[belongPid] = 0;
                    dataReader.read(databuf[belongPid], databufindex[belongPid], size.get());
                    databufindex[belongPid] += size.get();
                }
            }
        } catch (EOFException ex) {
            LOG.error("[write]", ex);
        }
        for (int i = 0; i < staffNum; i++) {
            if (databufindex[i] != 0) {
                if (i == this.staff.getPartition()) {
                    DataInputStream reader = new DataInputStream(
                            new BufferedInputStream(new ByteArrayInputStream(databuf[i], 0, databufindex[i])));
                    try {
                        boolean stop = true;
                        while (stop) {
                            Text key = new Text();
                            key.readFields(reader);
                            Text value = new Text();
                            value.readFields(reader);
                            if (key.getLength() > 0 && value.getLength() > 0) {
                                Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                                if (vertex == null) {
                                    lost++;
                                    continue;
                                }
                                this.staff.getGraphData().addForAll(vertex);
                            } else {
                                stop = false;
                            }
                        }
                    } catch (IOException e) {
                        LOG.info("IO exception: " + e.getStackTrace());
                    }
                } else {
                    ThreadSignle t = tpool.getThread();
                    while (t == null) {
                        t = tpool.getThread();
                    }
                    t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
                    t.setJobId(staff.getJobID());
                    t.setTaskId(staff.getStaffID());
                    t.setBelongPartition(i);
                    BytesWritable data = new BytesWritable();
                    data.set(databuf[i], 0, databufindex[i]);
                    t.setData(data);
                    LOG.info("Using Thread is: " + t.getThreadNumber());
                    t.setStatus(true);
                }
            }
        }
        dataReader.close();
        dataReader = null;
        psReader.close();
        psReader = null;
        for (File f : files) {
            f.delete();
        }
        dir.delete();
        dir = new File(path.substring(0, path.lastIndexOf('/')));
        dir.delete();
        tpool.cleanup();
        tpool = null;
        databuf = null;
        databufindex = null;
        this.counter = null;
        LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
        LOG.info("The number of vertices that were put into the partition: " + local);
        LOG.info("The number of vertices that were sent to other partitions: " + send);
        LOG.info("The number of verteices in the partition that cound not be " + "parsed:" + lost);
    } catch (IOException e) {
        throw e;
    } catch (InterruptedException e) {
        throw e;
    } finally {
        for (File f : files) {
            f.delete();
        }
        dir.delete();
        dir = new File(path.substring(0, path.lastIndexOf('/')));
        dir.delete();
    }
}

From source file:com.chinamobile.bcbsp.partition.HashWritePartition.java

License:Apache License

/**
 * This method is used to partition graph vertices, writing each vertex to
 * its corresponding partition. It calls the recordParse method to create a
 * HeadNode object, then calls the partitioner's getPartitionID method to
 * compute the id of the partition the HeadNode belongs to. If the HeadNode
 * belongs to the local partition it is written locally; otherwise it is
 * sent to the appropriate partition.
 * @param recordReader The record reader of the split.
 * @throws IOException The IO exception
 * @throws InterruptedException The interrupted exception
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
    int headNodeNum = 0;
    int local = 0;
    int send = 0;
    int lost = 0;
    ThreadPool tpool = new ThreadPool(this.sendThreadNum);
    int bufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
            / (this.staff.getStaffNum() + this.sendThreadNum);
    byte[][] buffer = new byte[this.staff.getStaffNum()][bufferSize];
    int[] bufindex = new int[this.staff.getStaffNum()];
    BytesWritable kbytes = new BytesWritable();
    int ksize = 0;
    BytesWritable vbytes = new BytesWritable();
    int vsize = 0;
    DataOutputBuffer bb = new DataOutputBuffer();
    try {
        this.keyserializer.open(bb);
        this.valueserializer.open(bb);
    } catch (IOException e) {
        throw e;
    }
    try {
        while (recordReader != null && recordReader.nextKeyValue()) {
            headNodeNum++;
            Text key = new Text(recordReader.getCurrentKey().toString());
            Text value = new Text(recordReader.getCurrentValue().toString());
            int pid = -1;
            Text vertexID = this.recordParse.getVertexID(key);
            if (vertexID != null) {
                pid = this.partitioner.getPartitionID(vertexID);
            } else {
                lost++;
                continue;
            }
            if (pid == this.staff.getPartition()) {
                local++;
                Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                if (vertex == null) {
                    lost++;
                    continue;
                }
                staff.getGraphData().addForAll(vertex);
            } else {
                send++;
                bb.reset();
                this.keyserializer.serialize(key);
                kbytes.set(bb.getData(), 0, bb.getLength());
                ksize = kbytes.getLength();
                bb.reset();
                this.valueserializer.serialize(value);
                vbytes.set(bb.getData(), 0, bb.getLength());
                vsize = vbytes.getLength();
                if ((buffer[pid].length - bufindex[pid]) > (ksize + vsize)) {
                    System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
                    bufindex[pid] += ksize;
                    System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
                    bufindex[pid] += vsize;
                } else if (buffer[pid].length < (ksize + vsize)) {
                    ThreadSignle t = tpool.getThread();
                    while (t == null) {
                        t = tpool.getThread();
                    }
                    t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
                    t.setJobId(staff.getJobID());
                    t.setTaskId(staff.getStaffID());
                    t.setBelongPartition(pid);
                    BytesWritable data = new BytesWritable();
                    byte[] tmp = new byte[vsize + ksize];
                    System.arraycopy(kbytes.getBytes(), 0, tmp, 0, ksize);
                    System.arraycopy(vbytes.getBytes(), 0, tmp, ksize, vsize);
                    data.set(tmp, 0, (ksize + vsize));
                    t.setData(data);
                    tmp = null;
                    LOG.info("Using Thread is: " + t.getThreadNumber());
                    LOG.info("this is a super record");
                    t.setStatus(true);
                } else {
                    ThreadSignle t = tpool.getThread();
                    while (t == null) {
                        t = tpool.getThread();
                    }
                    t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), pid));
                    t.setJobId(staff.getJobID());
                    t.setTaskId(staff.getStaffID());
                    t.setBelongPartition(pid);
                    BytesWritable data = new BytesWritable();
                    data.set(buffer[pid], 0, bufindex[pid]);
                    t.setData(data);
                    LOG.info("Using Thread is: " + t.getThreadNumber());
                    t.setStatus(true);
                    bufindex[pid] = 0;
                    // store data
                    System.arraycopy(kbytes.getBytes(), 0, buffer[pid], bufindex[pid], ksize);
                    bufindex[pid] += ksize;
                    System.arraycopy(vbytes.getBytes(), 0, buffer[pid], bufindex[pid], vsize);
                    bufindex[pid] += vsize;
                }
            }
        }
        for (int i = 0; i < this.staff.getStaffNum(); i++) {
            if (bufindex[i] != 0) {
                ThreadSignle t = tpool.getThread();
                while (t == null) {
                    t = tpool.getThread();
                }
                t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
                t.setJobId(staff.getJobID());
                t.setTaskId(staff.getStaffID());
                t.setBelongPartition(i);
                BytesWritable data = new BytesWritable();
                data.set(buffer[i], 0, bufindex[i]);
                t.setData(data);
                LOG.info("Using Thread is: " + t.getThreadNumber());
                t.setStatus(true);
            }
        }
        tpool.cleanup();
        tpool = null;
        buffer = null;
        bufindex = null;
        LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
        LOG.info("The number of vertices that were put into the partition: " + local);
        LOG.info("The number of vertices that were sent to other partitions: " + send);
        LOG.info("The number of verteices in the partition that cound not be " + "parsed:" + lost);
    } catch (IOException e) {
        throw e;
    } catch (InterruptedException e) {
        throw e;
    }
}

From source file:com.cloudera.recordservice.examples.terasort.TeraValidate.java

License:Apache License

private static String textifyBytes(Text t) {
    BytesWritable b = new BytesWritable();
    b.set(t.getBytes(), 0, t.getLength());
    return b.toString();
}
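
BytesWritable.toString() renders the valid range as hex pairs separated by spaces, which is what makes this helper useful for printing checksums. A quick illustration (hypothetical class name):

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;

public class TextifyDemo {
    public static void main(String[] args) {
        Text t = new Text("hi");
        BytesWritable b = new BytesWritable();
        b.set(t.getBytes(), 0, t.getLength());
        System.out.println(b); // prints "68 69" (hex of 'h' and 'i')
    }
}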

From source file:com.ebay.nest.io.sede.binarysortable.BinarySortableSerDe.java

License:Apache License

static Object deserialize(InputByteBuffer buffer, TypeInfo type, boolean invert, Object reuse)
        throws IOException {

    // Is this field a null?
    byte isNull = buffer.read(invert);
    if (isNull == 0) {
        return null;
    }
    assert (isNull == 1);

    switch (type.getCategory()) {
    case PRIMITIVE: {
        PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type;
        switch (ptype.getPrimitiveCategory()) {
        case VOID: {
            return null;
        }
        case BOOLEAN: {
            BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
            byte b = buffer.read(invert);
            assert (b == 1 || b == 2);
            r.set(b == 2);
            return r;
        }
        case BYTE: {
            ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
            r.set((byte) (buffer.read(invert) ^ 0x80));
            return r;
        }
        case SHORT: {
            ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
            int v = buffer.read(invert) ^ 0x80;
            v = (v << 8) + (buffer.read(invert) & 0xff);
            r.set((short) v);
            return r;
        }
        case INT: {
            IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
            r.set(deserializeInt(buffer, invert));
            return r;
        }
        case LONG: {
            LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
            long v = buffer.read(invert) ^ 0x80;
            for (int i = 0; i < 7; i++) {
                v = (v << 8) + (buffer.read(invert) & 0xff);
            }
            r.set(v);
            return r;
        }
        case FLOAT: {
            FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
            int v = 0;
            for (int i = 0; i < 4; i++) {
                v = (v << 8) + (buffer.read(invert) & 0xff);
            }
            if ((v & (1 << 31)) == 0) {
                // negative number, flip all bits
                v = ~v;
            } else {
                // positive number, flip the first bit
                v = v ^ (1 << 31);
            }
            r.set(Float.intBitsToFloat(v));
            return r;
        }
        case DOUBLE: {
            DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
            long v = 0;
            for (int i = 0; i < 8; i++) {
                v = (v << 8) + (buffer.read(invert) & 0xff);
            }
            if ((v & (1L << 63)) == 0) {
                // negative number, flip all bits
                v = ~v;
            } else {
                // positive number, flip the first bit
                v = v ^ (1L << 63);
            }
            r.set(Double.longBitsToDouble(v));
            return r;
        }
        case STRING: {
            Text r = reuse == null ? new Text() : (Text) reuse;
            return deserializeText(buffer, invert, r);
        }

        case VARCHAR: {
            HiveVarcharWritable r = reuse == null ? new HiveVarcharWritable() : (HiveVarcharWritable) reuse;
            // Use HiveVarchar's internal Text member to read the value.
            deserializeText(buffer, invert, r.getTextValue());
            // If we cache helper data for deserialization we could avoid having
            // to call getVarcharMaxLength() on every deserialize call.
            r.enforceMaxLength(getVarcharMaxLength(type));
            return r;
        }

        case BINARY: {
            BytesWritable bw = new BytesWritable();
            // Get the actual length first
            int start = buffer.tell();
            int length = 0;
            do {
                byte b = buffer.read(invert);
                if (b == 0) {
                    // end of string
                    break;
                }
                if (b == 1) {
                    // escape byte: consume the escaped char as well
                    buffer.read(invert);
                }
                length++;
            } while (true);

            if (length == buffer.tell() - start) {
                // No escaping happened, so we are already done.
                bw.set(buffer.getData(), start, length);
            } else {
                // Escaping happened, we need to copy byte-by-byte.
                // 1. Set the length first.
                bw.set(buffer.getData(), start, length);
                // 2. Reset the pointer.
                buffer.seek(start);
                // 3. Copy the data.
                byte[] rdata = bw.getBytes();
                for (int i = 0; i < length; i++) {
                    byte b = buffer.read(invert);
                    if (b == 1) {
                        // Escape byte: the next byte holds the actual value.
                        // The serialization format escapes \0 to \1 and \1 to \2
                        // to keep the string null-terminated.
                        b = (byte) (buffer.read(invert) - 1);
                    }
                    rdata[i] = b;
                }
                // 4. Read the null terminator.
                byte b = buffer.read(invert);
                assert (b == 0);
            }
            return bw;
        }

        case DATE: {
            DateWritable d = reuse == null ? new DateWritable() : (DateWritable) reuse;
            d.set(deserializeInt(buffer, invert));
            return d;
        }

        case TIMESTAMP:
            TimestampWritable t = (reuse == null ? new TimestampWritable() : (TimestampWritable) reuse);
            byte[] bytes = new byte[TimestampWritable.BINARY_SORTABLE_LENGTH];

            for (int i = 0; i < bytes.length; i++) {
                bytes[i] = buffer.read(invert);
            }
            t.setBinarySortable(bytes, 0);
            return t;

        case DECIMAL: {
            // See serialization of decimal for explanation (below)

            HiveDecimalWritable bdw = (reuse == null ? new HiveDecimalWritable() : (HiveDecimalWritable) reuse);

            int b = buffer.read(invert) - 1;
            assert (b == 1 || b == -1 || b == 0);
            boolean positive = b != -1;

            int factor = buffer.read(invert) ^ 0x80;
            for (int i = 0; i < 3; i++) {
                factor = (factor << 8) + (buffer.read(invert) & 0xff);
            }

            if (!positive) {
                factor = -factor;
            }

            int start = buffer.tell();
            int length = 0;

            do {
                b = buffer.read(positive ? invert : !invert);
                assert (b != 1);

                if (b == 0) {
                    // end of digits
                    break;
                }

                length++;
            } while (true);

            if (decimalBuffer == null || decimalBuffer.length < length) {
                decimalBuffer = new byte[length];
            }

            buffer.seek(start);
            for (int i = 0; i < length; ++i) {
                decimalBuffer[i] = buffer.read(positive ? invert : !invert);
            }

            // read the null byte again
            buffer.read(positive ? invert : !invert);

            String digits = new String(decimalBuffer, 0, length, decimalCharSet);
            BigInteger bi = new BigInteger(digits);
            HiveDecimal bd = new HiveDecimal(bi).scaleByPowerOfTen(factor - length);

            if (!positive) {
                bd = bd.negate();
            }

            bdw.set(bd);
            return bdw;
        }

        default: {
            throw new RuntimeException("Unrecognized type: " + ptype.getPrimitiveCategory());
        }
        }
    }

    case LIST: {
        ListTypeInfo ltype = (ListTypeInfo) type;
        TypeInfo etype = ltype.getListElementTypeInfo();

        // Create the list if needed
        ArrayList<Object> r = reuse == null ? new ArrayList<Object>() : (ArrayList<Object>) reuse;

        // Read the list
        int size = 0;
        while (true) {
            int more = buffer.read(invert);
            if (more == 0) {
                // \0 to terminate
                break;
            }
            // \1 followed by each element
            assert (more == 1);
            if (size == r.size()) {
                r.add(null);
            }
            r.set(size, deserialize(buffer, etype, invert, r.get(size)));
            size++;
        }
        // Remove additional elements if the list is reused
        while (r.size() > size) {
            r.remove(r.size() - 1);
        }
        return r;
    }
    case MAP: {
        MapTypeInfo mtype = (MapTypeInfo) type;
        TypeInfo ktype = mtype.getMapKeyTypeInfo();
        TypeInfo vtype = mtype.getMapValueTypeInfo();

        // Create the map if needed
        Map<Object, Object> r;
        if (reuse == null) {
            r = new HashMap<Object, Object>();
        } else {
            r = (HashMap<Object, Object>) reuse;
            r.clear();
        }

        while (true) {
            int more = buffer.read(invert);
            if (more == 0) {
                // \0 to terminate
                break;
            }
            // \1 followed by each key and then each value
            assert (more == 1);
            Object k = deserialize(buffer, ktype, invert, null);
            Object v = deserialize(buffer, vtype, invert, null);
            r.put(k, v);
        }
        return r;
    }
    case STRUCT: {
        StructTypeInfo stype = (StructTypeInfo) type;
        List<TypeInfo> fieldTypes = stype.getAllStructFieldTypeInfos();
        int size = fieldTypes.size();
        // Create the struct if needed
        ArrayList<Object> r = reuse == null ? new ArrayList<Object>(size) : (ArrayList<Object>) reuse;
        assert (r.size() <= size);
        // Set the size of the struct
        while (r.size() < size) {
            r.add(null);
        }
        // Read one field by one field
        for (int eid = 0; eid < size; eid++) {
            r.set(eid, deserialize(buffer, fieldTypes.get(eid), invert, r.get(eid)));
        }
        return r;
    }
    case UNION: {
        UnionTypeInfo utype = (UnionTypeInfo) type;
        StandardUnion r = reuse == null ? new StandardUnion() : (StandardUnion) reuse;
        // Read the tag
        byte tag = buffer.read(invert);
        r.setTag(tag);
        r.setObject(deserialize(buffer, utype.getAllUnionObjectTypeInfos().get(tag), invert, null));
        return r;
    }
    default: {
        throw new RuntimeException("Unrecognized type: " + type.getCategory());
    }
    }
}