Example usage for org.apache.hadoop.io.serializer Serializer open

Introduction

This page collects example usages of the org.apache.hadoop.io.serializer.Serializer open(OutputStream) method.

Prototype

void open(OutputStream out) throws IOException;

Document

Prepare the serializer for writing.

Usage
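
Every example below follows the same lifecycle: obtain a Serializer (typically from a SerializationFactory), call open(OutputStream) once to bind it to a stream, call serialize(T) one or more times, and finally call close(). The sketch below is a minimal, self-contained illustration of that pattern; it is not taken from any of the sources listed here, and the class name SerializerOpenSketch and the IntWritable payload are illustrative only.

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.io.serializer.Serializer;

public class SerializerOpenSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        SerializationFactory factory = new SerializationFactory(conf);

        // WritableSerialization is registered by default, so a serializer
        // for a Writable type such as IntWritable can be resolved.
        Serializer<IntWritable> serializer = factory.getSerializer(IntWritable.class);

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        serializer.open(out);                      // prepare the serializer for writing to 'out'
        serializer.serialize(new IntWritable(42)); // write one value to the stream
        serializer.close();                        // release the serializer and close the stream

        System.out.println("wrote " + out.size() + " bytes");
    }
}

As the examples show, callers that write many objects of the same type usually open the serializer once, serialize repeatedly, and close it only when the stream is finished.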

From source file:cascading.tuple.hadoop.SerializationElementWriter.java

License:Open Source License

public void write(DataOutputStream outputStream, Object object) throws IOException {
    Class<?> type = object.getClass();
    String className = type.getName();
    Integer token = tupleSerialization.getTokenFor(className);

    if (token == null) {
        if (LOG.isDebugEnabled())
            LOG.debug("no serialization token found for classname: " + className);

        WritableUtils.writeVInt(outputStream, TupleOutputStream.WRITABLE_TOKEN); // denotes to punt to hadoop serialization
        WritableUtils.writeString(outputStream, className);
    } else
        WritableUtils.writeVInt(outputStream, token);

    Serializer serializer = serializers.get(type);

    if (serializer == null) {
        serializer = tupleSerialization.getNewSerializer(type);
        serializer.open(outputStream);
        serializers.put(type, serializer);
    }

    try {
        serializer.serialize(object);
    } catch (IOException exception) {
        LOG.error("failed serializing token: " + token + " with classname: " + className, exception);

        throw exception;
    }
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.TaggedInputSplit.java

License:Apache License

@SuppressWarnings("unchecked")
@Override
public void write(DataOutput out) throws IOException {
    Text.writeString(out, name);
    Text.writeString(out, inputSplitClass.getName());
    Text.writeString(out, inputFormatClass.getName());
    Text.writeString(out, mapperClassName);
    Text.writeString(out, GSON.toJson(inputConfigs));
    SerializationFactory factory = new SerializationFactory(conf);
    Serializer serializer = factory.getSerializer(inputSplitClass);
    serializer.open((DataOutputStream) out);
    serializer.serialize(inputSplit);
}

From source file:com.ambiata.ivory.operation.hadoop.TaggedInputSplit.java

License:Apache License

@SuppressWarnings("unchecked")
public void write(DataOutput out) throws IOException {
    Text.writeString(out, inputSplitClass.getName());
    Text.writeString(out, inputFormatClass.getName());
    Text.writeString(out, mapperClass.getName());
    SerializationFactory factory = new SerializationFactory(conf);
    Serializer serializer = factory.getSerializer(inputSplitClass);
    serializer.open((DataOutputStream) out);
    serializer.serialize(inputSplit);
}

From source file:com.chinamobile.bcbsp.client.BSPJobClient.java

License:Apache License

/**
 * Write splits.
 * @param job BSPJob
 * @param submitSplitFile Path
 * @param <T> org.apache.hadoop.mapreduce.InputSplit
 * @return splitNum the number of splits
 */
@SuppressWarnings("unchecked")
private <T extends org.apache.hadoop.mapreduce.InputSplit> int writeSplits(BSPJob job, Path submitSplitFile)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration confs = job.getConf();
    com.chinamobile.bcbsp.io.InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(),
            confs);
    input.initialize(job.getConf());
    List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(job);
    int maxSplits = job.getNumPartition();
    int splitNum = splits.size();
    double factor = splitNum / (float) maxSplits;
    if (factor > 1.0) {
        job.setInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, (int) Math.ceil(factor));
        LOG.info("[Split Adjust Factor] " + (int) Math.ceil(factor));
        LOG.info("[Partition Num] " + maxSplits);
        splits = input.getSplits(job);
        splitNum = splits.size();
    }
    T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);
    // sort the splits into order based on size, so that the biggest
    // go first
    Arrays.sort(array, new NewSplitComparator());
    DataOutputStream out = writeSplitsFileHeader(confs, submitSplitFile, array.length);
    try {
        if (array.length != 0) {
            DataOutputBuffer buffer = new DataOutputBuffer();
            RawSplit rawSplit = new RawSplit();
            SerializationFactory factory = new SerializationFactory(confs);
            Serializer<T> serializer = factory.getSerializer((Class<T>) array[0].getClass());
            serializer.open(buffer);
            for (T split : array) {
                rawSplit.setClassName(split.getClass().getName());
                buffer.reset();
                serializer.serialize(split);
                rawSplit.setDataLength(split.getLength());
                rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
                rawSplit.setLocations(split.getLocations());
                rawSplit.write(out);
            }
            serializer.close();
        }
    } finally {
        out.close();
    }
    return splitNum;
}

From source file:com.chinamobile.bcbsp.partition.HashWithBalancerWritePartition.java

License:Apache License

/**
 * This method partitions the graph vertices, writing each vertex to its
 * corresponding partition. It calls the recordParse method to build a
 * HeadNode object, then calls the partitioner's getPartitionID method to
 * compute the id of the partition the HeadNode belongs to. If the HeadNode
 * belongs to the local partition it is written locally; otherwise it is
 * sent to the appropriate partition.
 * @param recordReader The recordreader of the split.
 * @throws IOException The io exception
 * @throws InterruptedException The Interrupted Exception
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
    int headNodeNum = 0;
    int local = 0;
    int send = 0;
    int lost = 0;
    ThreadPool tpool = new ThreadPool(this.sendThreadNum);
    int staffNum = this.staff.getStaffNum();
    BytesWritable kbytes = new BytesWritable();
    int ksize = 0;
    BytesWritable vbytes = new BytesWritable();
    int vsize = 0;
    DataOutputBuffer bb = new DataOutputBuffer();
    int bufferSize = (int) ((this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER) * PART);
    int dataBufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
            / (this.staff.getStaffNum() + this.sendThreadNum);
    byte[] buffer = new byte[bufferSize];
    int bufindex = 0;
    SerializationFactory sFactory = new SerializationFactory(new Configuration());
    Serializer<IntWritable> psserializer = sFactory.getSerializer(IntWritable.class);
    byte[] pidandsize = new byte[TIME * CONTAINERNUMBER * CONTAINERNUMBER];
    int psindex = 0;
    BytesWritable pidbytes = new BytesWritable();
    int psize = 0;
    BytesWritable sizebytes = new BytesWritable();
    int ssize = 0;
    try {
        this.keyserializer.open(bb);
        this.valueserializer.open(bb);
        psserializer.open(bb);
    } catch (IOException e) {
        throw e;
    }
    String path = "/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID();
    File dir = new File("/tmp/bcbsp/" + this.staff.getJobID());
    dir.mkdir();
    dir = new File("/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID());
    dir.mkdir();
    ArrayList<File> files = new ArrayList<File>();
    try {
        File file = new File(path + "/" + "data" + ".txt");
        files.add(file);
        DataOutputStream dataWriter = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(path + "/" + "data" + ".txt", true)));
        DataInputStream dataReader = new DataInputStream(
                new BufferedInputStream(new FileInputStream(path + "/" + "data" + ".txt")));
        File filet = new File(path + "/" + "pidandsize" + ".txt");
        files.add(filet);
        DataOutputStream psWriter = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(path + "/" + "pidandsize" + ".txt", true)));
        DataInputStream psReader = new DataInputStream(
                new BufferedInputStream(new FileInputStream(path + "/" + "pidandsize" + ".txt")));
        while (recordReader != null && recordReader.nextKeyValue()) {
            headNodeNum++;
            Text key = new Text(recordReader.getCurrentKey().toString());
            Text value = new Text(recordReader.getCurrentValue().toString());
            int pid = -1;
            Text vertexID = this.recordParse.getVertexID(key);
            if (vertexID != null) {
                pid = this.partitioner.getPartitionID(vertexID);
            } else {
                lost++;
                continue;
            }
            if (this.counter.containsKey(pid)) {
                this.counter.put(pid, (this.counter.get(pid) + 1));
            } else {
                this.counter.put(pid, 1);
            }
            bb.reset();
            this.keyserializer.serialize(key);
            kbytes.set(bb.getData(), 0, bb.getLength());
            ksize = kbytes.getLength();
            bb.reset();
            this.valueserializer.serialize(value);
            vbytes.set(bb.getData(), 0, bb.getLength());
            vsize = vbytes.getLength();
            bb.reset();
            psserializer.serialize(new IntWritable(ksize + vsize));
            sizebytes.set(bb.getData(), 0, bb.getLength());
            ssize = sizebytes.getLength();
            bb.reset();
            psserializer.serialize(new IntWritable(pid));
            pidbytes.set(bb.getData(), 0, bb.getLength());
            psize = pidbytes.getLength();
            if ((pidandsize.length - psindex) > (ssize + psize)) {
                System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize);
                psindex += ssize;
                System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize);
                psindex += psize;
            } else {
                psWriter.write(pidandsize, 0, psindex);
                psindex = 0;
                System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize);
                psindex += ssize;
                System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize);
                psindex += psize;
            }
            if ((buffer.length - bufindex) > (ksize + vsize)) {
                System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize);
                bufindex += ksize;
                System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize);
                bufindex += vsize;
            } else if (buffer.length < (ksize + vsize)) {
                dataWriter.write(buffer, 0, bufindex);
                bufindex = 0;
                LOG.info("This is a super record");
                dataWriter.write(kbytes.getBytes(), 0, ksize);
                dataWriter.write(vbytes.getBytes(), 0, vsize);
            } else {
                dataWriter.write(buffer, 0, bufindex);
                bufindex = 0;
                System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize);
                bufindex += ksize;
                System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize);
                bufindex += vsize;
            }
        }
        if (psindex != 0) {
            psWriter.write(pidandsize, 0, psindex);
        }
        if (bufindex != 0) {
            dataWriter.write(buffer, 0, bufindex);
            bufindex = 0;
        }
        dataWriter.close();
        dataWriter = null;
        psWriter.close();
        psWriter = null;
        buffer = null;
        pidandsize = null;
        this.ssrc.setDirFlag(new String[] { "3" });
        this.ssrc.setCounter(this.counter);
        HashMap<Integer, Integer> hashBucketToPartition = this.sssc.loadDataInBalancerBarrier(ssrc,
                Constants.PARTITION_TYPE.HASH);
        this.staff.setHashBucketToPartition(hashBucketToPartition);
        byte[][] databuf = new byte[staffNum][dataBufferSize];
        int[] databufindex = new int[staffNum];
        try {
            IntWritable pid = new IntWritable();
            IntWritable size = new IntWritable();
            int belongPid = 0;
            while (true) {
                size.readFields(psReader);
                pid.readFields(psReader);
                belongPid = hashBucketToPartition.get(pid.get());
                if (belongPid != this.staff.getPartition()) {
                    send++;
                } else {
                    local++;
                }
                if ((databuf[belongPid].length - databufindex[belongPid]) > size.get()) {
                    dataReader.read(databuf[belongPid], databufindex[belongPid], size.get());
                    databufindex[belongPid] += size.get();
                } else if (databuf[belongPid].length < size.get()) {
                    LOG.info("This is a super record");
                    byte[] tmp = new byte[size.get()];
                    dataReader.read(tmp, 0, size.get());
                    if (belongPid == this.staff.getPartition()) {
                        DataInputStream reader = new DataInputStream(
                                new BufferedInputStream(new ByteArrayInputStream(tmp)));
                        try {
                            boolean stop = true;
                            while (stop) {
                                Text key = new Text();
                                key.readFields(reader);
                                Text value = new Text();
                                value.readFields(reader);
                                if (key.getLength() > 0 && value.getLength() > 0) {
                                    Vertex vertex = this.recordParse.recordParse(key.toString(),
                                            value.toString());
                                    if (vertex == null) {
                                        lost++;
                                        continue;
                                    }
                                    this.staff.getGraphData().addForAll(vertex);
                                } else {
                                    stop = false;
                                }
                            }
                        } catch (IOException e) {
                            LOG.info("IO exception: " + e.getStackTrace());
                        }
                    } else {
                        ThreadSignle t = tpool.getThread();
                        while (t == null) {
                            t = tpool.getThread();
                        }
                        t.setWorker(
                                this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid));
                        t.setJobId(staff.getJobID());
                        t.setTaskId(staff.getStaffID());
                        t.setBelongPartition(belongPid);
                        BytesWritable data = new BytesWritable();
                        data.set(tmp, 0, size.get());
                        t.setData(data);
                        LOG.info("Using Thread is: " + t.getThreadNumber());
                        t.setStatus(true);
                    }
                    tmp = null;
                } else {
                    if (belongPid == this.staff.getPartition()) {
                        DataInputStream reader = new DataInputStream(new BufferedInputStream(
                                new ByteArrayInputStream(databuf[belongPid], 0, databufindex[belongPid])));
                        try {
                            boolean stop = true;
                            while (stop) {
                                Text key = new Text();
                                key.readFields(reader);
                                Text value = new Text();
                                value.readFields(reader);
                                if (key.getLength() > 0 && value.getLength() > 0) {
                                    Vertex vertex = this.recordParse.recordParse(key.toString(),
                                            value.toString());
                                    if (vertex == null) {
                                        lost++;
                                        continue;
                                    }
                                    this.staff.getGraphData().addForAll(vertex);
                                } else {
                                    stop = false;
                                }
                            }
                        } catch (IOException e) {
                            LOG.info("IO exception: " + e.getStackTrace());
                        }
                    } else {
                        ThreadSignle t = tpool.getThread();
                        while (t == null) {
                            t = tpool.getThread();
                        }
                        t.setWorker(
                                this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid));
                        t.setJobId(staff.getJobID());
                        t.setTaskId(staff.getStaffID());
                        t.setBelongPartition(belongPid);
                        BytesWritable data = new BytesWritable();
                        data.set(databuf[belongPid], 0, databufindex[belongPid]);
                        t.setData(data);
                        LOG.info("Using Thread is: " + t.getThreadNumber());
                        t.setStatus(true);
                    }
                    databufindex[belongPid] = 0;
                    dataReader.read(databuf[belongPid], databufindex[belongPid], size.get());
                    databufindex[belongPid] += size.get();
                }
            }
        } catch (EOFException ex) {
            LOG.error("[write]", ex);
        }
        for (int i = 0; i < staffNum; i++) {
            if (databufindex[i] != 0) {
                if (i == this.staff.getPartition()) {
                    DataInputStream reader = new DataInputStream(
                            new BufferedInputStream(new ByteArrayInputStream(databuf[i], 0, databufindex[i])));
                    try {
                        boolean stop = true;
                        while (stop) {
                            Text key = new Text();
                            key.readFields(reader);
                            Text value = new Text();
                            value.readFields(reader);
                            if (key.getLength() > 0 && value.getLength() > 0) {
                                Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                                if (vertex == null) {
                                    lost++;
                                    continue;
                                }
                                this.staff.getGraphData().addForAll(vertex);
                            } else {
                                stop = false;
                            }
                        }
                    } catch (IOException e) {
                        LOG.info("IO exception: " + e.getStackTrace());
                    }
                } else {
                    ThreadSignle t = tpool.getThread();
                    while (t == null) {
                        t = tpool.getThread();
                    }
                    t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
                    t.setJobId(staff.getJobID());
                    t.setTaskId(staff.getStaffID());
                    t.setBelongPartition(i);
                    BytesWritable data = new BytesWritable();
                    data.set(databuf[i], 0, databufindex[i]);
                    t.setData(data);
                    LOG.info("Using Thread is: " + t.getThreadNumber());
                    t.setStatus(true);
                }
            }
        }
        dataReader.close();
        dataReader = null;
        psReader.close();
        psReader = null;
        for (File f : files) {
            f.delete();
        }
        dir.delete();
        dir = new File(path.substring(0, path.lastIndexOf('/')));
        dir.delete();
        tpool.cleanup();
        tpool = null;
        databuf = null;
        databufindex = null;
        this.counter = null;
        LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
        LOG.info("The number of vertices that were put into the partition: " + local);
        LOG.info("The number of vertices that were sent to other partitions: " + send);
        LOG.info("The number of verteices in the partition that cound not be " + "parsed:" + lost);
    } catch (IOException e) {
        throw e;
    } catch (InterruptedException e) {
        throw e;
    } finally {
        for (File f : files) {
            f.delete();
        }
        dir.delete();
        dir = new File(path.substring(0, path.lastIndexOf('/')));
        dir.delete();
    }
}

From source file:com.cloudera.crunch.impl.mr.run.CrunchInputSplit.java

License:Apache License

public void write(DataOutput out) throws IOException {
    out.writeInt(nodeIndex);
    Text.writeString(out, inputFormatClass.getName());
    Text.writeString(out, inputSplit.getClass().getName());
    SerializationFactory factory = new SerializationFactory(conf);
    Serializer serializer = factory.getSerializer(inputSplit.getClass());
    serializer.open((DataOutputStream) out);
    serializer.serialize(inputSplit);
}

From source file:com.datasalt.pangool.serialization.HadoopSerialization.java

License:Apache License

/**
 * Serializes the given object using the Hadoop serialization system.
 */
public void ser(Object datum, OutputStream output) throws IOException {
    Map<Class, Serializer> serializers = cachedSerializers.get();
    Serializer ser = serializers.get(datum.getClass());
    if (ser == null) {
        ser = serialization.getSerializer(datum.getClass());
        if (ser == null) {
            throw new IOException("Serializer for class " + datum.getClass() + " not found");
        }
        serializers.put(datum.getClass(), ser);
    }
    ser.open(output);
    ser.serialize(datum);
    ser.close();
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.TaggedInputSplit.java

License:Apache License

@SuppressWarnings("unchecked")
public void write(DataOutput out) throws IOException {
    Text.writeString(out, inputSplitClass.getName());
    Text.writeString(out, inputFormatFile);
    Text.writeString(out, inputProcessorFile);
    SerializationFactory factory = new SerializationFactory(conf);
    Serializer serializer = factory.getSerializer(inputSplitClass);
    serializer.open((DataOutputStream) out);
    serializer.serialize(inputSplit);
}

From source file:com.datasalt.pangool.tuplemr.mapred.SerializerComparator.java

License:Apache License

@SuppressWarnings({ "rawtypes", "unchecked" })
public int compare(Object o1, Serializer ser1, Object o2, Serializer ser2) {
    try {
        if (o1 == null) {
            return (o2 == null) ? 0 : -1;
        } else if (o2 == null) {
            return 1;
        }

        buf1.reset();
        if (ser1 == null) {
            hadoopSer.ser(o1, buf1);
        } else {
            ser1.open(buf1);
            ser1.serialize(o1);
            ser1.close();
        }
        buf2.reset();
        if (ser2 == null) {
            hadoopSer.ser(o2, buf2);
        } else {
            ser2.open(buf2);
            ser2.serialize(o2);
            ser2.close();
        }

        return WritableComparator.compareBytes(buf1.getData(), 0, buf1.getLength(), buf2.getData(), 0,
                buf2.getLength());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.datasalt.pangool.tuplemr.serialization.SimpleTupleSerializer.java

License:Apache License

private void writeCustomObject(Object element, DataOutput output, Serializer customSer)
        throws CustomObjectSerializationException {
    try {
        tmpOutputBuffer.reset();
        if (customSer != null) {
            customSer.open(tmpOutputBuffer);
            customSer.serialize(element);
            customSer.close();
        } else {
            // If no custom serializer defined then use Hadoop Serialization by default
            ser.ser(element, tmpOutputBuffer);
        }
        WritableUtils.writeVInt(output, tmpOutputBuffer.getLength());
        output.write(tmpOutputBuffer.getData(), 0, tmpOutputBuffer.getLength());
    } catch (Throwable e) {
        throw new CustomObjectSerializationException(e);
    }
}