Example usage for org.apache.hadoop.io BytesWritable set

List of usage examples for org.apache.hadoop.io BytesWritable set

Introduction

On this page you can find usage examples for org.apache.hadoop.io BytesWritable set.

Prototype

public void set(byte[] newData, int offset, int length) 

Document

Set the value to a copy of the given byte range.
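
A minimal sketch of the call itself (the payload bytes below are illustrative and not taken from any of the sources listed under Usage):

import java.util.Arrays;

import org.apache.hadoop.io.BytesWritable;

public class BytesWritableSetSketch {
    public static void main(String[] args) {
        byte[] payload = "hello".getBytes();        // illustrative data
        BytesWritable writable = new BytesWritable();

        // Copies length bytes of payload, starting at offset 0, into the
        // writable's internal buffer and sets its logical length.
        writable.set(payload, 0, payload.length);

        System.out.println(writable.getLength());   // prints 5

        // getBytes() may return a backing array larger than getLength(),
        // so only read back the first getLength() bytes.
        byte[] copy = Arrays.copyOf(writable.getBytes(), writable.getLength());
        System.out.println(new String(copy));       // prints "hello"
    }
}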

Usage

From source file: uk.bl.wa.hadoop.mapred.ByteBlockRecordReader.java

License: Open Source License

@Override
public boolean next(Path path, BytesWritable buf) throws IOException {
    int buf_size;
    long remaining = file_length - bytes_read;
    if (remaining < Integer.MAX_VALUE) {
        buf_size = (int) remaining;
    } else {
        buf_size = Integer.MAX_VALUE;
    }
    byte[] bytes = new byte[buf_size];

    // Attempt to read a big chunk (n.b. using a single .read() can require
    // multiple reads):
    int count = IOUtils.read(fsdis, bytes);

    // If we're out of bytes, report that:
    if (count == -1) {
        log.info("Read " + count + " bytes into RAM, total read: " + bytes_read);
        buf.set(new byte[] {}, 0, 0);
        return false;
    } else {
        log.info("Read " + count + " bytes into RAM, total read: " + bytes_read);
        bytes_read += count;
        // Otherwise, push the new bytes into the BytesWritable:
        buf.set(bytes, 0, count);
        return true;
    }
}
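
Note that the reader passes count rather than bytes.length to set, so the BytesWritable reflects only the bytes actually read from the stream rather than the full buffer that was allocated.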

From source file: weka.distributed.hadoop.CorrelationMatrixHadoopMapper.java

License: Open Source License

@Override
public void cleanup(Context context) throws IOException, InterruptedException {

    // output all the rows in this partial matrix
    double[][] partialMatrix = m_task.getMatrix();
    int[][] coOcc = m_task.getCoOccurrenceCounts();

    for (int i = 0; i < partialMatrix.length; i++) {
        double[] row = partialMatrix[i];
        int[] co = null;
        if (coOcc != null) {
            co = coOcc[i];
        }
        MatrixRowHolder rh = new MatrixRowHolder(i, row, co);
        byte[] bytes = rowHolderToBytes(rh);

        String sKey = ("" + i);
        Text key = new Text();
        key.set(sKey);

        BytesWritable value = new BytesWritable();
        value.set(bytes, 0, bytes.length);

        context.write(key, value);
    }
}

From source file: weka.distributed.hadoop.CSVToArffHeaderHadoopMapper.java

License: Open Source License

@Override
public void cleanup(Context context) throws IOException, InterruptedException {

    if (m_fatalMappingError != null) {
        throw m_fatalMappingError;
    }

    HeaderAndQuantileDataHolder holder = null;
    Instances header = null;
    if (!m_estimateQuantiles) {
        header = m_task.getHeader();
    } else {
        try {
            holder = m_task.getHeaderAndQuantileEstimators();
        } catch (DistributedWekaException ex) {
            throw new IOException(ex);
        }
    }

    ByteArrayOutputStream ostream = new ByteArrayOutputStream();
    OutputStream os = ostream;
    ObjectOutputStream p;

    p = new ObjectOutputStream(new BufferedOutputStream(new GZIPOutputStream(os)));
    p.writeObject(header != null ? header : holder);
    p.flush();
    p.close();

    byte[] bytes = ostream.toByteArray();
    // make sure all headers go to the same reducer
    String constantKey = "header";

    Text key = new Text();
    key.set(constantKey);
    BytesWritable value = new BytesWritable();
    value.set(bytes, 0, bytes.length);
    context.write(key, value); // write the header
}
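
The Weka mappers in these examples all follow the same pattern: serialize a Java object to a byte array, then copy the whole array into a BytesWritable before writing it out. A minimal, self-contained sketch of that pattern follows; the helper name objectToBytes is hypothetical and is based on the inline serialization shown in the CSVToArffHeaderHadoopMapper example above, not on the actual rowHolderToBytes()/classifierToBytes() implementations.

import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.zip.GZIPOutputStream;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;

public class SerializedValueSketch {

    // Hypothetical helper: serialize the object through a gzipped stream
    // and return the raw bytes, as done inline in the example above.
    static byte[] objectToBytes(Object o) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        ObjectOutputStream oos = new ObjectOutputStream(
                new BufferedOutputStream(new GZIPOutputStream(bos)));
        oos.writeObject(o);
        oos.flush();
        oos.close();                         // closing finishes the gzip stream
        return bos.toByteArray();
    }

    public static void main(String[] args) throws IOException {
        byte[] bytes = objectToBytes("some serializable payload");

        Text key = new Text();
        key.set("header");                   // constant key, as in the mappers above

        BytesWritable value = new BytesWritable();
        value.set(bytes, 0, bytes.length);   // copy the full serialized payload

        // In a real Mapper this pair would go to context.write(key, value).
        System.out.println(key + " -> " + value.getLength() + " bytes");
    }
}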

From source file: weka.distributed.hadoop.KMeansCentroidSketchHadoopMapper.java

License: Open Source License

@Override
public void cleanup(Context context) throws IOException, InterruptedException {
    // emit serialized sketch tasks with run number as key
    for (int i = 0; i < m_tasks.length; i++) {
        System.err.println("Number of instances in sketch: " + m_tasks[i].getCurrentSketch().numInstances());
        System.err.println(
                "Number of instances in reservoir: " + m_tasks[i].getReservoirSample().getSample().size());
        byte[] bytes = sketchToBytes(m_tasks[i]);
        String runNum = "run" + i;
        Text key = new Text();
        key.set(runNum);
        BytesWritable value = new BytesWritable();
        value.set(bytes, 0, bytes.length);
        context.write(key, value);
    }
}

From source file: weka.distributed.hadoop.KMeansHadoopMapper.java

License: Open Source License

@Override
public void cleanup(Context context) throws IOException, InterruptedException {
    for (int i = 0; i < m_numRuns; i++) {
        if (!m_tasks[i].getConverged()) {
            // List<Instances> centroidStatsForRun = m_tasks[i].getCentroidStats();
            byte[] bytes = centroidStatsToBytes(m_tasks[i]);

            String runNum = "run" + i;
            Text key = new Text();
            key.set(runNum);

            BytesWritable value = new BytesWritable();
            value.set(bytes, 0, bytes.length);
            context.write(key, value);
        }
    }
}

From source file: weka.distributed.hadoop.WekaClassifierHadoopMapper.java

License: Open Source License

@Override
public void cleanup(Context context) throws IOException, InterruptedException {
    try {
        m_task.finalizeTask();
        // System.err.println("Model after training:\n"
        // + m_task.getClassifier().toString());

        byte[] bytes = classifierToBytes(m_task.getClassifier(), m_task.getNumTrainingInstances());

        // make sure all classifiers go to the same reducer
        String constantKey = "classifier";

        Text key = new Text();
        key.set(constantKey);
        BytesWritable value = new BytesWritable();
        value.set(bytes, 0, bytes.length);

        context.write(key, value);
    } catch (Exception ex) {
        throw new IOException(ex);
    }
}

From source file: weka.distributed.hadoop.WekaFoldBasedClassifierEvaluationHadoopMapper.java

License: Open Source License

@Override
public void cleanup(Context context) throws IOException {
    try {
        // aggregate the stats over all folds in this chunk
        AggregateableEvaluation agg = null;
        for (int i = 0; i < m_totalFolds; i++) {
            if (!m_classifierIsUpdateable || m_forceBatch) {
                String modelToLoad = "" + (i + 1) + "_" + m_originalModelFileName;
                Classifier foldModel = WekaClassifierHadoopMapper.loadClassifier(modelToLoad);
                m_tasks[i].setClassifier(foldModel);
            }

            m_tasks[i].finalizeTask();
            Evaluation eval = m_tasks[i].getEvaluation();

            // save memory
            m_tasks[i] = null;

            if (agg == null) {
                agg = new AggregateableEvaluation(eval);
            }
            agg.aggregate(eval);
        }

        if (agg != null) {
            byte[] bytes = evalToBytes(agg);
            String constantKey = "evaluation";
            Text key = new Text();
            key.set(constantKey);

            BytesWritable value = new BytesWritable();
            value.set(bytes, 0, bytes.length);

            context.write(key, value);
        }
    } catch (Exception ex) {
        throw new IOException(ex);
    }
}

From source file: weka.distributed.hadoop.WekaFoldBasedClassifierHadoopMapper.java

License: Open Source License

@Override
public void cleanup(Context context) throws IOException, InterruptedException {
    try {
        for (int i = 0; i < m_totalFolds; i++) {
            m_tasks[i].finalizeTask();

            // System.err.println("Model after continued training on fold " + (i +
            // 1)
            // + ":\n" + m_tasks[i].getClassifier().toString());

            byte[] bytes = classifierToBytes(m_tasks[i].getClassifier(), m_tasks[i].getNumTrainingInstances());

            String constantKey = "classifier_fold_" + (i + 1);

            Text key = new Text();
            key.set(constantKey);
            BytesWritable value = new BytesWritable();
            value.set(bytes, 0, bytes.length);

            context.write(key, value);

            // save memory
            m_tasks[i] = null;
        }
    } catch (Exception ex) {
        throw new IOException(ex);
    }
}