Example usage for org.apache.hadoop.io BytesWritable set

List of usage examples for org.apache.hadoop.io BytesWritable set

Introduction

On this page you can find usage examples for org.apache.hadoop.io BytesWritable set.

Prototype

public void set(byte[] newData, int offset, int length) 

Document

Set the value to a copy of the given byte range.
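
A minimal sketch of the call itself (the payload bytes below are illustrative and not taken from any of the sources listed under Usage):

import java.util.Arrays;

import org.apache.hadoop.io.BytesWritable;

public class BytesWritableSetSketch {
    public static void main(String[] args) {
        byte[] payload = "hello".getBytes();        // illustrative data
        BytesWritable writable = new BytesWritable();

        // Copies length bytes of payload, starting at offset 0, into the
        // writable's internal buffer and sets its logical length.
        writable.set(payload, 0, payload.length);

        System.out.println(writable.getLength());   // prints 5

        // getBytes() may return a backing array larger than getLength(),
        // so only read back the first getLength() bytes.
        byte[] copy = Arrays.copyOf(writable.getBytes(), writable.getLength());
        System.out.println(new String(copy));       // prints "hello"
    }
}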

Usage

From source file: uk.bl.wa.hadoop.mapred.ByteBlockRecordReader.java

License: Open Source License

@Override
public boolean next(Path path, BytesWritable buf) throws IOException {
    int buf_size;
    long remaining = file_length - bytes_read;
    if (remaining < Integer.MAX_VALUE) {
        buf_size = (int) remaining;
    } else {
        buf_size = Integer.MAX_VALUE;
    }
    byte[] bytes = new byte[buf_size];

    // Attempt to read a big chunk (n.b. using a single .read() can require
    // multiple reads):
    int count = IOUtils.read(fsdis, bytes);

    // If we're out of bytes, report that:
    if (count == -1) {
        log.info("Read " + count + " bytes into RAM, total read: " + bytes_read);
        buf.set(new byte[] {}, 0, 0);
        return false;
    } else {
        log.info("Read " + count + " bytes into RAM, total read: " + bytes_read);
        bytes_read += count;
        // Otherwise, push the new bytes into the BytesWritable:
        buf.set(bytes, 0, count);
        return true;
    }
}
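
Note that the reader passes count rather than bytes.length to set, so the BytesWritable reflects only the bytes actually read from the stream rather than the full buffer that was allocated.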

From source file: weka.distributed.hadoop.CorrelationMatrixHadoopMapper.java

License: Open Source License

@Override
public void cleanup(Context context) throws IOException, InterruptedException {

    // output all the rows in this partial matrix
    double[][] partialMatrix = m_task.getMatrix();
    int[][] coOcc = m_task.getCoOccurrenceCounts();

    for (int i = 0; i < partialMatrix.length; i++) {
        double[] row = partialMatrix[i];
        int[] co = null;
        if (coOcc != null) {
            co = coOcc[i];
        }
        MatrixRowHolder rh = new MatrixRowHolder(i, row, co);
        byte[] bytes = rowHolderToBytes(rh);

        String sKey = ("" + i);
        Text key = new Text();
        key.set(sKey);

        BytesWritable value = new BytesWritable();
        value.set(bytes, 0, bytes.length);

        context.write(key, value);
    }
}

From source file: weka.distributed.hadoop.CSVToArffHeaderHadoopMapper.java

License: Open Source License

@Override
public void cleanup(Context context) throws IOException, InterruptedException {

    if (m_fatalMappingError != null) {
        throw m_fatalMappingError;
    }

    HeaderAndQuantileDataHolder holder = null;
    Instances header = null;
    if (!m_estimateQuantiles) {
        header = m_task.getHeader();
    } else {
        try {
            holder = m_task.getHeaderAndQuantileEstimators();
        } catch (DistributedWekaException ex) {
            throw new IOException(ex);
        }
    }

    ByteArrayOutputStream ostream = new ByteArrayOutputStream();
    OutputStream os = ostream;
    ObjectOutputStream p;

    p = new ObjectOutputStream(new BufferedOutputStream(new GZIPOutputStream(os)));
    p.writeObject(header != null ? header : holder);
    p.flush();
    p.close();

    byte[] bytes = ostream.toByteArray();
    // make sure all headers go to the same reducer
    String constantKey = "header";

    Text key = new Text();
    key.set(constantKey);
    BytesWritable value = new BytesWritable();
    value.set(bytes, 0, bytes.length);
    context.write(key, value); // write the header
}
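
The Weka mappers in these examples all follow the same pattern: serialize a Java object to a byte array, then copy the whole array into a BytesWritable before writing it out. A minimal, self-contained sketch of that pattern follows; the helper name objectToBytes is hypothetical and is based on the inline serialization shown in the CSVToArffHeaderHadoopMapper example above, not on the actual rowHolderToBytes()/classifierToBytes() implementations.

import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.zip.GZIPOutputStream;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;

public class SerializedValueSketch {

    // Hypothetical helper: serialize the object through a gzipped stream
    // and return the raw bytes, as done inline in the example above.
    static byte[] objectToBytes(Object o) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        ObjectOutputStream oos = new ObjectOutputStream(
                new BufferedOutputStream(new GZIPOutputStream(bos)));
        oos.writeObject(o);
        oos.flush();
        oos.close();                         // closing finishes the gzip stream
        return bos.toByteArray();
    }

    public static void main(String[] args) throws IOException {
        byte[] bytes = objectToBytes("some serializable payload");

        Text key = new Text();
        key.set("header");                   // constant key, as in the mappers above

        BytesWritable value = new BytesWritable();
        value.set(bytes, 0, bytes.length);   // copy the full serialized payload

        // In a real Mapper this pair would go to context.write(key, value).
        System.out.println(key + " -> " + value.getLength() + " bytes");
    }
}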

From source file: weka.distributed.hadoop.KMeansCentroidSketchHadoopMapper.java

License: Open Source License

@Override
public void cleanup(Context context) throws IOException, InterruptedException {
    // emit serialized sketch tasks with run number as key
    for (int i = 0; i < m_tasks.length; i++) {
        System.err.println("Number of instances in sketch: " + m_tasks[i].getCurrentSketch().numInstances());
        System.err.println(
                "Number of instances in reservoir: " + m_tasks[i].getReservoirSample().getSample().size());
        byte[] bytes = sketchToBytes(m_tasks[i]);
        String runNum = "run" + i;
        Text key = new Text();
        key.set(runNum);
        BytesWritable value = new BytesWritable();
        value.set(bytes, 0, bytes.length);
        context.write(key, value);
    }
}

From source file: weka.distributed.hadoop.KMeansHadoopMapper.java

License: Open Source License

@Override
public void cleanup(Context context) throws IOException, InterruptedException {
    for (int i = 0; i < m_numRuns; i++) {
        if (!m_tasks[i].getConverged()) {
            // List<Instances> centroidStatsForRun = m_tasks[i].getCentroidStats();
            byte[] bytes = centroidStatsToBytes(m_tasks[i]);

            String runNum = "run" + i;
            Text key = new Text();
            key.set(runNum);

            BytesWritable value = new BytesWritable();
            value.set(bytes, 0, bytes.length);
            context.write(key, value);
        }
    }
}

From source file: weka.distributed.hadoop.WekaClassifierHadoopMapper.java

License: Open Source License

@Override
public void cleanup(Context context) throws IOException, InterruptedException {
    try {
        m_task.finalizeTask();
        // System.err.println("Model after training:\n"
        // + m_task.getClassifier().toString());

        byte[] bytes = classifierToBytes(m_task.getClassifier(), m_task.getNumTrainingInstances());

        // make sure all classifiers go to the same reducer
        String constantKey = "classifier";

        Text key = new Text();
        key.set(constantKey);
        BytesWritable value = new BytesWritable();
        value.set(bytes, 0, bytes.length);

        context.write(key, value);
    } catch (Exception ex) {
        throw new IOException(ex);
    }
}

From source file: weka.distributed.hadoop.WekaFoldBasedClassifierEvaluationHadoopMapper.java

License: Open Source License

@Override
public void cleanup(Context context) throws IOException {
    try {
        // aggregate the stats over all folds in this chunk
        AggregateableEvaluation agg = null;
        for (int i = 0; i < m_totalFolds; i++) {
            if (!m_classifierIsUpdateable || m_forceBatch) {
                String modelToLoad = "" + (i + 1) + "_" + m_originalModelFileName;
                Classifier foldModel = WekaClassifierHadoopMapper.loadClassifier(modelToLoad);
                m_tasks[i].setClassifier(foldModel);
            }

            m_tasks[i].finalizeTask();
            Evaluation eval = m_tasks[i].getEvaluation();

            // save memory
            m_tasks[i] = null;

            if (agg == null) {
                agg = new AggregateableEvaluation(eval);
            }
            agg.aggregate(eval);
        }

        if (agg != null) {
            byte[] bytes = evalToBytes(agg);
            String constantKey = "evaluation";
            Text key = new Text();
            key.set(constantKey);

            BytesWritable value = new BytesWritable();
            value.set(bytes, 0, bytes.length);

            context.write(key, value);
        }
    } catch (Exception ex) {
        throw new IOException(ex);
    }
}

From source file: weka.distributed.hadoop.WekaFoldBasedClassifierHadoopMapper.java

License: Open Source License

@Override
public void cleanup(Context context) throws IOException, InterruptedException {
    try {
        for (int i = 0; i < m_totalFolds; i++) {
            m_tasks[i].finalizeTask();

            // System.err.println("Model after continued training on fold " + (i +
            // 1)
            // + ":\n" + m_tasks[i].getClassifier().toString());

            byte[] bytes = classifierToBytes(m_tasks[i].getClassifier(), m_tasks[i].getNumTrainingInstances());

            String constantKey = "classifier_fold_" + (i + 1);

            Text key = new Text();
            key.set(constantKey);
            BytesWritable value = new BytesWritable();
            value.set(bytes, 0, bytes.length);

            context.write(key, value);

            // save memory
            m_tasks[i] = null;
        }
    } catch (Exception ex) {
        throw new IOException(ex);
    }
}