Usage examples for org.apache.hadoop.io.BytesWritable.set()

public void set(byte[] newData, int offset, int length)
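Before the examples, a minimal sketch of the method's copy semantics: set() copies length bytes from newData, starting at offset, into the writable's own backing buffer, so the caller's array can be reused or mutated afterwards. BytesWritable and its set()/getBytes()/getLength() methods are real Hadoop API; the main() harness below is illustrative only.

import org.apache.hadoop.io.BytesWritable;

public class BytesWritableSetDemo {
    public static void main(String[] args) {
        byte[] data = "hello hadoop".getBytes();

        // Copy the 6 bytes of "hadoop" (offset 6) into the writable's own buffer
        BytesWritable bw = new BytesWritable();
        bw.set(data, 6, 6);

        // The caller's array can now be changed freely; bw holds its own copy
        data[6] = 'X';

        // getLength() is the logical size; getBytes() returns the backing
        // buffer, which may be larger than getLength() due to over-allocation
        System.out.println(bw.getLength());                               // 6
        System.out.println(new String(bw.getBytes(), 0, bw.getLength())); // hadoop
    }
}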
From source file: uk.bl.wa.hadoop.mapred.ByteBlockRecordReader.java
License: Open Source License

@Override
public boolean next(Path path, BytesWritable buf) throws IOException {
    int buf_size;
    long remaining = file_length - bytes_read;
    if (remaining < Integer.MAX_VALUE) {
        buf_size = (int) remaining;
    } else {
        buf_size = Integer.MAX_VALUE;
    }
    byte[] bytes = new byte[buf_size];
    // Attempt to read a big chunk (n.b. using a single .read() can require
    // multiple reads):
    int count = IOUtils.read(fsdis, bytes);
    // If we're out of bytes, report that:
    if (count == -1) {
        log.info("Read " + count + " bytes into RAM, total read: " + bytes_read);
        buf.set(new byte[] {}, 0, 0);
        return false;
    } else {
        log.info("Read " + count + " bytes into RAM, total read: " + bytes_read);
        bytes_read += count;
        // Otherwise, push the new bytes into the BytesWritable:
        buf.set(bytes, 0, count);
        return true;
    }
}
From source file: weka.distributed.hadoop.CorrelationMatrixHadoopMapper.java
License: Open Source License

@Override
public void cleanup(Context context) throws IOException, InterruptedException {
    // output all the rows in this partial matrix
    double[][] partialMatrix = m_task.getMatrix();
    int[][] coOcc = m_task.getCoOccurrenceCounts();
    for (int i = 0; i < partialMatrix.length; i++) {
        double[] row = partialMatrix[i];
        int[] co = null;
        if (coOcc != null) {
            co = coOcc[i];
        }
        MatrixRowHolder rh = new MatrixRowHolder(i, row, co);
        byte[] bytes = rowHolderToBytes(rh);
        String sKey = ("" + i);
        Text key = new Text();
        key.set(sKey);
        BytesWritable value = new BytesWritable();
        value.set(bytes, 0, bytes.length);
        context.write(key, value);
    }
}
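The rowHolderToBytes() helper above is not part of this snippet, and the same goes for sketchToBytes(), centroidStatsToBytes(), classifierToBytes(), and evalToBytes() in the examples that follow. Based on the inline serialization visible in the CSVToArffHeaderHadoopMapper example below, a plausible sketch is a gzipped ObjectOutputStream over a byte array; treat the class and method names here as assumptions, not the actual Weka implementation:

import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.zip.GZIPOutputStream;

public final class WritableSerialization {

    // Hypothetical stand-in for helpers like rowHolderToBytes(): serialize a
    // Serializable payload to a gzipped byte array, ready for BytesWritable.set()
    public static byte[] toBytes(Serializable payload) throws IOException {
        ByteArrayOutputStream ostream = new ByteArrayOutputStream();
        ObjectOutputStream oos = new ObjectOutputStream(
            new BufferedOutputStream(new GZIPOutputStream(ostream)));
        oos.writeObject(payload);
        oos.flush();
        oos.close(); // closing flushes the gzip trailer into ostream
        return ostream.toByteArray();
    }
}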
From source file: weka.distributed.hadoop.CSVToArffHeaderHadoopMapper.java
License: Open Source License

@Override
public void cleanup(Context context) throws IOException, InterruptedException {
    if (m_fatalMappingError != null) {
        throw m_fatalMappingError;
    }
    HeaderAndQuantileDataHolder holder = null;
    Instances header = null;
    if (!m_estimateQuantiles) {
        header = m_task.getHeader();
    } else {
        try {
            holder = m_task.getHeaderAndQuantileEstimators();
        } catch (DistributedWekaException ex) {
            throw new IOException(ex);
        }
    }
    ByteArrayOutputStream ostream = new ByteArrayOutputStream();
    OutputStream os = ostream;
    ObjectOutputStream p;
    p = new ObjectOutputStream(new BufferedOutputStream(new GZIPOutputStream(os)));
    p.writeObject(header != null ? header : holder);
    p.flush();
    p.close();
    byte[] bytes = ostream.toByteArray();

    // make sure all headers go to the same reducer
    String constantKey = "header";
    Text key = new Text();
    key.set(constantKey);
    BytesWritable value = new BytesWritable();
    value.set(bytes, 0, bytes.length);
    context.write(key, value); // write the header
}
From source file: weka.distributed.hadoop.KMeansCentroidSketchHadoopMapper.java
License: Open Source License

@Override
public void cleanup(Context context) throws IOException, InterruptedException {
    // emit serialized sketch tasks with run number as key
    for (int i = 0; i < m_tasks.length; i++) {
        System.err.println("Number of instances in sketch: "
            + m_tasks[i].getCurrentSketch().numInstances());
        System.err.println("Number of instances in reservoir: "
            + m_tasks[i].getReservoirSample().getSample().size());
        byte[] bytes = sketchToBytes(m_tasks[i]);
        String runNum = "run" + i;
        Text key = new Text();
        key.set(runNum);
        BytesWritable value = new BytesWritable();
        value.set(bytes, 0, bytes.length);
        context.write(key, value);
    }
}
From source file: weka.distributed.hadoop.KMeansHadoopMapper.java
License: Open Source License

@Override
public void cleanup(Context context) throws IOException, InterruptedException {
    for (int i = 0; i < m_numRuns; i++) {
        if (!m_tasks[i].getConverged()) {
            // List<Instances> centroidStatsForRun = m_tasks[i].getCentroidStats();
            byte[] bytes = centroidStatsToBytes(m_tasks[i]);
            String runNum = "run" + i;
            Text key = new Text();
            key.set(runNum);
            BytesWritable value = new BytesWritable();
            value.set(bytes, 0, bytes.length);
            context.write(key, value);
        }
    }
}
From source file: weka.distributed.hadoop.WekaClassifierHadoopMapper.java
License: Open Source License

@Override
public void cleanup(Context context) throws IOException, InterruptedException {
    try {
        m_task.finalizeTask();
        // System.err.println("Model after training:\n"
        //     + m_task.getClassifier().toString());
        byte[] bytes = classifierToBytes(m_task.getClassifier(),
            m_task.getNumTrainingInstances());

        // make sure all classifiers go to the same reducer
        String constantKey = "classifier";
        Text key = new Text();
        key.set(constantKey);
        BytesWritable value = new BytesWritable();
        value.set(bytes, 0, bytes.length);
        context.write(key, value);
    } catch (Exception ex) {
        throw new IOException(ex);
    }
}
From source file: weka.distributed.hadoop.WekaFoldBasedClassifierEvaluationHadoopMapper.java
License: Open Source License

@Override
public void cleanup(Context context) throws IOException {
    try {
        // aggregate the stats over all folds in this chunk
        AggregateableEvaluation agg = null;
        for (int i = 0; i < m_totalFolds; i++) {
            if (!m_classifierIsUpdateable || m_forceBatch) {
                String modelToLoad = "" + (i + 1) + "_" + m_originalModelFileName;
                Classifier foldModel = WekaClassifierHadoopMapper.loadClassifier(modelToLoad);
                m_tasks[i].setClassifier(foldModel);
            }
            m_tasks[i].finalizeTask();
            Evaluation eval = m_tasks[i].getEvaluation();

            // save memory
            m_tasks[i] = null;

            if (agg == null) {
                agg = new AggregateableEvaluation(eval);
            }
            agg.aggregate(eval);
        }

        if (agg != null) {
            byte[] bytes = evalToBytes(agg);
            String constantKey = "evaluation";
            Text key = new Text();
            key.set(constantKey);
            BytesWritable value = new BytesWritable();
            value.set(bytes, 0, bytes.length);
            context.write(key, value);
        }
    } catch (Exception ex) {
        throw new IOException(ex);
    }
}
From source file: weka.distributed.hadoop.WekaFoldBasedClassifierHadoopMapper.java
License: Open Source License

@Override
public void cleanup(Context context) throws IOException, InterruptedException {
    try {
        for (int i = 0; i < m_totalFolds; i++) {
            m_tasks[i].finalizeTask();
            // System.err.println("Model after continued training on fold " + (i + 1)
            //     + ":\n" + m_tasks[i].getClassifier().toString());
            byte[] bytes = classifierToBytes(m_tasks[i].getClassifier(),
                m_tasks[i].getNumTrainingInstances());
            String constantKey = "classifier_fold_" + (i + 1);
            Text key = new Text();
            key.set(constantKey);
            BytesWritable value = new BytesWritable();
            value.set(bytes, 0, bytes.length);
            context.write(key, value);

            // save memory
            m_tasks[i] = null;
        }
    } catch (Exception ex) {
        throw new IOException(ex);
    }
}
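On the consuming side, a common pitfall is that getBytes() returns the writable's backing buffer, which can be longer than the logical payload, so any read must be bounded by getLength(). Below is a minimal readback sketch matching the gzipped serialization used in the examples above; the class and method names are illustrative, not from any of the projects quoted here.

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.zip.GZIPInputStream;
import org.apache.hadoop.io.BytesWritable;

public final class WritableDeserialization {

    // Deserialize an object previously packed into a BytesWritable via a
    // gzipped ObjectOutputStream (as in the mapper examples above)
    public static Object fromWritable(BytesWritable value)
            throws IOException, ClassNotFoundException {
        // Bound the stream by getLength(): getBytes() may be padded
        ByteArrayInputStream bis =
            new ByteArrayInputStream(value.getBytes(), 0, value.getLength());
        ObjectInputStream ois = new ObjectInputStream(
            new BufferedInputStream(new GZIPInputStream(bis)));
        try {
            return ois.readObject();
        } finally {
            ois.close();
        }
    }
}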