List of usage examples for org.apache.hadoop.io.LongWritable.get()
public long get()
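LongWritable is Hadoop's Writable box around a primitive long, and get() returns the wrapped value. Before the project-level examples below, here is a minimal standalone sketch of get() alongside its counterpart set() (illustrative only, not taken from any of the listed source files):

import org.apache.hadoop.io.LongWritable;

public class LongWritableGetDemo {
    public static void main(String[] args) {
        LongWritable w = new LongWritable(42L); // wrap a primitive long
        long value = w.get();                   // unwrap it again
        System.out.println(value);              // prints 42
        w.set(7L);                              // Writables are mutable, so instances are often reused
        System.out.println(w.get());           // prints 7
    }
}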
From source file:com.kylinolap.job.tools.ColumnCardinalityReducerTest.java
License:Apache License
@Test
public void testReducer() throws IOException {
    IntWritable key1 = new IntWritable(1);
    List<BytesWritable> values1 = new ArrayList<BytesWritable>();
    values1.add(new BytesWritable(getBytes(ColumnCardinalityMapperTest.strArr)));
    IntWritable key2 = new IntWritable(2);
    List<BytesWritable> values2 = new ArrayList<BytesWritable>();
    values2.add(new BytesWritable(getBytes(ColumnCardinalityMapperTest.strArr + " x")));
    IntWritable key3 = new IntWritable(3);
    List<BytesWritable> values3 = new ArrayList<BytesWritable>();
    values3.add(new BytesWritable(getBytes(ColumnCardinalityMapperTest.strArr + " xx")));
    IntWritable key4 = new IntWritable(4);
    List<BytesWritable> values4 = new ArrayList<BytesWritable>();
    values4.add(new BytesWritable(getBytes(ColumnCardinalityMapperTest.strArr + " xxx")));
    IntWritable key5 = new IntWritable(5);
    List<BytesWritable> values5 = new ArrayList<BytesWritable>();
    values5.add(new BytesWritable(getBytes(ColumnCardinalityMapperTest.strArr + " xxxx")));
    reduceDriver.withInput(key1, values1);
    reduceDriver.withInput(key2, values2);
    reduceDriver.withInput(key3, values3);
    reduceDriver.withInput(key4, values4);
    reduceDriver.withInput(key5, values5);
    List<Pair<IntWritable, LongWritable>> result = reduceDriver.run();
    assertEquals(5, result.size());
    int outputKey1 = result.get(0).getFirst().get();
    LongWritable value1 = result.get(0).getSecond();
    assertTrue(outputKey1 == 1);
    // The reducer emits an approximate cardinality estimate, so accept either 9 or 10.
    assertTrue((10 == value1.get()) || (9 == value1.get()));
}
From source file:com.lightboxtechnologies.spectrum.ExtractDataMapper.java
License:Apache License
// Scan forward through the extents file to the first key at or beyond this split's start offset.
LongWritable seekToMapBlock(SequenceFile.Reader extents, long startOffset) throws IOException {
    final LongWritable cur = new LongWritable();
    while (extents.next(cur)) {
        if (cur.get() >= startOffset) {
            return cur;
        }
    }
    return null;
}
From source file:com.lightboxtechnologies.spectrum.ExtractDataMapper.java
License:Apache License
protected int process_extents(FileSystem fs, Path path, SequenceFile.Reader extents, LongWritable offset,
        long endOffset, Context context) throws IOException, InterruptedException {
    int numFiles = 0;
    long cur = offset.get();
    final JsonWritable attrs = new JsonWritable();
    final Path outPath = new Path("/texaspete/ev/tmp", UUID.randomUUID().toString());
    try {
        openImgFile(path, fs);
        extents.getCurrentValue(attrs);
        do {
            ++numFiles;
            @SuppressWarnings("unchecked")
            final Map<String, ?> map = (Map<String, ?>) attrs.get();
            process_extent(ImgFile, fs, outPath, map, context);
        } while (extents.next(offset, attrs) && (cur = offset.get()) < endOffset);
    } catch (IOException io) {
        throw io;
    } catch (InterruptedException interrupt) {
        throw interrupt;
    } catch (Exception e) {
        LOG.error("Extraction exception:", e);
    }
    return numFiles;
}
From source file:com.lightboxtechnologies.spectrum.ExtractDataMapper.java
License:Apache License
@Override
protected void map(NullWritable k, FileSplit split, Context context) throws IOException, InterruptedException {
    final long startOffset = split.getStart(), endOffset = startOffset + split.getLength();
    LOG.info("startOffset = " + startOffset + "; endOffset = " + endOffset);
    context.setStatus("Offset " + startOffset);
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = FileSystem.get(conf);
    int numFiles = 0;
    try {
        Extents = openExtentsFile(fs, conf);
        final LongWritable offset = seekToMapBlock(Extents, startOffset);
        if (offset != null && offset.get() < endOffset) {
            numFiles = process_extents(fs, split.getPath(), Extents, offset, endOffset, context);
        }
        LOG.info("This split had " + numFiles + " files in it");
    } finally {
        IOUtils.closeQuietly(Extents);
    }
}
From source file:com.lightboxtechnologies.spectrum.ExtractMapper.java
License:Apache License
protected int process_extents(FileSystem fs, Path path, SequenceFile.Reader extents, LongWritable offset,
        long endOffset, Context context) throws IOException, InterruptedException {
    int numFiles = 0;
    long cur = offset.get();
    final JsonWritable attrs = new JsonWritable();
    final Path outPath = new Path("/texaspete/ev/tmp", UUID.randomUUID().toString());
    try {
        openImgFile(path, fs);
        extents.getCurrentValue(attrs);
        do {
            ++numFiles;
            @SuppressWarnings("unchecked")
            final Map<String, ?> map = (Map<String, ?>) attrs.get();
            process_extent(ImgFile, fs, outPath, map, context);
        } while (extents.next(offset, attrs) && (cur = offset.get()) < endOffset);
    } catch (IOException io) {
        throw io;
    } catch (InterruptedException interrupt) {
        throw interrupt;
    } catch (Exception e) {
        LOG.error("Extraction exception " + e);
    }
    return numFiles;
}
From source file:com.linkedin.cubert.io.CompactWritablesDeserializer.java
License:Open Source License
@SuppressWarnings("unchecked") @Override//w w w. j a v a2 s . c o m public K deserialize(K object) throws IOException { if (in.available() == 0) throw new IOException(); Tuple t = (Tuple) object; if (t == null) { t = TupleFactory.getInstance().newTuple(datatypes.length); } for (int i = 0; i < datatypes.length; i++) { Object field = null; switch (datatypes[i]) { case BOOLEAN: { IntWritable iw = VariableLengthEncoder.decodeInteger(in); if (iw != null) { ((BooleanWritable) writables[i]).set(iw.get() == 1); field = writables[i]; } break; } case BYTE: { IntWritable iw = VariableLengthEncoder.decodeInteger(in); if (iw != null) { ((ByteWritable) writables[i]).set((byte) iw.get()); field = writables[i]; } break; } case DOUBLE: { DoubleWritable dw = VariableLengthEncoder.decodeDouble(in); if (dw != null) { ((DoubleWritable) writables[i]).set(dw.get()); field = writables[i]; } break; } case FLOAT: { FloatWritable fw = VariableLengthEncoder.decodeFloat(in); if (fw != null) { ((FloatWritable) writables[i]).set(fw.get()); field = writables[i]; } break; } case INT: { IntWritable iw = VariableLengthEncoder.decodeInteger(in); if (iw != null) { ((IntWritable) writables[i]).set(iw.get()); field = writables[i]; } break; } case LONG: { LongWritable lw = VariableLengthEncoder.decodeLong(in); if (lw != null) { ((LongWritable) writables[i]).set(lw.get()); field = writables[i]; } break; } case STRING: { IntWritable iw = VariableLengthEncoder.decodeInteger(in); if (iw != null) { int length = iw.get(); if (length > buffer.length) buffer = new byte[2 * buffer.length]; in.read(buffer, 0, length); ((Text) writables[i]).set(new String(buffer, 0, length)); field = writables[i]; } break; } default: throw new RuntimeException("Cannot deserialize column of type " + datatypes[i]); } t.set(i, field); } return (K) t; }
From source file:com.microsoft.canberra.tf.util.SparseVectorRecordReader.java
License:Open Source License
@Override
public boolean next(final LongWritable recordId, final Document data) throws IOException {
    if (!this.textRecordReader.next(this.offset, this.text)) {
        return false;
    }
    final String[] fields = this.text.toString().split("\\s+");
    if (fields.length <= 1) {
        return false;
    }
    // The first element contains the unique document ID:
    recordId.set(Long.parseLong(fields[0]));
    data.clearTokens(recordId.get(), fields.length - 2);
    // The second element contains the number of unique items in the document:
    // assert(Integer.parseInt(fields[1]) == fields.length - 2);
    for (int i = 2; i < fields.length; ++i) {
        // Each remaining element is a colon-separated pair of integers, item_hash:item_frequency
        final String[] pair = fields[i].split(":", 2);
        data.add(Integer.valueOf(pair[0]), Double.valueOf(pair[1]));
    }
    return true;
}
From source file:com.ML_Hadoop.K_meansClustering.K_meansClusteringReduce.java
@Override // ensures this overrides reduce(); a signature mismatch would silently fall back to the default reduce()
public void reduce(LongWritable key, Iterable<FloatArrayWritable> values, Context context)
        throws IOException, InterruptedException {
    int[] num_of_members_in_a_cluster = new int[number_of_clusters];
    int key_index = (int) key.get();
    for (FloatArrayWritable val : values) {
        num_of_members_in_a_cluster[key_index] += (int) ((FloatWritable) (val.get())[0]).get();
        Float[] temp = new Float[feature_size];
        for (int i = 0; i < feature_size; i++) {
            temp[i] = sum_of_members_of_a_cluster.get(key_index)[i]
                    + (Float) ((FloatWritable) (val.get())[i + 1]).get();
        }
        sum_of_members_of_a_cluster.set(key_index, temp);
    }
    Float[] temp = new Float[feature_size];
    for (int i = 0; i < feature_size; i++) {
        temp[i] = sum_of_members_of_a_cluster.get(key_index)[i] / num_of_members_in_a_cluster[key_index];
    }
    cetroid_of_a_cluster.set(key_index, temp);
    FloatWritable[] t = new FloatWritable[feature_size];
    for (int i = 0; i < feature_size; i++) {
        t[i] = new FloatWritable(cetroid_of_a_cluster.get(key_index)[i]);
    }
    cetroids_of_all_clusters.add(new FloatArrayWritable(t));
    context.write(key, new FloatArrayWritable(t));
}
From source file:com.ML_Hadoop.MultipleLinearRegression.MultipleLinearRegressionReduce.java
@Override // ensures this overrides reduce() rather than falling back to the default implementation
public void reduce(LongWritable key, Iterable<FloatWritable> values, Context context)
        throws IOException, InterruptedException {
    // Decode the key: key 0 carries the cost; the remaining keys carry the theta components.
    if ((int) key.get() == 0) { // comparing against new LongWritable(0) would also work
        Float cost = 0.0f;
        for (FloatWritable val : values)
            cost += val.get();
        prediction_error = cost;
        context.write(key, new FloatWritable(cost));
    } else { // extract theta
        Float cost = 0.0f;
        for (FloatWritable val : values)
            cost += val.get();
        // update theta
        System.out.println("cost for key: " + cost);
        System.out.println("cost " + cost * alpha / input_data_size);
        int key_index = (int) key.get() - 1;
        System.out.println("key_index: " + key_index);
        theta[key_index] -= cost * alpha / input_data_size;
        context.write(key, new FloatWritable(cost));
    }
}
From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierReduce_Continuous_Features.java
@Override // ensures this overrides reduce() rather than falling back to the default implementation
public void reduce(LongWritable key, Iterable<MapArrayWritable> values, Context context)
        throws IOException, InterruptedException {
    int key_index = (int) key.get();
    Float[] sigma_x2 = new Float[number_of_features];
    Float[] mu_x = new Float[number_of_features];
    Float[] num_x = new Float[number_of_features];
    Float[] partial_num_x = new Float[number_of_features];
    Float[] total_num_x = new Float[number_of_features];
    Float[] class_id_mu = new Float[number_of_features];
    Float[] class_id_std = new Float[number_of_features];
    MapWritable[] t = new MapWritable[number_of_features];
    // All arrays must be initialized before use; each element of a MapWritable[]
    // in particular must be instantiated individually.
    for (int i = 0; i < number_of_features; i++) {
        t[i] = new MapWritable();
        sigma_x2[i] = 0.0f;
        mu_x[i] = 0.0f;
        num_x[i] = 0.0f;
        partial_num_x[i] = 0.0f;
        total_num_x[i] = 0.0f;
        class_id_mu[i] = 0.0f;
        class_id_std[i] = 0.0f;
    }
    for (MapArrayWritable val : values) {
        for (int i = 0; i < number_of_features; i++) {
            num_x[i] = ((FloatWritable) ((MapWritable) (val.get()[i])).get(new Text("num_x_local"))).get();
            sigma_x2[i] += ((FloatWritable) ((MapWritable) (val.get()[i])).get(new Text("sigma_x2"))).get();
            mu_x[i] = ((FloatWritable) ((MapWritable) (val.get()[i])).get(new Text("mu_x_local"))).get();
            partial_num_x[i] += mu_x[i] * num_x[i]; // accumulates mu(i) * N(i)
            total_num_x[i] += num_x[i]; // accumulates total N = N1 + N2 + ... + Nk
        }
    }
    for (int i = 0; i < number_of_features && total_num_x[0] != 0; i++) {
        class_id_mu[i] = partial_num_x[i] / total_num_x[i];
        class_id_std[i] = sigma_x2[i] / total_num_x[i] - (class_id_mu[i] * class_id_mu[i]);
    }
    for (int i = 0; i < number_of_features && total_num_x[0] != 0; i++) {
        t[i].put(new Text("class_id_mu"), new FloatWritable(class_id_mu[i]));
        t[i].put(new Text("class_id_std"), new FloatWritable(class_id_std[i]));
    }
    probablity_info_output.set(key_index, t);
}