List of usage examples for org.apache.hadoop.io.NullWritable.get()
public static NullWritable get()
From source file:org.apache.gobblin.compaction.mapreduce.orc.OrcKeyDedupReducer.java
License:Apache License
/**
 * Creates the output key and output value objects held by this instance:
 * the key is the {@link NullWritable} instance returned by
 * {@link NullWritable#get()} and the value is a new {@code OrcValue}.
 */
@Override
protected void initReusableObject() {
  this.outValue = new OrcValue();
  this.outKey = NullWritable.get();
}
From source file:org.apache.gobblin.compaction.mapreduce.orc.OrcValueMapper.java
License:Apache License
@Override protected void map(NullWritable key, OrcStruct orcStruct, Context context) throws IOException, InterruptedException { if (context.getNumReduceTasks() == 0) { this.outKey.key = orcStruct; context.write(this.outKey, NullWritable.get()); } else {// ww w . ja va 2 s .co m this.outValue.value = orcStruct; context.write(getDedupKey(orcStruct), this.outValue); } context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1); }
From source file:org.apache.gobblin.compaction.mapreduce.OrcCompactionTaskTest.java
License:Apache License
public void writeOrcRecordsInFile(Path path, TypeDescription schema, List<OrcStruct> orcStructs) throws Exception { Configuration configuration = new Configuration(); OrcFile.WriterOptions options = OrcFile.writerOptions(configuration).setSchema(schema); Writer writer = OrcFile.createWriter(path, options); OrcMapreduceRecordWriter recordWriter = new OrcMapreduceRecordWriter(writer); for (OrcStruct orcRecord : orcStructs) { recordWriter.write(NullWritable.get(), orcRecord); }//from ww w. j av a 2 s. c o m recordWriter.close(new TaskAttemptContextImpl(configuration, new TaskAttemptID())); }
From source file:org.apache.hama.ml.kmeans.KMeansBSP.java
License:Apache License
@Override public final void setup(BSPPeer<VectorWritable, NullWritable, IntWritable, VectorWritable, CenterMessage> peer) throws IOException, InterruptedException { conf = peer.getConfiguration();//from w w w .j av a 2 s . com Path centroids = new Path(peer.getConfiguration().get(CENTER_IN_PATH)); FileSystem fs = FileSystem.get(peer.getConfiguration()); final ArrayList<DoubleVector> centers = new ArrayList<DoubleVector>(); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, centroids, peer.getConfiguration()); VectorWritable key = new VectorWritable(); NullWritable value = NullWritable.get(); while (reader.next(key, value)) { DoubleVector center = key.getVector(); centers.add(center); } } catch (IOException e) { throw new RuntimeException(e); } finally { if (reader != null) { reader.close(); } } Preconditions.checkArgument(centers.size() > 0, "Centers file must contain at least a single center!"); this.centers = centers.toArray(new DoubleVector[centers.size()]); String distanceClass = peer.getConfiguration().get(DISTANCE_MEASURE_CLASS); if (distanceClass != null) { try { distanceMeasurer = ReflectionUtils.newInstance(distanceClass); } catch (ClassNotFoundException e) { throw new RuntimeException("Wrong DistanceMeasurer implementation " + distanceClass + " provided"); } } else { distanceMeasurer = new EuclidianDistance(); } maxIterations = peer.getConfiguration().getInt(MAX_ITERATIONS_KEY, -1); // normally we want to rely on OS caching, but if not, we can cache in heap if (peer.getConfiguration().getBoolean(CACHING_ENABLED_KEY, false)) { cache = new ArrayList<DoubleVector>(); } }
From source file:org.apache.hama.ml.kmeans.KMeansBSP.java
License:Apache License
private void assignCenters( BSPPeer<VectorWritable, NullWritable, IntWritable, VectorWritable, CenterMessage> peer) throws IOException { // each task has all the centers, if a center has been updated it // needs to be broadcasted. final DoubleVector[] newCenterArray = new DoubleVector[centers.length]; final int[] summationCount = new int[centers.length]; // if our cache is not enabled, iterate over the disk items if (cache == null) { // we have an assignment step final NullWritable value = NullWritable.get(); final VectorWritable key = new VectorWritable(); while (peer.readNext(key, value)) { assignCentersInternal(newCenterArray, summationCount, key.getVector().deepCopy()); }//from w w w. ja v a 2 s .c o m } else { // if our cache is enabled but empty, we have to read it from disk first if (cache.isEmpty()) { final NullWritable value = NullWritable.get(); final VectorWritable key = new VectorWritable(); while (peer.readNext(key, value)) { DoubleVector deepCopy = key.getVector().deepCopy(); cache.add(deepCopy); // but do the assignment directly assignCentersInternal(newCenterArray, summationCount, deepCopy); } } else { // now we can iterate in memory and check against the centers for (DoubleVector v : cache) { assignCentersInternal(newCenterArray, summationCount, v); } } } // now send messages about the local updates to each other peer for (int i = 0; i < newCenterArray.length; i++) { if (newCenterArray[i] != null) { for (String peerName : peer.getAllPeerNames()) { peer.send(peerName, new CenterMessage(i, summationCount[i], newCenterArray[i])); } } } }
From source file:org.apache.hama.ml.kmeans.KMeansBSP.java
License:Apache License
private void recalculateAssignmentsAndWrite( BSPPeer<VectorWritable, NullWritable, IntWritable, VectorWritable, CenterMessage> peer) throws IOException { final NullWritable value = NullWritable.get(); // also use our cache to speed up the final writes if exists if (cache == null) { final VectorWritable key = new VectorWritable(); IntWritable keyWrite = new IntWritable(); while (peer.readNext(key, value)) { final int lowestDistantCenter = getNearestCenter(key.getVector()); keyWrite.set(lowestDistantCenter); peer.write(keyWrite, key);//from w w w . j a va2 s.c o m } } else { IntWritable keyWrite = new IntWritable(); for (DoubleVector v : cache) { final int lowestDistantCenter = getNearestCenter(v); keyWrite.set(lowestDistantCenter); peer.write(keyWrite, new VectorWritable(v)); } } // just on the first task write the centers to filesystem to prevent // collisions if (peer.getPeerName().equals(peer.getPeerName(0))) { String pathString = conf.get(CENTER_OUT_PATH); if (pathString != null) { final SequenceFile.Writer dataWriter = SequenceFile.createWriter(FileSystem.get(conf), conf, new Path(pathString), VectorWritable.class, NullWritable.class, CompressionType.NONE); for (DoubleVector center : centers) { dataWriter.append(new VectorWritable(center), value); } dataWriter.close(); } } }
From source file:org.apache.hama.ml.kmeans.KMeansBSP.java
License:Apache License
/**
 * Reads the cluster centers.
 *
 * @param conf       configuration used to open the sequence file
 * @param out        not used by this method
 * @param centerPath path of the sequence file holding the centers
 * @param fs         file system the centers file lives on
 * @return an index on the key dimension, and a cluster center on the value.
 *         Indices start at 0 and follow the order of the records in the file.
 * @throws IOException if opening, reading, or closing the file fails
 */
public static HashMap<Integer, DoubleVector> readClusterCenters(Configuration conf, Path out,
    Path centerPath, FileSystem fs) throws IOException {
  HashMap<Integer, DoubleVector> centerMap = new HashMap<Integer, DoubleVector>();
  SequenceFile.Reader centerReader = new SequenceFile.Reader(fs, centerPath, conf);
  try {
    int index = 0;
    VectorWritable center = new VectorWritable();
    while (centerReader.next(center, NullWritable.get())) {
      centerMap.put(index++, center.getVector());
    }
  } finally {
    // Close even when a read fails so the reader is not leaked.
    centerReader.close();
  }
  return centerMap;
}
From source file:org.apache.hama.ml.kmeans.KMeansBSP.java
License:Apache License
/** * Reads input text files and writes it to a sequencefile. * /*from w w w .j a v a2s. c om*/ * @param k * @param conf * @param txtIn * @param center * @param out * @param fs * @param hasKey true if first column is required to be the key. * @return the path of a sequencefile. * @throws IOException */ public static Path prepareInputText(int k, Configuration conf, Path txtIn, Path center, Path out, FileSystem fs, boolean hasKey) throws IOException { Path in; if (fs.isFile(txtIn)) { in = new Path(txtIn.getParent(), "textinput/in.seq"); } else { in = new Path(txtIn, "textinput/in.seq"); } if (fs.exists(out)) fs.delete(out, true); if (fs.exists(center)) fs.delete(center, true); if (fs.exists(in)) fs.delete(in, true); final NullWritable value = NullWritable.get(); Writer centerWriter = new SequenceFile.Writer(fs, conf, center, VectorWritable.class, NullWritable.class); final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, in, VectorWritable.class, NullWritable.class, CompressionType.NONE); int i = 0; BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(txtIn))); String line; while ((line = br.readLine()) != null) { String[] split = line.split("\t"); int columnLength = split.length; int indexPos = 0; if (hasKey) { columnLength = columnLength - 1; indexPos++; } DenseDoubleVector vec = new DenseDoubleVector(columnLength); for (int j = 0; j < columnLength; j++) { vec.set(j, Double.parseDouble(split[j + indexPos])); } VectorWritable vector; if (hasKey) { NamedDoubleVector named = new NamedDoubleVector(split[0], vec); vector = new VectorWritable(named); } else { vector = new VectorWritable(vec); } dataWriter.append(vector, value); if (k > i) { centerWriter.append(vector, value); } i++; } br.close(); centerWriter.close(); dataWriter.close(); return in; }
From source file:org.apache.hama.ml.kmeans.KMeansBSP.java
License:Apache License
/**
 * Create some random vectors as input and assign the first k vectors as
 * initial centers.
 *
 * <p>Existing files at {@code out}, {@code center} and {@code in} are deleted
 * first. Every vector component is a random integer in {@code [0, count)}.
 *
 * @param count     number of vectors to generate
 * @param k         number of leading vectors also written to the centers file
 * @param dimension number of components per vector
 * @param conf      configuration used to create the sequence files
 * @param in        path of the input sequence file to create
 * @param center    path of the centers sequence file to create
 * @param out       output path; only deleted here if it exists
 * @param fs        file system used for all operations
 * @throws IOException if any file system operation fails
 */
public static void prepareInput(int count, int k, int dimension, Configuration conf, Path in,
    Path center, Path out, FileSystem fs) throws IOException {
  if (fs.exists(out)) {
    fs.delete(out, true);
  }
  if (fs.exists(center)) {
    fs.delete(center, true);
  }
  if (fs.exists(in)) {
    fs.delete(in, true);
  }

  final NullWritable value = NullWritable.get();

  // Both writers are closed in finally blocks so a failed append does not
  // leak them.
  final SequenceFile.Writer centerWriter = SequenceFile.createWriter(fs, conf, center,
      VectorWritable.class, NullWritable.class, CompressionType.NONE);
  try {
    final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, in,
        VectorWritable.class, NullWritable.class, CompressionType.NONE);
    try {
      Random r = new Random();
      for (int i = 0; i < count; i++) {
        double[] arr = new double[dimension];
        for (int d = 0; d < dimension; d++) {
          arr[d] = r.nextInt(count);
        }
        VectorWritable vector = new VectorWritable(new DenseDoubleVector(arr));
        dataWriter.append(vector, value);
        if (k > i) {
          // the first k vectors double as the initial centers
          centerWriter.append(vector, value);
        }
      }
    } finally {
      dataWriter.close();
    }
  } finally {
    centerWriter.close();
  }
}
From source file:org.apache.hama.pipes.TestPipes.java
License:Apache License
static void verifyOutput(HamaConfiguration conf, Path outputPath, double expectedResult, double delta) throws IOException { FileStatus[] listStatus = fs.listStatus(outputPath); for (FileStatus status : listStatus) { if (!status.isDir()) { SequenceFile.Reader reader = new SequenceFile.Reader(fs, status.getPath(), conf); NullWritable key = NullWritable.get(); DoubleWritable value = new DoubleWritable(); if (reader.next(key, value)) { LOG.info("Output File: " + status.getPath()); LOG.info("key: '" + key + "' value: '" + value + "' expected: '" + expectedResult + "'"); assertEquals("Expected value: '" + expectedResult + "' != '" + value + "'", expectedResult, value.get(), delta); }//ww w . j a v a 2 s. com reader.close(); } } }