List of usage examples for org.apache.hadoop.io.IntWritable.set
public void set(int value)
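set(int value) replaces the integer held by an existing IntWritable. Because Writable objects are mutable, the common Hadoop idiom seen throughout the examples below is to allocate one instance and repeatedly call set() inside a loop, rather than creating a new object per record. Here is a minimal, self-contained sketch of that idiom (the class name IntWritableSetDemo is illustrative, not taken from any of the source files below):

import org.apache.hadoop.io.IntWritable;

public class IntWritableSetDemo {
    public static void main(String[] args) {
        // Allocate once, then overwrite the held value with set().
        IntWritable writable = new IntWritable();
        for (int i = 0; i < 3; i++) {
            writable.set(i);                    // replaces the previous value
            System.out.println(writable.get()); // prints 0, 1, 2
        }
    }
}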
From source file: org.apache.hama.computemodel.mapreduce.Mapper.java
License: Apache License

@Override
protected void compute(
        BSPPeer<K1, V1, K2, V2, WritableKeyValues<? extends WritableComparable<?>, ? extends Writable>> peer)
        throws IOException {
    this.memoryQueue = new PriorityQueue<WritableKeyValues<K2, V2>>();
    this.globalKeyDistribution = new long[peer.getNumPeers()][peer.getNumPeers()];
    int myId = peer.getPeerId();
    OutputCollector<K2, V2> collector = new BSPMapperOutputCollector<K1, V1, K2, V2>(peer, memoryQueue,
            globalKeyDistribution[myId]);
    KeyValuePair<K1, V1> record = null;
    while ((record = peer.readNext()) != null) {
        map(record.getKey(), record.getValue(), collector);
    }
    Comparator<V2> valComparator = null;
    Configuration conf = peer.getConfiguration();
    Class<?> comparatorClass = conf.getClass(VALUE_COMPARATOR_CLASS, null);
    if (comparatorClass != null) {
        valComparator = (Comparator<V2>) ReflectionUtils.newInstance(comparatorClass, conf);
    }
    Reducer<K2, V2, K2, V2> combiner = null;
    Class<?> combinerClass = conf.getClass(COMBINER_CLASS, null);
    if (combinerClass != null) {
        combiner = (Reducer<K2, V2, K2, V2>) ReflectionUtils.newInstance(combinerClass, conf);
    }
    ExecutorService service = Executors.newFixedThreadPool(1);
    Future<Integer> future = service.submit(new CombineAndSortThread<K2, V2>(peer.getConfiguration(),
            this.memoryQueue, valComparator, combiner));
    String[] peers = peer.getAllPeerNames();
    // One IntWritable/LongWritable pair is reused via set() for every message sent below.
    IntWritable keyPartition = new IntWritable();
    LongWritable value = new LongWritable();
    WritableKeyValues<IntWritable, IntWritable> myIdTuple = new WritableKeyValues<IntWritable, IntWritable>(
            new IntWritable(peer.getPeerId()), new IntWritable(-1));
    int peerId = peer.getPeerId();
    for (int keyNumber = 0; keyNumber < globalKeyDistribution[0].length; ++keyNumber) {
        keyPartition.set(keyNumber);
        value.set(globalKeyDistribution[peerId][keyNumber]);
        myIdTuple.setValue(keyPartition);
        for (String peerName : peers) {
            peer.send(peerName,
                    new WritableKeyValues<WritableKeyValues<IntWritable, IntWritable>, LongWritable>(
                            myIdTuple, value));
        }
    }
    peer.save(KEY_DIST, this.globalKeyDistribution);
    peer.save(COMBINER_FUTURE, future);
    peer.save(MESSAGE_QUEUE, this.memoryQueue);
}
From source file: org.apache.hama.ml.kmeans.KMeansBSP.java
License: Apache License

private void recalculateAssignmentsAndWrite(
        BSPPeer<VectorWritable, NullWritable, IntWritable, VectorWritable, CenterMessage> peer)
        throws IOException {
    final NullWritable value = NullWritable.get();
    // also use our cache to speed up the final writes, if one exists
    if (cache == null) {
        final VectorWritable key = new VectorWritable();
        IntWritable keyWrite = new IntWritable();
        while (peer.readNext(key, value)) {
            final int lowestDistantCenter = getNearestCenter(key.getVector());
            keyWrite.set(lowestDistantCenter);
            peer.write(keyWrite, key);
        }
    } else {
        IntWritable keyWrite = new IntWritable();
        for (DoubleVector v : cache) {
            final int lowestDistantCenter = getNearestCenter(v);
            keyWrite.set(lowestDistantCenter);
            peer.write(keyWrite, new VectorWritable(v));
        }
    }
    // only the first task writes the centers to the filesystem, to prevent collisions
    if (peer.getPeerName().equals(peer.getPeerName(0))) {
        String pathString = conf.get(CENTER_OUT_PATH);
        if (pathString != null) {
            final SequenceFile.Writer dataWriter = SequenceFile.createWriter(FileSystem.get(conf), conf,
                    new Path(pathString), VectorWritable.class, NullWritable.class, CompressionType.NONE);
            for (DoubleVector center : centers) {
                dataWriter.append(new VectorWritable(center), value);
            }
            dataWriter.close();
        }
    }
}
From source file: org.apache.ignite.internal.processors.hadoop.impl.shuffle.collections.HadoopConcurrentHashMultimapSelftest.java
License: Apache License

/** @throws Exception if failed. */
public void testMultiThreaded() throws Exception {
    GridUnsafeMemory mem = new GridUnsafeMemory(0);
    X.println("___ Started");
    Random rnd = new GridRandom();
    for (int i = 0; i < 20; i++) {
        HadoopJobInfo job = new JobInfo();
        final HadoopTaskContext taskCtx = new TaskContext();
        final HadoopConcurrentHashMultimap m = new HadoopConcurrentHashMultimap(job, mem, 16);
        final ConcurrentMap<Integer, Collection<Integer>> mm = new ConcurrentHashMap<>();
        X.println("___ MT");
        multithreaded(new Callable<Object>() {
            @Override public Object call() throws Exception {
                X.println("___ TH in");
                Random rnd = new GridRandom();
                IntWritable key = new IntWritable();
                IntWritable val = new IntWritable();
                HadoopMultimap.Adder a = m.startAdding(taskCtx);
                for (int i = 0; i < 50000; i++) {
                    int k = rnd.nextInt(32000);
                    int v = rnd.nextInt();
                    key.set(k);
                    val.set(v);
                    a.write(key, val);
                    Collection<Integer> list = mm.get(k);
                    if (list == null) {
                        list = new ConcurrentLinkedQueue<>();
                        Collection<Integer> old = mm.putIfAbsent(k, list);
                        if (old != null)
                            list = old;
                    }
                    list.add(v);
                }
                a.close();
                X.println("___ TH out");
                return null;
            }
        }, 3 + rnd.nextInt(27));
        X.println("___ Check: " + m.capacity());
        assertEquals(mm.size(), m.keys());
        assertTrue(m.capacity() > 32000);
        HadoopTaskInput in = m.input(taskCtx);
        while (in.next()) {
            IntWritable key = (IntWritable) in.key();
            Iterator<?> valsIter = in.values();
            Collection<Integer> vals = mm.remove(key.get());
            assertNotNull(vals);
            while (valsIter.hasNext()) {
                IntWritable val = (IntWritable) valsIter.next();
                assertTrue(vals.remove(val.get()));
            }
            assertTrue(vals.isEmpty());
        }
        in.close();
        m.close();
        assertEquals(0, mem.allocatedSize());
    }
}
From source file: org.apache.ignite.internal.processors.hadoop.impl.shuffle.collections.HadoopSkipListSelfTest.java
License: Apache License

/** @throws Exception if failed. */
public void testMultiThreaded() throws Exception {
    GridUnsafeMemory mem = new GridUnsafeMemory(0);
    X.println("___ Started");
    Random rnd = new GridRandom();
    for (int i = 0; i < 20; i++) {
        HadoopJobInfo job = new JobInfo();
        final HadoopTaskContext taskCtx = new TaskContext();
        final HadoopMultimap m = new HadoopSkipList(job, mem);
        final ConcurrentMap<Integer, Collection<Integer>> mm = new ConcurrentHashMap<>();
        X.println("___ MT");
        multithreaded(new Callable<Object>() {
            @Override public Object call() throws Exception {
                X.println("___ TH in");
                Random rnd = new GridRandom();
                IntWritable key = new IntWritable();
                IntWritable val = new IntWritable();
                HadoopMultimap.Adder a = m.startAdding(taskCtx);
                for (int i = 0; i < 50000; i++) {
                    int k = rnd.nextInt(32000);
                    int v = rnd.nextInt();
                    key.set(k);
                    val.set(v);
                    a.write(key, val);
                    Collection<Integer> list = mm.get(k);
                    if (list == null) {
                        list = new ConcurrentLinkedQueue<>();
                        Collection<Integer> old = mm.putIfAbsent(k, list);
                        if (old != null)
                            list = old;
                    }
                    list.add(v);
                }
                a.close();
                X.println("___ TH out");
                return null;
            }
        }, 3 + rnd.nextInt(27));
        HadoopTaskInput in = m.input(taskCtx);
        int prevKey = Integer.MIN_VALUE;
        while (in.next()) {
            IntWritable key = (IntWritable) in.key();
            assertTrue(key.get() > prevKey);
            prevKey = key.get();
            Iterator<?> valsIter = in.values();
            Collection<Integer> vals = mm.remove(key.get());
            assertNotNull(vals);
            while (valsIter.hasNext()) {
                IntWritable val = (IntWritable) valsIter.next();
                assertTrue(vals.remove(val.get()));
            }
            assertTrue(vals.isEmpty());
        }
        in.close();
        m.close();
        assertEquals(0, mem.allocatedSize());
    }
}
From source file: org.apache.ignite.internal.processors.hadoop.shuffle.collections.GridHadoopConcurrentHashMultimapSelftest.java
License: Apache License

/** @throws Exception if failed. */
public void testMultiThreaded() throws Exception {
    GridUnsafeMemory mem = new GridUnsafeMemory(0);
    X.println("___ Started");
    Random rnd = new GridRandom();
    for (int i = 0; i < 20; i++) {
        GridHadoopJobInfo job = new JobInfo();
        final GridHadoopTaskContext taskCtx = new TaskContext();
        final GridHadoopConcurrentHashMultimap m = new GridHadoopConcurrentHashMultimap(job, mem, 16);
        final ConcurrentMap<Integer, Collection<Integer>> mm = new ConcurrentHashMap<>();
        X.println("___ MT");
        multithreaded(new Callable<Object>() {
            @Override public Object call() throws Exception {
                X.println("___ TH in");
                Random rnd = new GridRandom();
                IntWritable key = new IntWritable();
                IntWritable val = new IntWritable();
                GridHadoopMultimap.Adder a = m.startAdding(taskCtx);
                for (int i = 0; i < 50000; i++) {
                    int k = rnd.nextInt(32000);
                    int v = rnd.nextInt();
                    key.set(k);
                    val.set(v);
                    a.write(key, val);
                    Collection<Integer> list = mm.get(k);
                    if (list == null) {
                        list = new ConcurrentLinkedQueue<>();
                        Collection<Integer> old = mm.putIfAbsent(k, list);
                        if (old != null)
                            list = old;
                    }
                    list.add(v);
                }
                a.close();
                X.println("___ TH out");
                return null;
            }
        }, 3 + rnd.nextInt(27));
        X.println("___ Check: " + m.capacity());
        assertEquals(mm.size(), m.keys());
        assertTrue(m.capacity() > 32000);
        GridHadoopTaskInput in = m.input(taskCtx);
        while (in.next()) {
            IntWritable key = (IntWritable) in.key();
            Iterator<?> valsIter = in.values();
            Collection<Integer> vals = mm.remove(key.get());
            assertNotNull(vals);
            while (valsIter.hasNext()) {
                IntWritable val = (IntWritable) valsIter.next();
                assertTrue(vals.remove(val.get()));
            }
            assertTrue(vals.isEmpty());
        }
        in.close();
        m.close();
        assertEquals(0, mem.allocatedSize());
    }
}
From source file: org.apache.ignite.internal.processors.hadoop.shuffle.collections.GridHadoopSkipListSelfTest.java
License: Apache License

/** @throws Exception if failed. */
public void testMultiThreaded() throws Exception {
    GridUnsafeMemory mem = new GridUnsafeMemory(0);
    X.println("___ Started");
    Random rnd = new GridRandom();
    for (int i = 0; i < 20; i++) {
        GridHadoopJobInfo job = new JobInfo();
        final GridHadoopTaskContext taskCtx = new TaskContext();
        final GridHadoopMultimap m = new GridHadoopSkipList(job, mem);
        final ConcurrentMap<Integer, Collection<Integer>> mm = new ConcurrentHashMap<>();
        X.println("___ MT");
        multithreaded(new Callable<Object>() {
            @Override public Object call() throws Exception {
                X.println("___ TH in");
                Random rnd = new GridRandom();
                IntWritable key = new IntWritable();
                IntWritable val = new IntWritable();
                GridHadoopMultimap.Adder a = m.startAdding(taskCtx);
                for (int i = 0; i < 50000; i++) {
                    int k = rnd.nextInt(32000);
                    int v = rnd.nextInt();
                    key.set(k);
                    val.set(v);
                    a.write(key, val);
                    Collection<Integer> list = mm.get(k);
                    if (list == null) {
                        list = new ConcurrentLinkedQueue<>();
                        Collection<Integer> old = mm.putIfAbsent(k, list);
                        if (old != null)
                            list = old;
                    }
                    list.add(v);
                }
                a.close();
                X.println("___ TH out");
                return null;
            }
        }, 3 + rnd.nextInt(27));
        GridHadoopTaskInput in = m.input(taskCtx);
        int prevKey = Integer.MIN_VALUE;
        while (in.next()) {
            IntWritable key = (IntWritable) in.key();
            assertTrue(key.get() > prevKey);
            prevKey = key.get();
            Iterator<?> valsIter = in.values();
            Collection<Integer> vals = mm.remove(key.get());
            assertNotNull(vals);
            while (valsIter.hasNext()) {
                IntWritable val = (IntWritable) valsIter.next();
                assertTrue(vals.remove(val.get()));
            }
            assertTrue(vals.isEmpty());
        }
        in.close();
        m.close();
        assertEquals(0, mem.allocatedSize());
    }
}
From source file: org.apache.mahout.cf.taste.hadoop.als.ParallelALSFactorizationJob.java
License: Apache License

private void initializeM(Vector averageRatings) throws IOException {
    Random random = RandomUtils.getRandom();
    FileSystem fs = FileSystem.get(pathToM(-1).toUri(), getConf());
    SequenceFile.Writer writer = null;
    try {
        writer = new SequenceFile.Writer(fs, getConf(), new Path(pathToM(-1), "part-m-00000"),
                IntWritable.class, VectorWritable.class);
        IntWritable index = new IntWritable();
        VectorWritable featureVector = new VectorWritable();
        for (Vector.Element e : averageRatings.nonZeroes()) {
            Vector row = new DenseVector(numFeatures);
            row.setQuick(0, e.get());
            for (int m = 1; m < numFeatures; m++) {
                row.setQuick(m, random.nextDouble());
            }
            index.set(e.index());
            featureVector.set(row);
            writer.append(index, featureVector);
        }
    } finally {
        Closeables.close(writer, false);
    }
}
From source file: org.apache.mahout.classifier.rbm.MnistPreparer.java
License: Apache License

/**
 * To only process 44,000 images as proposed by the paper [Hinton, 2006], choose size 44000
 * (http://www.cs.toronto.edu/~hinton/absps/ncfast.pdf).
 *
 * @param args the args
 * @return the int
 * @throws Exception the exception
 */
@Override
public int run(String[] args) throws Exception {
    addOutputOption();
    // chunknumber 600 gives nullpointer exception???
    addOption("chunknumber", "cnr", "number of chunks to be created", true);
    addOption("labelpath", "l", "path to the label file", true);
    addOption("imagepath", "i", "path to image file", true);
    addOption("size", "s", "number of pairs to be processed", true);
    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    Path output = getOutputPath();
    FileSystem fileSystem = output.getFileSystem(getConf());
    HadoopUtil.delete(getConf(), getOutputPath());
    fileSystem.mkdirs(output);

    DataInputStream dataReader = new DataInputStream(new FileInputStream(new File(getOption("imagepath"))));
    DataInputStream labelReader = new DataInputStream(new FileInputStream(new File(getOption("labelpath"))));
    labelReader.skipBytes(8);
    dataReader.skipBytes(16);

    int label;
    IntWritable labelVector = new IntWritable();
    VectorWritable imageVector = new VectorWritable(new DenseVector(28 * 28));
    double[] pixels = new double[28 * 28];
    Integer chunks = Integer.parseInt(getOption("chunknumber"));
    Integer size = Integer.parseInt(getOption("size"));
    SequenceFile.Writer[] writer = new SequenceFile.Writer[chunks];
    int writernr = 0;
    Integer closedwriters = 0;
    int cntr = 0;

    // counters for the ten labels; each batch should hold size / (chunks * 10) examples of each label
    Integer[][] batches = new Integer[chunks][10];
    for (int i = 0; i < batches.length; i++) {
        for (int j = 0; j < 10; j++)
            batches[i][j] = size / (10 * chunks);
    }

    try {
        while (cntr < size) {
            writernr = -1;
            label = labelReader.readUnsignedByte();
            labelVector.set(label);
            for (int i = 0; i < pixels.length; i++) {
                pixels[i] = Double.valueOf(String.valueOf(dataReader.readUnsignedByte())) / 255.0;
            }
            for (int i = closedwriters; i < chunks; i++) {
                if (batches[i][label] > 0) {
                    writernr = i;
                    // open writers only when they are needed
                    if (writer[writernr] == null)
                        writer[writernr] = new Writer(fileSystem, getConf(), new Path(output, "chunk" + i),
                                IntWritable.class, VectorWritable.class);
                    break;
                } else {
                    // close writers that are open but already finished
                    for (int j = 0; j < 10; j++) {
                        if (batches[i][j] != 0)
                            break;
                        if (j == 9) {
                            writer[i].close();
                            closedwriters++;
                        }
                    }
                }
            }
            if (closedwriters >= chunks)
                break;
            if (writernr == -1)
                continue;
            cntr++;
            if (cntr % 1000 == 0)
                Logger.getLogger(this.getClass()).info(cntr + " processed pairs");
            imageVector.get().assign(pixels);
            writer[writernr].append(labelVector, imageVector);
            batches[writernr][label]--;
        }
    } catch (EOFException ex) {
        if (writernr > -1) // close last writer
            Closeables.closeQuietly(writer[writernr]);
    }
    if (writernr > -1)
        Closeables.closeQuietly(writer[writernr]);
    return 0;
}
From source file: org.apache.mahout.common.DummyRecordWriterTest.java
License: Apache License

@Test
public void testWrite() {
    DummyRecordWriter<IntWritable, VectorWritable> writer =
            new DummyRecordWriter<IntWritable, VectorWritable>();
    IntWritable reusableIntWritable = new IntWritable();
    VectorWritable reusableVectorWritable = new VectorWritable();
    reusableIntWritable.set(0);
    reusableVectorWritable.set(new DenseVector(new double[] { 1, 2, 3 }));
    writer.write(reusableIntWritable, reusableVectorWritable);
    reusableIntWritable.set(1);
    reusableVectorWritable.set(new DenseVector(new double[] { 4, 5, 6 }));
    writer.write(reusableIntWritable, reusableVectorWritable);
    Assert.assertEquals("The writer must remember the two keys that is written to it", 2,
            writer.getKeys().size());
}
From source file: org.apache.mahout.common.IntTuple.java
License: Apache License

@Override
public void write(DataOutput out) throws IOException {
    out.writeInt(tuple.size());
    IntWritable value = new IntWritable();
    for (int entry : tuple.elements()) {
        value.set(entry);
        value.write(out);
    }
}