Usage examples for org.apache.hadoop.io.IntWritable.get()
public int get()
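IntWritable is Hadoop's mutable Writable wrapper around a primitive int; get() returns the wrapped value and set(int) replaces it. Before the examples taken from real projects below, here is a minimal self-contained sketch of that contract (the class name IntWritableGetDemo is hypothetical, written only for illustration):

import org.apache.hadoop.io.IntWritable;

public class IntWritableGetDemo {
    public static void main(String[] args) {
        // Wrap a primitive int in a Hadoop Writable.
        IntWritable writable = new IntWritable(42);

        // get() unwraps the primitive value.
        int value = writable.get();
        System.out.println(value); // prints 42

        // Writables are mutable: set() replaces the wrapped value, which
        // lets Hadoop reuse one instance across many records, as several
        // of the reader loops below do with a single IntWritable key.
        writable.set(7);
        System.out.println(writable.get()); // prints 7
    }
}

Note the reuse pattern in the examples that follow: because Hadoop recycles the same IntWritable instance between calls to reader.next(key, value), callers copy the primitive out with key.get() (or clone the value object) before storing it.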
From source file: org.apache.mahout.clustering.topdown.postprocessor.ClusterOutputPostProcessorReducer.java
License: Apache License

/**
 * The key is the remapped cluster id and the values contain the vectors in that cluster.
 */
@Override
protected void reduce(IntWritable key, Iterable<VectorWritable> values, Context context)
        throws IOException, InterruptedException {
    // Remap the cluster back to its original id and then
    // output the vectors with their correct cluster id.
    IntWritable outKey = new IntWritable(reverseClusterMappings.get(key.get()));
    System.out.println(outKey + " this: " + this);
    for (VectorWritable value : values) {
        context.write(outKey, value);
    }
}
From source file: org.apache.mahout.common.IntTuple.java
License: Apache License

@Override
public void readFields(DataInput in) throws IOException {
    int len = in.readInt();
    tuple = new IntArrayList(len);
    IntWritable value = new IntWritable();
    for (int i = 0; i < len; i++) {
        value.readFields(in);
        tuple.add(value.get());
    }
}
From source file: org.apache.mahout.common.mapreduce.TransposeMapper.java
License: Apache License

@Override
protected void map(IntWritable r, VectorWritable v, Context ctx) throws IOException, InterruptedException {
    int row = r.get();
    for (Vector.Element e : v.get().nonZeroes()) {
        RandomAccessSparseVector tmp = new RandomAccessSparseVector(newNumCols, 1);
        tmp.setQuick(row, e.get());
        r.set(e.index());
        ctx.write(r, new VectorWritable(tmp));
    }
}
From source file: org.apache.mahout.df.mapred.inmem.InMemBuilder.java
License: Apache License

@Override
protected DecisionForest parseOutput(JobConf conf, PredictionCallback callback) throws IOException {
    Map<Integer, MapredOutput> output = new HashMap<Integer, MapredOutput>();

    Path outputPath = getOutputPath(conf);
    FileSystem fs = outputPath.getFileSystem(conf);

    Path[] outfiles = DFUtils.listOutputFiles(fs, outputPath);

    // import the InMemOutputs
    IntWritable key = new IntWritable();
    MapredOutput value = new MapredOutput();

    for (Path path : outfiles) {
        Reader reader = new Reader(fs, path, conf);
        try {
            while (reader.next(key, value)) {
                output.put(key.get(), value.clone());
            }
        } finally {
            reader.close();
        }
    }

    return processOutput(output, callback);
}
From source file: org.apache.mahout.df.mapred.inmem.InMemInputFormatTest.java
License: Apache License

public void testRecordReader() throws Exception {
    int n = 1;
    int maxNumSplits = 100;
    int maxNbTrees = 1000;

    Random rng = RandomUtils.getRandom();

    for (int nloop = 0; nloop < n; nloop++) {
        int numSplits = rng.nextInt(maxNumSplits) + 1;
        int nbTrees = rng.nextInt(maxNbTrees) + 1;

        JobConf conf = new JobConf();
        Builder.setNbTrees(conf, nbTrees);

        InMemInputFormat inputFormat = new InMemInputFormat();
        InputSplit[] splits = inputFormat.getSplits(conf, numSplits);

        for (int index = 0; index < numSplits; index++) {
            InMemInputSplit split = (InMemInputSplit) splits[index];
            InMemRecordReader reader = (InMemRecordReader) inputFormat.getRecordReader(split, conf, null);

            for (int tree = 0; tree < split.getNbTrees(); tree++) {
                IntWritable key = reader.createKey();
                NullWritable value = reader.createValue();

                // reader.next() should return true until there is no tree left
                assertEquals(tree < split.getNbTrees(), reader.next(key, value));
                assertEquals(split.getFirstId() + tree, key.get());
            }
        }
    }
}
From source file: org.apache.mahout.df.mapred.inmem.InMemMapper.java
License: Apache License

public void map(IntWritable key, OutputCollector<IntWritable, MapredOutput> output, InMemInputSplit split)
        throws IOException {
    SingleTreePredictions callback = null;
    int[] predictions = null;

    if (isOobEstimate() && !isNoOutput()) {
        callback = new SingleTreePredictions(data.size());
        predictions = callback.getPredictions();
    }

    initRandom(split);

    log.debug("Building...");

    Node tree = bagging.build(key.get(), rng, callback);

    if (!isNoOutput()) {
        log.debug("Outputing...");
        MapredOutput mrOut = new MapredOutput(tree, predictions);
        output.collect(key, mrOut);
    }
}
From source file: org.apache.mahout.df.mapred.partial.Step0Job.java
License: Apache License

/**
 * Extracts the output and processes it
 *
 * @param job
 * @return firstIds for each partition in Hadoop's order
 * @throws IOException
 */
protected Step0Output[] parseOutput(JobConf job) throws IOException {
    int numMaps = job.getNumMapTasks();
    FileSystem fs = outputPath.getFileSystem(job);

    Path[] outfiles = DFUtils.listOutputFiles(fs, outputPath);

    int[] keys = new int[numMaps];
    Step0Output[] values = new Step0Output[numMaps];

    // read all the outputs
    IntWritable key = new IntWritable();
    Step0Output value = new Step0Output(0L, 0);

    int index = 0;
    for (Path path : outfiles) {
        Reader reader = new Reader(fs, path, job);
        try {
            while (reader.next(key, value)) {
                keys[index] = key.get();
                values[index] = value.clone();
                index++;
            }
        } finally {
            reader.close();
        }
    }

    return processOutput(keys, values);
}
From source file: org.apache.mahout.df.mapreduce.inmem.InMemBuilder.java
License: Apache License

@Override
protected DecisionForest parseOutput(Job job, PredictionCallback callback) throws IOException {
    Configuration conf = job.getConfiguration();

    Map<Integer, MapredOutput> output = new HashMap<Integer, MapredOutput>();

    Path outputPath = getOutputPath(conf);
    FileSystem fs = outputPath.getFileSystem(conf);

    Path[] outfiles = DFUtils.listOutputFiles(fs, outputPath);

    // import the InMemOutputs
    IntWritable key = new IntWritable();
    MapredOutput value = new MapredOutput();

    for (Path path : outfiles) {
        Reader reader = new Reader(fs, path, conf);
        try {
            while (reader.next(key, value)) {
                output.put(key.get(), value.clone());
            }
        } finally {
            reader.close();
        }
    }

    return processOutput(output, callback);
}
From source file: org.apache.mahout.df.mapreduce.inmem.InMemMapper.java
License: Apache License

protected void map(IntWritable key, Context context) throws IOException, InterruptedException {
    SingleTreePredictions callback = null;
    int[] predictions = null;

    if (isOobEstimate() && !isNoOutput()) {
        callback = new SingleTreePredictions(data.size());
        predictions = callback.getPredictions();
    }

    initRandom((InMemInputSplit) context.getInputSplit());

    log.debug("Building...");

    Node tree = bagging.build(key.get(), rng, callback);

    if (!isNoOutput()) {
        log.debug("Outputing...");
        MapredOutput mrOut = new MapredOutput(tree, predictions);
        context.write(key, mrOut);
    }
}
From source file: org.apache.mahout.df.mapreduce.partial.Step0Job.java
License: Apache License

/**
 * Extracts the output and processes it
 *
 * @return info for each partition in Hadoop's order
 * @throws IOException
 */
protected Step0Output[] parseOutput(JobContext job) throws IOException {
    Configuration conf = job.getConfiguration();

    log.info("mapred.map.tasks = {}", conf.getInt("mapred.map.tasks", -1));

    FileSystem fs = outputPath.getFileSystem(conf);

    Path[] outfiles = DFUtils.listOutputFiles(fs, outputPath);

    List<Integer> keys = new ArrayList<Integer>();
    List<Step0Output> values = new ArrayList<Step0Output>();

    // read all the outputs
    IntWritable key = new IntWritable();
    Step0Output value = new Step0Output(0L, 0);

    for (Path path : outfiles) {
        Reader reader = new Reader(fs, path, conf);
        try {
            while (reader.next(key, value)) {
                keys.add(key.get());
                values.add(value.clone());
            }
        } finally {
            reader.close();
        }
    }

    return processOutput(keys, values);
}