List of usage examples for org.apache.hadoop.io.IntWritable.get()
public int get()
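get() simply returns the primitive int wrapped by the IntWritable; the examples below all use it to turn a Hadoop key back into a plain Java int for map lookups and array indexing. A minimal standalone sketch of the method (the class name IntWritableGetExample is illustrative only and assumes hadoop-common is on the classpath):

import org.apache.hadoop.io.IntWritable;

public class IntWritableGetExample {
    public static void main(String[] args) {
        IntWritable clusterId = new IntWritable(42); // wrap a primitive int
        int raw = clusterId.get();                   // get() unwraps it again
        System.out.println(raw);                     // prints 42

        clusterId.set(7);                            // the wrapper is mutable
        System.out.println(clusterId.get());         // prints 7
    }
}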
From source file:org.apache.mahout.clustering.cdbw.CDbwMapper.java
License:Apache License
// Load the representative points written by a previous CDbw iteration from the state path,
// grouped by cluster id (the IntWritable key of each SequenceFile entry).
public static Map<Integer, List<VectorWritable>> getRepresentativePoints(Configuration conf) {
    String statePath = conf.get(CDbwDriver.STATE_IN_KEY);
    Map<Integer, List<VectorWritable>> representativePoints = new HashMap<Integer, List<VectorWritable>>();
    try {
        Path path = new Path(statePath);
        FileSystem fs = FileSystem.get(path.toUri(), conf);
        FileStatus[] status = fs.listStatus(path, new OutputLogFilter());
        for (FileStatus s : status) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
            try {
                IntWritable key = new IntWritable(0);
                VectorWritable point = new VectorWritable();
                while (reader.next(key, point)) {
                    List<VectorWritable> repPoints = representativePoints.get(key.get());
                    if (repPoints == null) {
                        repPoints = new ArrayList<VectorWritable>();
                        representativePoints.put(key.get(), repPoints);
                    }
                    repPoints.add(point);
                    point = new VectorWritable();
                }
            } finally {
                reader.close();
            }
        }
        return representativePoints;
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}
From source file:org.apache.mahout.clustering.cdbw.CDbwReducer.java
License:Apache License
@Override
protected void reduce(IntWritable key, Iterable<WeightedVectorWritable> values, Context context)
        throws IOException, InterruptedException {
    // find the most distant point
    WeightedVectorWritable mdp = null;
    for (WeightedVectorWritable dpw : values) {
        if (mdp == null || mdp.getWeight() < dpw.getWeight()) {
            mdp = new WeightedVectorWritable(dpw.getWeight(), dpw.getVector());
        }
    }
    context.write(new IntWritable(key.get()), mdp.getVector());
}
From source file:org.apache.mahout.clustering.evaluation.RepresentativePointsMapper.java
License:Apache License
// For one (clusterId, point) pair, sum the distances from the point to the cluster's
// representative points and remember, per cluster, the point with the largest total distance.
public static void mapPoint(IntWritable clusterId, WeightedVectorWritable point, DistanceMeasure measure,
        Map<Integer, List<VectorWritable>> representativePoints,
        Map<Integer, WeightedVectorWritable> mostDistantPoints) {
    int key = clusterId.get();
    WeightedVectorWritable currentMDP = mostDistantPoints.get(key);
    List<VectorWritable> repPoints = representativePoints.get(key);
    double totalDistance = 0.0;
    if (repPoints != null) {
        for (VectorWritable refPoint : repPoints) {
            totalDistance += measure.distance(refPoint.get(), point.getVector());
        }
    }
    if (currentMDP == null || currentMDP.getWeight() < totalDistance) {
        mostDistantPoints.put(key, new WeightedVectorWritable(totalDistance, point.getVector().clone()));
    }
}
From source file:org.apache.mahout.clustering.evaluation.RepresentativePointsReducer.java
License:Apache License
@Override
protected void reduce(IntWritable key, Iterable<WeightedVectorWritable> values, Context context)
        throws IOException, InterruptedException {
    // find the most distant point
    WeightedVectorWritable mdp = null;
    for (WeightedVectorWritable dpw : values) {
        if (mdp == null || mdp.getWeight() < dpw.getWeight()) {
            mdp = new WeightedVectorWritable(dpw.getWeight(), dpw.getVector());
        }
    }
    context.write(new IntWritable(key.get()), new VectorWritable(mdp.getVector()));
}
From source file:org.apache.mahout.clustering.spectral.AffinityMatrixInputReducer.java
License:Apache License
@Override
protected void reduce(IntWritable row, Iterable<DistributedRowMatrix.MatrixEntryWritable> values, Context context)
        throws IOException, InterruptedException {
    int size = context.getConfiguration().getInt(Keys.AFFINITY_DIMENSIONS, Integer.MAX_VALUE);
    RandomAccessSparseVector out = new RandomAccessSparseVector(size, 100);
    for (DistributedRowMatrix.MatrixEntryWritable element : values) {
        out.setQuick(element.getCol(), element.getVal());
        if (log.isDebugEnabled()) {
            log.debug("(DEBUG - REDUCE) Row[{}], Column[{}], Value[{}]",
                    row.get(), element.getCol(), element.getVal());
        }
    }
    SequentialAccessSparseVector output = new SequentialAccessSparseVector(out);
    context.write(row, new VectorWritable(output));
}
From source file:org.apache.mahout.clustering.spectral.common.AffinityMatrixInputReducer.java
License:Apache License
@Override
protected void reduce(IntWritable row, Iterable<DistributedRowMatrix.MatrixEntryWritable> values, Context context)
        throws IOException, InterruptedException {
    int size = context.getConfiguration().getInt(EigencutsKeys.AFFINITY_DIMENSIONS, Integer.MAX_VALUE);
    RandomAccessSparseVector out = new RandomAccessSparseVector(size, 100);
    for (DistributedRowMatrix.MatrixEntryWritable element : values) {
        out.setQuick(element.getCol(), element.getVal());
        if (log.isDebugEnabled()) {
            log.debug("(DEBUG - REDUCE) Row[{}], Column[{}], Value[{}]",
                    new Object[] { row.get(), element.getCol(), element.getVal() });
        }
    }
    SequentialAccessSparseVector output = new SequentialAccessSparseVector(out);
    context.write(row, new VectorWritable(output));
}
From source file:org.apache.mahout.clustering.spectral.eigencuts.EigencutsSensitivityMapper.java
License:Apache License
@Override
protected void map(IntWritable row, VectorWritable vw, Context context) throws IOException, InterruptedException {
    // first, does this particular eigenvector even pass the required threshold?
    double eigenvalue = Math.abs(eigenvalues.get(row.get()));
    double betak = -Functions.LOGARITHM.apply(2) / Functions.LOGARITHM.apply(eigenvalue);
    if (eigenvalue >= 1.0 || betak <= epsilon * beta0) {
        // doesn't pass the threshold! quit
        return;
    }

    // go through the vector, performing the calculations
    // sadly, no way to get around n^2 computations
    Map<Integer, EigencutsSensitivityNode> columns = Maps.newHashMap();
    Vector ev = vw.get();
    for (int i = 0; i < ev.size(); i++) {
        double minsij = Double.MAX_VALUE;
        int minInd = -1;
        for (int j = 0; j < ev.size(); j++) {
            double sij = performSensitivityCalculation(eigenvalue, ev.get(i), ev.get(j), diagonal.get(i),
                    diagonal.get(j));

            // perform non-maximal suppression
            // is this the smallest value in the row?
            if (sij < minsij) {
                minsij = sij;
                minInd = j;
            }
        }

        // is this the smallest value in the column?
        Integer column = minInd;
        EigencutsSensitivityNode value = new EigencutsSensitivityNode(i, minInd, minsij);
        if (!columns.containsKey(column)) {
            columns.put(column, value);
        } else if (columns.get(column).getSensitivity() > minsij) {
            columns.remove(column);
            columns.put(column, value);
        }
    }

    // write whatever values made it through
    for (EigencutsSensitivityNode e : columns.values()) {
        context.write(new IntWritable(e.getRow()), e);
    }
}
From source file:org.apache.mahout.clustering.spectral.eigencuts.TestEigencutsAffinityCutsJob.java
License:Apache License
/**
 * This is by far the trickiest step. However, an easy condition is if
 * we have only two vertices - indicating vertices on the diagonal of the
 * two matrices - then we simply exit (since the algorithm does not operate
 * on the diagonal; it makes no sense to perform cuts by isolating data
 * points from themselves).
 *
 * If there are four points, then first we must separate the two which
 * belong to the affinity matrix from the two that are sensitivities. In theory,
 * each pair should have exactly the same value (symmetry). If the sensitivity
 * is below a certain threshold, then we set the two values of the affinity
 * matrix to 0 (but not before adding the affinity values to the diagonal, so
 * as to maintain the overall sum of the row of the affinity matrix).
 *
 * @throws Exception
 */
@Test
public void testEigencutsAffinityCutsCombiner() throws Exception {
    Configuration conf = new Configuration();
    Path affinity = new Path("affinity");
    Path sensitivity = new Path("sensitivity");
    conf.set(EigencutsKeys.AFFINITY_PATH, affinity.getName());
    conf.setInt(EigencutsKeys.AFFINITY_DIMENSIONS, this.affinity.length);

    // since we need the working paths to distinguish the vertex types,
    // we can't use the mapper (since we have no way of manually setting
    // the Context.workingPath() )
    Map<Text, List<VertexWritable>> data = buildMapData(affinity, sensitivity, this.sensitivity);

    // now, set up the combiner
    EigencutsAffinityCutsCombiner combiner = new EigencutsAffinityCutsCombiner();
    DummyRecordWriter<Text, VertexWritable> redWriter = new DummyRecordWriter<Text, VertexWritable>();
    Reducer<Text, VertexWritable, Text, VertexWritable>.Context redContext = DummyRecordWriter.build(combiner,
            conf, redWriter, Text.class, VertexWritable.class);

    // perform the combining
    for (Map.Entry<Text, List<VertexWritable>> entry : data.entrySet()) {
        combiner.reduce(entry.getKey(), entry.getValue(), redContext);
    }

    // test the number of cuts, there should be 2
    assertEquals("Number of cuts detected", 4,
            redContext.getCounter(EigencutsAffinityCutsJob.CUTSCOUNTER.NUM_CUTS).getValue());

    // loop through all the results; let's see if they match up to our
    // affinity matrix (and all the cuts appear where they should)
    Map<Text, List<VertexWritable>> results = redWriter.getData();
    for (Map.Entry<Text, List<VertexWritable>> entry : results.entrySet()) {
        List<VertexWritable> row = entry.getValue();
        IntWritable key = new IntWritable(Integer.parseInt(entry.getKey().toString()));
        double calcDiag = 0.0;
        double trueDiag = sumOfRowCuts(key.get(), this.sensitivity);
        for (VertexWritable e : row) {
            // should the value have been cut, e.g. set to 0?
            if (key.get() == e.getCol()) {
                // we have our diagonal
                calcDiag += e.getValue();
            } else if (this.sensitivity[key.get()][e.getCol()] == 0.0) {
                // no, corresponding affinity should have same value as before
                assertEquals("Preserved affinity value", this.affinity[key.get()][e.getCol()], e.getValue(),
                        EPSILON);
            } else {
                // yes, corresponding affinity value should be 0
                assertEquals("Cut affinity value", 0.0, e.getValue(), EPSILON);
            }
        }

        // check the diagonal has the correct sum
        assertEquals("Diagonal sum from cuts", trueDiag, calcDiag, EPSILON);
    }
}
From source file:org.apache.mahout.clustering.spectral.eigencuts.TestEigencutsAffinityCutsJob.java
License:Apache License
/**
 * Fairly straightforward: the task here is to reassemble the rows of the
 * affinity matrix. The tricky part is that any specific element in the list
 * of elements which does NOT lie on the diagonal will be so because it
 * did not drop below the sensitivity threshold, hence it was not "cut".
 *
 * On the flip side, there will be many entries whose coordinate is now
 * set to the diagonal, indicating they were previously affinity entries
 * whose sensitivities were below the threshold, and hence were "cut" -
 * set to 0 at their original coordinates, and had their values added to
 * the diagonal entry (hence the numerous entries with the coordinate of
 * the diagonal).
 *
 * @throws Exception
 */
@Test
public void testEigencutsAffinityCutsReducer() throws Exception {
    Configuration conf = new Configuration();
    Path affinity = new Path("affinity");
    Path sensitivity = new Path("sensitivity");
    conf.set(EigencutsKeys.AFFINITY_PATH, affinity.getName());
    conf.setInt(EigencutsKeys.AFFINITY_DIMENSIONS, this.affinity.length);

    // since we need the working paths to distinguish the vertex types,
    // we can't use the mapper (since we have no way of manually setting
    // the Context.workingPath() )
    Map<Text, List<VertexWritable>> data = buildMapData(affinity, sensitivity, this.sensitivity);

    // now, set up the combiner
    EigencutsAffinityCutsCombiner combiner = new EigencutsAffinityCutsCombiner();
    DummyRecordWriter<Text, VertexWritable> comWriter = new DummyRecordWriter<Text, VertexWritable>();
    Reducer<Text, VertexWritable, Text, VertexWritable>.Context comContext = DummyRecordWriter.build(combiner,
            conf, comWriter, Text.class, VertexWritable.class);

    // perform the combining
    for (Map.Entry<Text, List<VertexWritable>> entry : data.entrySet()) {
        combiner.reduce(entry.getKey(), entry.getValue(), comContext);
    }

    // finally, set up the reduction writers
    EigencutsAffinityCutsReducer reducer = new EigencutsAffinityCutsReducer();
    DummyRecordWriter<IntWritable, VectorWritable> redWriter = new DummyRecordWriter<IntWritable, VectorWritable>();
    Reducer<Text, VertexWritable, IntWritable, VectorWritable>.Context redContext = DummyRecordWriter
            .build(reducer, conf, redWriter, Text.class, VertexWritable.class);

    // perform the reduction
    for (Text key : comWriter.getKeys()) {
        reducer.reduce(key, comWriter.getValue(key), redContext);
    }

    // now, check that the affinity matrix is correctly formed
    for (IntWritable row : redWriter.getKeys()) {
        List<VectorWritable> results = redWriter.getValue(row);
        // there should only be 1 vector
        assertEquals("Only one vector with a given row number", 1, results.size());
        Vector therow = results.get(0).get();
        for (Vector.Element e : therow) {
            // check the diagonal
            if (row.get() == e.index()) {
                assertEquals("Correct diagonal sum of cuts", sumOfRowCuts(row.get(), this.sensitivity),
                        e.get(), EPSILON);
            } else {
                // not on the diagonal...if it was an element labeled to be cut,
                // it should have a value of 0. Otherwise, it should have kept its
                // previous value
                if (this.sensitivity[row.get()][e.index()] == 0.0) {
                    // should be what it was originally
                    assertEquals("Preserved element", this.affinity[row.get()][e.index()], e.get(), EPSILON);
                } else {
                    // should be 0
                    assertEquals("Cut element", 0.0, e.get(), EPSILON);
                }
            }
        }
    }
}
From source file:org.apache.mahout.clustering.topdown.postprocessor.ClusterOutputPostProcessorMapper.java
License:Apache License
@Override
public void map(IntWritable key, WeightedVectorWritable val, Context context)
        throws IOException, InterruptedException {
    // by pivoting on the cluster mapping value, we can make sure that each unique cluster goes to its
    // own reducer, since they are numbered from 0 to k-1, where k is the number of clusters
    outputVector.set(val.getVector());
    context.write(new IntWritable(newClusterMappings.get(key.get())), outputVector);
}