Example usage for org.apache.hadoop.io IntWritable get

List of usage examples for org.apache.hadoop.io IntWritable get

Introduction

On this page you can find example usage for org.apache.hadoop.io.IntWritable.get().

Prototype

public int get() 

Document

Return the value of this IntWritable.
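
Example

Before the full examples, here is a minimal, self-contained sketch (not taken from any of the source files below; the class name IntWritableGetExample is only for illustration) showing how get() returns the int wrapped by an IntWritable:

import org.apache.hadoop.io.IntWritable;

public class IntWritableGetExample {
    public static void main(String[] args) {
        // wrap a primitive int in a Hadoop Writable
        IntWritable writable = new IntWritable(42);

        // get() returns the wrapped int value
        int value = writable.get();
        System.out.println("value = " + value); // prints: value = 42

        // set() replaces the wrapped value; a subsequent get() reflects the change
        writable.set(7);
        System.out.println("value = " + writable.get()); // prints: value = 7
    }
}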

Usage

From source file:org.apache.mahout.clustering.cdbw.CDbwMapper.java

License:Apache License

public static Map<Integer, List<VectorWritable>> getRepresentativePoints(Configuration conf) {
    String statePath = conf.get(CDbwDriver.STATE_IN_KEY);
    Map<Integer, List<VectorWritable>> representativePoints = new HashMap<Integer, List<VectorWritable>>();
    try {
        Path path = new Path(statePath);
        FileSystem fs = FileSystem.get(path.toUri(), conf);
        FileStatus[] status = fs.listStatus(path, new OutputLogFilter());
        for (FileStatus s : status) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
            try {
                IntWritable key = new IntWritable(0);
                VectorWritable point = new VectorWritable();
                while (reader.next(key, point)) {
                    List<VectorWritable> repPoints = representativePoints.get(key.get());
                    if (repPoints == null) {
                        repPoints = new ArrayList<VectorWritable>();
                        representativePoints.put(key.get(), repPoints);
                    }
                    repPoints.add(point);
                    point = new VectorWritable();
                }
            } finally {
                reader.close();
            }
        }
        return representativePoints;
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}

From source file:org.apache.mahout.clustering.cdbw.CDbwReducer.java

License:Apache License

@Override
protected void reduce(IntWritable key, Iterable<WeightedVectorWritable> values, Context context)
        throws IOException, InterruptedException {
    // find the most distant point
    WeightedVectorWritable mdp = null;
    for (WeightedVectorWritable dpw : values) {
        if (mdp == null || mdp.getWeight() < dpw.getWeight()) {
            mdp = new WeightedVectorWritable(dpw.getWeight(), dpw.getVector());
        }
    }
    context.write(new IntWritable(key.get()), mdp.getVector());
}

From source file:org.apache.mahout.clustering.evaluation.RepresentativePointsMapper.java

License:Apache License

public static void mapPoint(IntWritable clusterId, WeightedVectorWritable point, DistanceMeasure measure,
        Map<Integer, List<VectorWritable>> representativePoints,
        Map<Integer, WeightedVectorWritable> mostDistantPoints) {
    int key = clusterId.get();
    WeightedVectorWritable currentMDP = mostDistantPoints.get(key);

    List<VectorWritable> repPoints = representativePoints.get(key);
    double totalDistance = 0.0;
    if (repPoints != null) {
        for (VectorWritable refPoint : repPoints) {
            totalDistance += measure.distance(refPoint.get(), point.getVector());
        }
    }
    if (currentMDP == null || currentMDP.getWeight() < totalDistance) {
        mostDistantPoints.put(key, new WeightedVectorWritable(totalDistance, point.getVector().clone()));
    }
}

From source file:org.apache.mahout.clustering.evaluation.RepresentativePointsReducer.java

License:Apache License

@Override
protected void reduce(IntWritable key, Iterable<WeightedVectorWritable> values, Context context)
        throws IOException, InterruptedException {
    // find the most distant point
    WeightedVectorWritable mdp = null;
    for (WeightedVectorWritable dpw : values) {
        if (mdp == null || mdp.getWeight() < dpw.getWeight()) {
            mdp = new WeightedVectorWritable(dpw.getWeight(), dpw.getVector());
        }
    }
    context.write(new IntWritable(key.get()), new VectorWritable(mdp.getVector()));
}

From source file:org.apache.mahout.clustering.spectral.AffinityMatrixInputReducer.java

License:Apache License

@Override
protected void reduce(IntWritable row, Iterable<DistributedRowMatrix.MatrixEntryWritable> values,
        Context context) throws IOException, InterruptedException {
    int size = context.getConfiguration().getInt(Keys.AFFINITY_DIMENSIONS, Integer.MAX_VALUE);
    RandomAccessSparseVector out = new RandomAccessSparseVector(size, 100);

    for (DistributedRowMatrix.MatrixEntryWritable element : values) {
        out.setQuick(element.getCol(), element.getVal());
        if (log.isDebugEnabled()) {
            log.debug("(DEBUG - REDUCE) Row[{}], Column[{}], Value[{}]", row.get(), element.getCol(),
                    element.getVal());
        }
    }
    SequentialAccessSparseVector output = new SequentialAccessSparseVector(out);
    context.write(row, new VectorWritable(output));
}

From source file:org.apache.mahout.clustering.spectral.common.AffinityMatrixInputReducer.java

License:Apache License

@Override
protected void reduce(IntWritable row, Iterable<DistributedRowMatrix.MatrixEntryWritable> values,
        Context context) throws IOException, InterruptedException {
    int size = context.getConfiguration().getInt(EigencutsKeys.AFFINITY_DIMENSIONS, Integer.MAX_VALUE);
    RandomAccessSparseVector out = new RandomAccessSparseVector(size, 100);

    for (DistributedRowMatrix.MatrixEntryWritable element : values) {
        out.setQuick(element.getCol(), element.getVal());
        if (log.isDebugEnabled()) {
            log.debug("(DEBUG - REDUCE) Row[{}], Column[{}], Value[{}]",
                    new Object[] { row.get(), element.getCol(), element.getVal() });
        }
    }
    SequentialAccessSparseVector output = new SequentialAccessSparseVector(out);
    context.write(row, new VectorWritable(output));
}

From source file:org.apache.mahout.clustering.spectral.eigencuts.EigencutsSensitivityMapper.java

License:Apache License

@Override
protected void map(IntWritable row, VectorWritable vw, Context context)
        throws IOException, InterruptedException {

    // first, does this particular eigenvector even pass the required threshold?
    double eigenvalue = Math.abs(eigenvalues.get(row.get()));
    double betak = -Functions.LOGARITHM.apply(2) / Functions.LOGARITHM.apply(eigenvalue);
    if (eigenvalue >= 1.0 || betak <= epsilon * beta0) {
        // doesn't pass the threshold! quit
        return;
    }

    // go through the vector, performing the calculations
    // sadly, no way to get around n^2 computations      
    Map<Integer, EigencutsSensitivityNode> columns = Maps.newHashMap();
    Vector ev = vw.get();
    for (int i = 0; i < ev.size(); i++) {
        double minsij = Double.MAX_VALUE;
        int minInd = -1;
        for (int j = 0; j < ev.size(); j++) {
            double sij = performSensitivityCalculation(eigenvalue, ev.get(i), ev.get(j), diagonal.get(i),
                    diagonal.get(j));

            // perform non-maximal suppression
            // is this the smallest value in the row?
            if (sij < minsij) {
                minsij = sij;
                minInd = j;
            }
        }

        // is this the smallest value in the column?
        Integer column = minInd;
        EigencutsSensitivityNode value = new EigencutsSensitivityNode(i, minInd, minsij);
        if (!columns.containsKey(column)) {
            columns.put(column, value);
        } else if (columns.get(column).getSensitivity() > minsij) {
            columns.remove(column);
            columns.put(column, value);
        }
    }

    // write whatever values made it through

    for (EigencutsSensitivityNode e : columns.values()) {
        context.write(new IntWritable(e.getRow()), e);
    }
}

From source file:org.apache.mahout.clustering.spectral.eigencuts.TestEigencutsAffinityCutsJob.java

License:Apache License

/**
 * This is by far the trickiest step. However, an easy condition is if 
 * we have only two vertices - indicating vertices on the diagonal of the
 * two matrices - then we simply exit (since the algorithm does not operate
 * on the diagonal; it makes no sense to perform cuts by isolating data
 * points from themselves).
 * 
 * If there are four points, then first we must separate the two which
 * belong to the affinity matrix from the two that are sensitivities. In theory,
 * each pair should have exactly the same value (symmetry). If the sensitivity
 * is below a certain threshold, then we set the two values of the affinity
 * matrix to 0 (but not before adding the affinity values to the diagonal, so
 * as to maintain the overall sum of the row of the affinity matrix).
 * 
 * @throws Exception
 */
@Test
public void testEigencutsAffinityCutsCombiner() throws Exception {
    Configuration conf = new Configuration();
    Path affinity = new Path("affinity");
    Path sensitivity = new Path("sensitivity");
    conf.set(EigencutsKeys.AFFINITY_PATH, affinity.getName());
    conf.setInt(EigencutsKeys.AFFINITY_DIMENSIONS, this.affinity.length);

    // since we need the working paths to distinguish the vertex types, 
    // we can't use the mapper (since we have no way of manually setting
    // the Context.workingPath())
    Map<Text, List<VertexWritable>> data = buildMapData(affinity, sensitivity, this.sensitivity);

    // now, set up the combiner
    EigencutsAffinityCutsCombiner combiner = new EigencutsAffinityCutsCombiner();
    DummyRecordWriter<Text, VertexWritable> redWriter = new DummyRecordWriter<Text, VertexWritable>();
    Reducer<Text, VertexWritable, Text, VertexWritable>.Context redContext = DummyRecordWriter.build(combiner,
            conf, redWriter, Text.class, VertexWritable.class);

    // perform the combining
    for (Map.Entry<Text, List<VertexWritable>> entry : data.entrySet()) {
        combiner.reduce(entry.getKey(), entry.getValue(), redContext);
    }

    // test the number of cuts; there should be 4
    assertEquals("Number of cuts detected", 4,
            redContext.getCounter(EigencutsAffinityCutsJob.CUTSCOUNTER.NUM_CUTS).getValue());

    // loop through all the results; let's see if they match up to our
    // affinity matrix (and all the cuts appear where they should)
    Map<Text, List<VertexWritable>> results = redWriter.getData();
    for (Map.Entry<Text, List<VertexWritable>> entry : results.entrySet()) {
        List<VertexWritable> row = entry.getValue();
        IntWritable key = new IntWritable(Integer.parseInt(entry.getKey().toString()));

        double calcDiag = 0.0;
        double trueDiag = sumOfRowCuts(key.get(), this.sensitivity);
        for (VertexWritable e : row) {

            // should the value have been cut, e.g. set to 0?
            if (key.get() == e.getCol()) {
                // we have our diagonal
                calcDiag += e.getValue();
            } else if (this.sensitivity[key.get()][e.getCol()] == 0.0) {
                // no, corresponding affinity should have same value as before
                assertEquals("Preserved affinity value", this.affinity[key.get()][e.getCol()], e.getValue(),
                        EPSILON);
            } else {
                // yes, corresponding affinity value should be 0
                assertEquals("Cut affinity value", 0.0, e.getValue(), EPSILON);
            }
        }
        // check the diagonal has the correct sum
        assertEquals("Diagonal sum from cuts", trueDiag, calcDiag, EPSILON);
    }
}

From source file:org.apache.mahout.clustering.spectral.eigencuts.TestEigencutsAffinityCutsJob.java

License:Apache License

/**
 * Fairly straightforward: the task here is to reassemble the rows of the
 * affinity matrix. The tricky part is that any specific element in the list
 * of elements which does NOT lay on the diagonal will be so because it
 * did not drop below the sensitivity threshold, hence it was not "cut". 
 *
 * On the flip side, there will be many entries whose coordinate is now
 * set to the diagonal, indicating they were previously affinity entries
 * whose sensitivities were below the threshold, and hence were "cut" - 
 * set to 0 at their original coordinates, and had their values added to
 * the diagonal entry (hence the numerous entries with the coordinate of
 * the diagonal).
 * 
 * @throws Exception
 */
@Test
public void testEigencutsAffinityCutsReducer() throws Exception {
    Configuration conf = new Configuration();
    Path affinity = new Path("affinity");
    Path sensitivity = new Path("sensitivity");
    conf.set(EigencutsKeys.AFFINITY_PATH, affinity.getName());
    conf.setInt(EigencutsKeys.AFFINITY_DIMENSIONS, this.affinity.length);

    // since we need the working paths to distinguish the vertex types, 
    // we can't use the mapper (since we have no way of manually setting
    // the Context.workingPath())
    Map<Text, List<VertexWritable>> data = buildMapData(affinity, sensitivity, this.sensitivity);

    // now, set up the combiner
    EigencutsAffinityCutsCombiner combiner = new EigencutsAffinityCutsCombiner();
    DummyRecordWriter<Text, VertexWritable> comWriter = new DummyRecordWriter<Text, VertexWritable>();
    Reducer<Text, VertexWritable, Text, VertexWritable>.Context comContext = DummyRecordWriter.build(combiner,
            conf, comWriter, Text.class, VertexWritable.class);

    // perform the combining
    for (Map.Entry<Text, List<VertexWritable>> entry : data.entrySet()) {
        combiner.reduce(entry.getKey(), entry.getValue(), comContext);
    }

    // finally, set up the reduction writers
    EigencutsAffinityCutsReducer reducer = new EigencutsAffinityCutsReducer();
    DummyRecordWriter<IntWritable, VectorWritable> redWriter = new DummyRecordWriter<IntWritable, VectorWritable>();
    Reducer<Text, VertexWritable, IntWritable, VectorWritable>.Context redContext = DummyRecordWriter
            .build(reducer, conf, redWriter, Text.class, VertexWritable.class);

    // perform the reduction
    for (Text key : comWriter.getKeys()) {
        reducer.reduce(key, comWriter.getValue(key), redContext);
    }

    // now, check that the affinity matrix is correctly formed
    for (IntWritable row : redWriter.getKeys()) {
        List<VectorWritable> results = redWriter.getValue(row);
        // there should only be 1 vector
        assertEquals("Only one vector with a given row number", 1, results.size());
        Vector therow = results.get(0).get();
        for (Vector.Element e : therow) {
            // check the diagonal
            if (row.get() == e.index()) {
                assertEquals("Correct diagonal sum of cuts", sumOfRowCuts(row.get(), this.sensitivity), e.get(),
                        EPSILON);
            } else {
                // not on the diagonal...if it was an element labeled to be cut,
                // it should have a value of 0. Otherwise, it should have kept its
                // previous value
                if (this.sensitivity[row.get()][e.index()] == 0.0) {
                    // should be what it was originally
                    assertEquals("Preserved element", this.affinity[row.get()][e.index()], e.get(), EPSILON);
                } else {
                    // should be 0
                    assertEquals("Cut element", 0.0, e.get(), EPSILON);
                }
            }
        }
    }
}

From source file:org.apache.mahout.clustering.topdown.postprocessor.ClusterOutputPostProcessorMapper.java

License:Apache License

@Override
public void map(IntWritable key, WeightedVectorWritable val, Context context)
        throws IOException, InterruptedException {
    // by pivoting on the cluster mapping value, we can make sure that each unique cluster goes to its own reducer,
    // since they are numbered from 0 to k-1, where k is the number of clusters
    outputVector.set(val.getVector());
    context.write(new IntWritable(newClusterMappings.get(key.get())), outputVector);
}