Example usage for org.apache.hadoop.io.IntWritable.get()

Introduction

This page collects example usages of org.apache.hadoop.io.IntWritable.get(); all of the snippets below come from Apache Mahout.

Prototype

public int get() 

Document

Return the value of this IntWritable.
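
For orientation, here is a minimal, self-contained sketch of get() in action; the class name IntWritableGetExample is illustrative and not taken from the sources below.

import org.apache.hadoop.io.IntWritable;

public class IntWritableGetExample {
    public static void main(String[] args) {
        IntWritable writable = new IntWritable(42); // wrap a primitive int
        int value = writable.get();                 // get() returns the wrapped value
        System.out.println(value);                  // prints 42

        writable.set(7);                            // IntWritable is mutable, so instances are reusable
        System.out.println(writable.get());         // prints 7
    }
}

Most of the real-world examples below follow the same pattern: a Mapper receives an IntWritable key and calls get() to obtain the primitive index it wraps.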

Usage

From source file: org.apache.mahout.cf.taste.hadoop.als.PredictionMapper.java

License: Apache License

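Here get() unwraps the user index from the mapper key; the index selects the user's feature vector from U and, when long IDs are in use, is mapped back to the original user ID.
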
@Override
protected void map(IntWritable userIndexWritable, VectorWritable ratingsWritable, Context ctx)
        throws IOException, InterruptedException {

    Pair<OpenIntObjectHashMap<Vector>, OpenIntObjectHashMap<Vector>> uAndM = getSharedInstance();
    OpenIntObjectHashMap<Vector> U = uAndM.getFirst();
    OpenIntObjectHashMap<Vector> M = uAndM.getSecond();

    Vector ratings = ratingsWritable.get();
    int userIndex = userIndexWritable.get();
    final OpenIntHashSet alreadyRatedItems = new OpenIntHashSet(ratings.getNumNondefaultElements());

    for (Vector.Element e : ratings.nonZeroes()) {
        alreadyRatedItems.add(e.index());
    }

    final TopItemsQueue topItemsQueue = new TopItemsQueue(recommendationsPerUser);
    final Vector userFeatures = U.get(userIndex);

    M.forEachPair(new IntObjectProcedure<Vector>() {
        @Override
        public boolean apply(int itemID, Vector itemFeatures) {
            if (!alreadyRatedItems.contains(itemID)) {
                double predictedRating = userFeatures.dot(itemFeatures);

                MutableRecommendedItem top = topItemsQueue.top();
                if (predictedRating > top.getValue()) {
                    top.set(itemID, (float) predictedRating);
                    topItemsQueue.updateTop();
                }
            }
            return true;
        }
    });

    List<RecommendedItem> recommendedItems = topItemsQueue.getTopItems();

    if (!recommendedItems.isEmpty()) {

        // cap predictions to maxRating
        for (RecommendedItem topItem : recommendedItems) {
            ((MutableRecommendedItem) topItem).capToMaxValue(maxRating);
        }

        if (usesLongIDs) {
            long userID = userIDIndex.get(userIndex);
            userIDWritable.set(userID);

            for (RecommendedItem topItem : recommendedItems) {
                // remap item IDs
                long itemID = itemIDIndex.get((int) topItem.getItemID());
                ((MutableRecommendedItem) topItem).setItemID(itemID);
            }

        } else {
            userIDWritable.set(userIndex);
        }

        recommendations.set(recommendedItems);
        ctx.write(userIDWritable, recommendations);
    }
}

From source file: org.apache.mahout.cf.taste.hadoop.item.SimilarityMatrixRowWrapperMapper.java

License: Apache License

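The row index obtained via key.get() is used twice: to blank out the row's self-similarity entry and to set the output key.
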
@Override
protected void map(IntWritable key, VectorWritable value, Context context)
        throws IOException, InterruptedException {
    Vector similarityMatrixRow = value.get();
    /* remove self similarity */
    similarityMatrixRow.set(key.get(), Double.NaN);

    index.set(key.get());
    vectorOrPref.set(similarityMatrixRow);

    context.write(index, vectorOrPref);
}

From source file: org.apache.mahout.cf.taste.hadoop.similarity.item.MostSimilarItemPairsMapper.java

License: Apache License

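itemIDIndexWritable.get() yields the item index, which is compared against each element index to skip self-similarities and is finally translated back to a long item ID.
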
@Override
protected void map(IntWritable itemIDIndexWritable, VectorWritable similarityVector, Context ctx)
        throws IOException, InterruptedException {

    int itemIDIndex = itemIDIndexWritable.get();

    Queue<SimilarItem> topMostSimilarItems = new PriorityQueue<SimilarItem>(maxSimilarItemsPerItem + 1,
            Collections.reverseOrder(SimilarItem.COMPARE_BY_SIMILARITY));

    Iterator<Vector.Element> similarityVectorIterator = similarityVector.get().iterateNonZero();

    while (similarityVectorIterator.hasNext()) {
        Vector.Element element = similarityVectorIterator.next();
        int index = element.index();
        double value = element.get();
        /* ignore self similarities */
        if (index != itemIDIndex) {
            if (topMostSimilarItems.size() < maxSimilarItemsPerItem) {
                topMostSimilarItems.add(new SimilarItem(indexItemIDMap.get(index), value));
            } else if (value > topMostSimilarItems.peek().getSimilarity()) {
                topMostSimilarItems.add(new SimilarItem(indexItemIDMap.get(index), value));
                topMostSimilarItems.poll();
            }
        }
    }

    if (!topMostSimilarItems.isEmpty()) {
        List<SimilarItem> mostSimilarItems = new ArrayList<SimilarItem>(topMostSimilarItems.size());
        mostSimilarItems.addAll(topMostSimilarItems);
        Collections.sort(mostSimilarItems, SimilarItem.COMPARE_BY_SIMILARITY);

        long itemID = indexItemIDMap.get(itemIDIndex);
        for (SimilarItem similarItem : mostSimilarItems) {
            long otherItemID = similarItem.getItemID();
            if (itemID < otherItemID) {
                ctx.write(new EntityEntityWritable(itemID, otherItemID),
                        new DoubleWritable(similarItem.getSimilarity()));
            } else {
                ctx.write(new EntityEntityWritable(otherItemID, itemID),
                        new DoubleWritable(similarItem.getSimilarity()));
            }
        }
    }
}

From source file: org.apache.mahout.classifier.naivebayes.trainer.NaiveBayesThetaComplementaryMapper.java

License: Apache License

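The label index returned by key.get() selects the label sum and the slot of the per-label theta normalizer to update.
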
@Override
protected void map(IntWritable key, VectorWritable value, Context context)
        throws IOException, InterruptedException {
    Vector vector = value.get();
    int label = key.get();
    double sigmaK = labelSum.get(label);
    Iterator<Element> it = vector.iterateNonZero();
    while (it.hasNext()) {
        Element e = it.next();
        double numerator = featureSum.get(e.index()) - e.get() + alphaI;
        double denominator = totalSum - sigmaK + vocabCount;
        double weight = Math.log(numerator / denominator);
        perLabelThetaNormalizer.set(label, perLabelThetaNormalizer.get(label) + weight);
    }
}

From source file: org.apache.mahout.classifier.naivebayes.trainer.NaiveBayesThetaMapper.java

License: Apache License

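As in the previous example, key.get() supplies the label index for the per-label theta normalizer update.
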
@Override
protected void map(IntWritable key, VectorWritable value, Context context)
        throws IOException, InterruptedException {
    Vector vector = value.get();
    int label = key.get();
    double weight = Math.log((vector.zSum() + alphaI) / (labelSum.get(label) + vocabCount));
    perLabelThetaNormalizer.set(label, perLabelThetaNormalizer.get(label) + weight);
}

From source file: org.apache.mahout.classifier.naivebayes.trainer.NaiveBayesWeightsMapper.java

License: Apache License

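key.get() provides the label index under which the vector's zSum is accumulated.
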
@Override
protected void map(IntWritable key, VectorWritable value, Context context)
        throws IOException, InterruptedException {
    Vector vector = value.get();
    if (featureSum == null) {
        featureSum = new RandomAccessSparseVector(vector.size(), vector.getNumNondefaultElements());
        labelSum = new RandomAccessSparseVector(labelMap.size());
    }

    int label = key.get();
    vector.addTo(featureSum);
    labelSum.set(label, labelSum.get(label) + vector.zSum());
}

From source file: org.apache.mahout.classifier.rbm.training.DBMBackPropTrainingMapper.java

License: Apache License

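key.get() gives the position of the one-hot entry in the label vector before the back-propagation step.
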
protected void map(IntWritable key, VectorWritable value, Context context)
        throws java.io.IOException, InterruptedException {
    for (int i = 0; i < label.size(); i++)
        label.setQuick(i, 0);
    label.set(key.get(), 1);

    BackPropTrainer trainer = new BackPropTrainer(learningrate);

    Matrix[] result = trainer.calculateWeightUpdates(dbm, value.get(), label);
    context.getCounter(BATCHES.SIZE).increment(1);

    //write for each RBM i (key, number of rbm) the result and put together the last two
    //matrices since they refer to just one labeled rbm, which was split to two for the training
    for (int i = 0; i < result.length - 1; i++) {
        if (i == result.length - 2) {
            Matrix updates = new DenseMatrix(result[i].rowSize() + result[i + 1].columnSize(),
                    result[i].columnSize());
            for (int j = 0; j < updates.rowSize(); j++)
                for (int k = 0; k < updates.columnSize(); k++) {
                    if (j < result[i].rowSize())
                        updates.set(j, k, result[i].get(j, k));
                    else
                        updates.set(j, k, result[i + 1].get(k, j - result[i].rowSize()));
                }

            context.write(new IntWritable(i), new MatrixWritable(updates));
        } else
            context.write(new IntWritable(i), new MatrixWritable(result[i]));
    }
}

From source file: org.apache.mahout.classifier.rbm.training.RBMGreedyPreTrainingMapper.java

License: Apache License

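Again, key.get() marks the one-hot entry of the label vector for the labeled RBM.
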
protected void map(IntWritable key, VectorWritable value, Context context)
        throws java.io.IOException, InterruptedException {
    CDTrainer trainer = new CDTrainer(learningRate, nrGibbsSampling);

    label.set(key.get(), 1);

    dbm.getRBM(0).getVisibleLayer().setActivations(value.get());
    for (int i = 0; i < nr; i++) {
        //double the bottom up connection for initialization
        dbm.getRBM(i).exciteHiddenLayer(2, false);
        if (i == nr - 1)
            //probabilities as activation for the data the rbm should train on
            dbm.getRBM(i).getHiddenLayer().setProbabilitiesAsActivation();
        else
            dbm.getRBM(i).getHiddenLayer().updateNeurons();
    }

    context.getCounter(BATCH.SIZE).increment(1);

    if (nr == dbm.getRbmCount() - 1) {
        ((LabeledSimpleRBM) dbm.getRBM(nr)).getSoftmaxLayer().setActivations(label);

        Matrix updates = trainer.calculateWeightUpdates((LabeledSimpleRBM) dbm.getRBM(nr), true, false);
        context.write(new IntWritable(nr), new MatrixWritable(updates));
    } else {
        Matrix updates = trainer.calculateWeightUpdates((SimpleRBM) dbm.getRBM(nr), false, nr == 0);
        context.write(new IntWritable(nr), new MatrixWritable(updates));
    }
}

From source file: org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchUtils.java

License: Apache License

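In this utility, get() is called on freshly constructed IntWritable keys only to log the state indices while the HMM distributions are serialized.
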
protected static void WriteModelToDirectory(HmmModel model, Path modelPath, Configuration conf)
        throws IOException {

    int numHidden = model.getNrOfHiddenStates();
    int numObserved = model.getNrOfOutputStates();
    Matrix emissionMatrix = model.getEmissionMatrix();
    Matrix transitionMatrix = model.getTransitionMatrix();
    Vector initialProbability = model.getInitialProbabilities();

    MapWritable initialDistributionMap = new MapWritable();
    MapWritable transitionDistributionMap = new MapWritable();
    MapWritable emissionDistributionMap = new MapWritable();
    // delete the output directory
    HadoopUtil.delete(conf, modelPath);
    // create new file to store HMM
    FileSystem fs = FileSystem.get(modelPath.toUri(), conf);
    Path outFile = new Path(modelPath, "part-randomSeed");
    boolean newFile = fs.createNewFile(outFile);

    if (newFile) {
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outFile, Text.class,
                MapWritable.class);

        try {

            // construct one MapWritable<IntWritable, DoubleWritable> object
            // and two MapWritable<Text, MapWritable<IntWritable, DoubleWritable >> objects
            for (int i = 0; i < numHidden; i++) {
                IntWritable initialDistributionKey = new IntWritable(i);
                DoubleWritable initialDistributionValue = new DoubleWritable(initialProbability.get(i));
                log.info("BuildRandomModel Initial Distribution Map: State {} = {})",
                        initialDistributionKey.get(), initialDistributionValue.get());
                initialDistributionMap.put(initialDistributionKey, initialDistributionValue);

                Text transitionDistributionKey = new Text("TRANSIT_" + Integer.toString(i));
                MapWritable transitionDistributionValue = new MapWritable();
                for (int j = 0; j < numHidden; j++) {
                    IntWritable transitionDistributionInnerKey = new IntWritable(j);
                    DoubleWritable transitionDistributionInnerValue = new DoubleWritable(
                            transitionMatrix.get(i, j));
                    log.info("BuildRandomModel Transition Distribution Map Inner: ({}, {}) = ({}, {})",
                            new Object[] { i, j, transitionDistributionInnerKey.get(),
                                    transitionDistributionInnerValue.get() });
                    transitionDistributionValue.put(transitionDistributionInnerKey,
                            transitionDistributionInnerValue);
                }
                transitionDistributionMap.put(transitionDistributionKey, transitionDistributionValue);

                Text emissionDistributionKey = new Text("EMIT_" + Integer.toString(i));
                MapWritable emissionDistributionValue = new MapWritable();
                for (int j = 0; j < numObserved; j++) {
                    IntWritable emissionDistributionInnerKey = new IntWritable(j);
                    DoubleWritable emissionDistributionInnerValue = new DoubleWritable(
                            emissionMatrix.get(i, j));
                    log.info("BuildRandomModel Emission Distribution Map Inner: ({}, {}) = ({}, {})",
                            new Object[] { i, j, emissionDistributionInnerKey.get(),
                                    emissionDistributionInnerValue.get() });
                    emissionDistributionValue.put(emissionDistributionInnerKey, emissionDistributionInnerValue);
                }
                emissionDistributionMap.put(emissionDistributionKey, emissionDistributionValue);

            }

            writer.append(new Text("INITIAL"), initialDistributionMap);
            log.info("Wrote random Initial Distribution Map to {}", outFile);

            for (MapWritable.Entry<Writable, Writable> transitionEntry : transitionDistributionMap.entrySet()) {
                log.info("Writing Transition Distribution Map Key, Value = ({}, {})", transitionEntry.getKey(),
                        transitionEntry.getValue());
                writer.append(transitionEntry.getKey(), transitionEntry.getValue());
            }
            log.info("Wrote random Transition Distribution Map to {}", outFile);

            for (MapWritable.Entry<Writable, Writable> emissionEntry : emissionDistributionMap.entrySet()) {
                log.info("Writing Emission Distribution Map Key, Value = ({}, {})", emissionEntry.getKey(),
                        emissionEntry.getValue());
                writer.append(emissionEntry.getKey(), emissionEntry.getValue());
            }
            log.info("Wrote random Emission Distribution Map to {}", outFile);

        } finally {
            Closeables.closeQuietly(writer);
        }

    }

}

From source file: org.apache.mahout.clustering.cdbw.CDbwMapper.java

License: Apache License

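clusterId.get() unwraps the cluster ID that keys the lookups into the representative-points and most-distant-points maps.
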
@Override
protected void map(IntWritable clusterId, WeightedVectorWritable point, Context context)
        throws IOException, InterruptedException {
    int key = clusterId.get();
    WeightedVectorWritable currentMDP = mostDistantPoints.get(key);

    List<VectorWritable> refPoints = representativePoints.get(key);
    double totalDistance = 0.0;
    for (VectorWritable refPoint : refPoints) {
        totalDistance += measure.distance(refPoint.get(), point.getVector().get());
    }
    if (currentMDP == null || currentMDP.getWeight() < totalDistance) {
        mostDistantPoints.put(key,
                new WeightedVectorWritable(totalDistance, new VectorWritable(point.getVector().get().clone())));
    }
}