List of usage examples for org.apache.hadoop.io.IntWritable.get()

public int get()

Returns the int value this IntWritable currently wraps.
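Before the real-world examples, a minimal standalone sketch of the round trip (wrap an int, read it back with get()); the class name IntWritableGetExample is illustrative, but the IntWritable constructor, set(), and get() calls are the standard Hadoop API that every mapper below relies on:

import org.apache.hadoop.io.IntWritable;

public class IntWritableGetExample {
  public static void main(String[] args) {
    IntWritable writable = new IntWritable(42); // wrap a primitive int
    int value = writable.get();                 // unwrap it again
    System.out.println(value);                  // prints 42

    writable.set(7);                            // Writables are mutable and reusable
    System.out.println(writable.get());         // prints 7
  }
}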
From source file:org.apache.mahout.cf.taste.hadoop.als.PredictionMapper.java
License:Apache License
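Here, get() unwraps the user index key so the mapper can look up the user's feature vector and compute top-N ALS recommendations.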
@Override
protected void map(IntWritable userIndexWritable, VectorWritable ratingsWritable, Context ctx)
    throws IOException, InterruptedException {
  Pair<OpenIntObjectHashMap<Vector>, OpenIntObjectHashMap<Vector>> uAndM = getSharedInstance();
  OpenIntObjectHashMap<Vector> U = uAndM.getFirst();
  OpenIntObjectHashMap<Vector> M = uAndM.getSecond();

  Vector ratings = ratingsWritable.get();
  int userIndex = userIndexWritable.get();
  final OpenIntHashSet alreadyRatedItems = new OpenIntHashSet(ratings.getNumNondefaultElements());

  for (Vector.Element e : ratings.nonZeroes()) {
    alreadyRatedItems.add(e.index());
  }

  final TopItemsQueue topItemsQueue = new TopItemsQueue(recommendationsPerUser);
  final Vector userFeatures = U.get(userIndex);

  M.forEachPair(new IntObjectProcedure<Vector>() {
    @Override
    public boolean apply(int itemID, Vector itemFeatures) {
      if (!alreadyRatedItems.contains(itemID)) {
        double predictedRating = userFeatures.dot(itemFeatures);

        MutableRecommendedItem top = topItemsQueue.top();
        if (predictedRating > top.getValue()) {
          top.set(itemID, (float) predictedRating);
          topItemsQueue.updateTop();
        }
      }
      return true;
    }
  });

  List<RecommendedItem> recommendedItems = topItemsQueue.getTopItems();

  if (!recommendedItems.isEmpty()) {

    // cap predictions to maxRating
    for (RecommendedItem topItem : recommendedItems) {
      ((MutableRecommendedItem) topItem).capToMaxValue(maxRating);
    }

    if (usesLongIDs) {
      long userID = userIDIndex.get(userIndex);
      userIDWritable.set(userID);

      for (RecommendedItem topItem : recommendedItems) {
        // remap item IDs
        long itemID = itemIDIndex.get((int) topItem.getItemID());
        ((MutableRecommendedItem) topItem).setItemID(itemID);
      }

    } else {
      userIDWritable.set(userIndex);
    }

    recommendations.set(recommendedItems);
    ctx.write(userIDWritable, recommendations);
  }
}
From source file:org.apache.mahout.cf.taste.hadoop.item.SimilarityMatrixRowWrapperMapper.java
License:Apache License
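Here, key.get() yields the item index, used both to blank out the row's self-similarity entry and to set the output key.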
@Override
protected void map(IntWritable key, VectorWritable value, Context context)
    throws IOException, InterruptedException {
  Vector similarityMatrixRow = value.get();
  /* remove self similarity */
  similarityMatrixRow.set(key.get(), Double.NaN);

  index.set(key.get());
  vectorOrPref.set(similarityMatrixRow);

  context.write(index, vectorOrPref);
}
From source file:org.apache.mahout.cf.taste.hadoop.similarity.item.MostSimilarItemPairsMapper.java
License:Apache License
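Here, get() recovers the item index so the mapper can skip self-similarities while collecting each item's most similar neighbors.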
@Override
protected void map(IntWritable itemIDIndexWritable, VectorWritable similarityVector, Context ctx)
    throws IOException, InterruptedException {

  int itemIDIndex = itemIDIndexWritable.get();

  Queue<SimilarItem> topMostSimilarItems = new PriorityQueue<SimilarItem>(maxSimilarItemsPerItem + 1,
      Collections.reverseOrder(SimilarItem.COMPARE_BY_SIMILARITY));

  Iterator<Vector.Element> similarityVectorIterator = similarityVector.get().iterateNonZero();

  while (similarityVectorIterator.hasNext()) {
    Vector.Element element = similarityVectorIterator.next();
    int index = element.index();
    double value = element.get();
    /* ignore self similarities */
    if (index != itemIDIndex) {
      if (topMostSimilarItems.size() < maxSimilarItemsPerItem) {
        topMostSimilarItems.add(new SimilarItem(indexItemIDMap.get(index), value));
      } else if (value > topMostSimilarItems.peek().getSimilarity()) {
        topMostSimilarItems.add(new SimilarItem(indexItemIDMap.get(index), value));
        topMostSimilarItems.poll();
      }
    }
  }

  if (!topMostSimilarItems.isEmpty()) {
    List<SimilarItem> mostSimilarItems = new ArrayList<SimilarItem>(topMostSimilarItems.size());
    mostSimilarItems.addAll(topMostSimilarItems);
    Collections.sort(mostSimilarItems, SimilarItem.COMPARE_BY_SIMILARITY);

    long itemID = indexItemIDMap.get(itemIDIndex);
    for (SimilarItem similarItem : mostSimilarItems) {
      long otherItemID = similarItem.getItemID();
      if (itemID < otherItemID) {
        ctx.write(new EntityEntityWritable(itemID, otherItemID),
            new DoubleWritable(similarItem.getSimilarity()));
      } else {
        ctx.write(new EntityEntityWritable(otherItemID, itemID),
            new DoubleWritable(similarItem.getSimilarity()));
      }
    }
  }
}
From source file:org.apache.mahout.classifier.naivebayes.trainer.NaiveBayesThetaComplementaryMapper.java
License:Apache License
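Here, key.get() yields the label index used to accumulate per-label theta normalizer weights for complementary naive Bayes.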
@Override
protected void map(IntWritable key, VectorWritable value, Context context)
    throws IOException, InterruptedException {
  Vector vector = value.get();
  int label = key.get();
  double sigmaK = labelSum.get(label);
  Iterator<Element> it = vector.iterateNonZero();
  while (it.hasNext()) {
    Element e = it.next();
    double numerator = featureSum.get(e.index()) - e.get() + alphaI;
    double denominator = totalSum - sigmaK + vocabCount;
    double weight = Math.log(numerator / denominator);
    perLabelThetaNormalizer.set(label, perLabelThetaNormalizer.get(label) + weight);
  }
}
From source file:org.apache.mahout.classifier.naivebayes.trainer.NaiveBayesThetaMapper.java
License:Apache License
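Here, key.get() yields the label index whose theta normalizer is updated from the vector's weight sum.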
@Override
protected void map(IntWritable key, VectorWritable value, Context context)
    throws IOException, InterruptedException {
  Vector vector = value.get();
  int label = key.get();
  double weight = Math.log((vector.zSum() + alphaI) / (labelSum.get(label) + vocabCount));
  perLabelThetaNormalizer.set(label, perLabelThetaNormalizer.get(label) + weight);
}
From source file:org.apache.mahout.classifier.naivebayes.trainer.NaiveBayesWeightsMapper.java
License:Apache License
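Here, key.get() yields the label index used to accumulate the per-label and per-feature weight sums.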
@Override
protected void map(IntWritable key, VectorWritable value, Context context)
    throws IOException, InterruptedException {
  Vector vector = value.get();
  if (featureSum == null) {
    featureSum = new RandomAccessSparseVector(vector.size(), vector.getNumNondefaultElements());
    labelSum = new RandomAccessSparseVector(labelMap.size());
  }
  int label = key.get();
  vector.addTo(featureSum);
  labelSum.set(label, labelSum.get(label) + vector.zSum());
}
From source file:org.apache.mahout.classifier.rbm.training.DBMBackPropTrainingMapper.java
License:Apache License
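Here, key.get() selects which position of the label vector to switch on before backpropagation, and new IntWritable(i) keys each RBM's weight updates on output.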
protected void map(IntWritable key, VectorWritable value, Context context)
    throws java.io.IOException, InterruptedException {
  for (int i = 0; i < label.size(); i++)
    label.setQuick(i, 0);
  label.set(key.get(), 1);

  BackPropTrainer trainer = new BackPropTrainer(learningrate);

  Matrix[] result = trainer.calculateWeightUpdates(dbm, value.get(), label);

  context.getCounter(BATCHES.SIZE).increment(1);

  // for each RBM i, write its result keyed by the RBM's number; merge the last two
  // matrices, since they belong to one labeled RBM that was split in two for training
  for (int i = 0; i < result.length - 1; i++) {
    if (i == result.length - 2) {
      Matrix updates = new DenseMatrix(result[i].rowSize() + result[i + 1].columnSize(),
          result[i].columnSize());
      for (int j = 0; j < updates.rowSize(); j++)
        for (int k = 0; k < updates.columnSize(); k++) {
          if (j < result[i].rowSize())
            updates.set(j, k, result[i].get(j, k));
          else
            updates.set(j, k, result[i + 1].get(k, j - result[i].rowSize()));
        }
      context.write(new IntWritable(i), new MatrixWritable(updates));
    } else
      context.write(new IntWritable(i), new MatrixWritable(result[i]));
  }
}
From source file:org.apache.mahout.classifier.rbm.training.RBMGreedyPreTrainingMapper.java
License:Apache License
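Here, key.get() marks the training label before contrastive-divergence updates are computed for the RBM currently being pre-trained.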
protected void map(IntWritable key, VectorWritable value, Context context)
    throws java.io.IOException, InterruptedException {
  CDTrainer trainer = new CDTrainer(learningRate, nrGibbsSampling);

  label.set(key.get(), 1);
  dbm.getRBM(0).getVisibleLayer().setActivations(value.get());

  for (int i = 0; i < nr; i++) {
    // double the bottom-up connection for initialization
    dbm.getRBM(i).exciteHiddenLayer(2, false);
    if (i == nr - 1)
      // probabilities as activation for the data the rbm should train on
      dbm.getRBM(i).getHiddenLayer().setProbabilitiesAsActivation();
    else
      dbm.getRBM(i).getHiddenLayer().updateNeurons();
  }

  context.getCounter(BATCH.SIZE).increment(1);

  if (nr == dbm.getRbmCount() - 1) {
    ((LabeledSimpleRBM) dbm.getRBM(nr)).getSoftmaxLayer().setActivations(label);
    Matrix updates = trainer.calculateWeightUpdates((LabeledSimpleRBM) dbm.getRBM(nr), true, false);
    context.write(new IntWritable(nr), new MatrixWritable(updates));
  } else {
    Matrix updates = trainer.calculateWeightUpdates((SimpleRBM) dbm.getRBM(nr), false, nr == 0);
    context.write(new IntWritable(nr), new MatrixWritable(updates));
  }
}
From source file:org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchUtils.java
License:Apache License
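Here, IntWritable keys index the HMM's hidden and observed states inside the serialized distribution maps, and get() is used when logging each entry.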
protected static void WriteModelToDirectory(HmmModel model, Path modelPath, Configuration conf)
    throws IOException {

  int numHidden = model.getNrOfHiddenStates();
  int numObserved = model.getNrOfOutputStates();
  Matrix emissionMatrix = model.getEmissionMatrix();
  Matrix transitionMatrix = model.getTransitionMatrix();
  Vector initialProbability = model.getInitialProbabilities();

  MapWritable initialDistributionMap = new MapWritable();
  MapWritable transitionDistributionMap = new MapWritable();
  MapWritable emissionDistributionMap = new MapWritable();

  // delete the output directory
  HadoopUtil.delete(conf, modelPath);

  // create new file to store HMM
  FileSystem fs = FileSystem.get(modelPath.toUri(), conf);
  Path outFile = new Path(modelPath, "part-randomSeed");
  boolean newFile = fs.createNewFile(outFile);

  if (newFile) {
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outFile, Text.class, MapWritable.class);

    try {
      // construct one MapWritable<IntWritable, DoubleWritable> object
      // and two MapWritable<Text, MapWritable<IntWritable, DoubleWritable>> objects
      for (int i = 0; i < numHidden; i++) {
        IntWritable initialDistributionKey = new IntWritable(i);
        DoubleWritable initialDistributionValue = new DoubleWritable(initialProbability.get(i));
        log.info("BuildRandomModel Initial Distribution Map: State {} = {})",
            initialDistributionKey.get(), initialDistributionValue.get());
        initialDistributionMap.put(initialDistributionKey, initialDistributionValue);

        Text transitionDistributionKey = new Text("TRANSIT_" + Integer.toString(i));
        MapWritable transitionDistributionValue = new MapWritable();
        for (int j = 0; j < numHidden; j++) {
          IntWritable transitionDistributionInnerKey = new IntWritable(j);
          DoubleWritable transitionDistributionInnerValue = new DoubleWritable(transitionMatrix.get(i, j));
          log.info("BuildRandomModel Transition Distribution Map Inner: ({}, {}) = ({}, {})",
              new Object[] { i, j, transitionDistributionInnerKey.get(),
                  transitionDistributionInnerValue.get() });
          transitionDistributionValue.put(transitionDistributionInnerKey, transitionDistributionInnerValue);
        }
        transitionDistributionMap.put(transitionDistributionKey, transitionDistributionValue);

        Text emissionDistributionKey = new Text("EMIT_" + Integer.toString(i));
        MapWritable emissionDistributionValue = new MapWritable();
        for (int j = 0; j < numObserved; j++) {
          IntWritable emissionDistributionInnerKey = new IntWritable(j);
          DoubleWritable emissionDistributionInnerValue = new DoubleWritable(emissionMatrix.get(i, j));
          log.info("BuildRandomModel Emission Distribution Map Inner: ({}, {}) = ({}, {})",
              new Object[] { i, j, emissionDistributionInnerKey.get(),
                  emissionDistributionInnerValue.get() });
          emissionDistributionValue.put(emissionDistributionInnerKey, emissionDistributionInnerValue);
        }
        emissionDistributionMap.put(emissionDistributionKey, emissionDistributionValue);
      }

      writer.append(new Text("INITIAL"), initialDistributionMap);
      log.info("Wrote random Initial Distribution Map to {}", outFile);

      for (MapWritable.Entry<Writable, Writable> transitionEntry : transitionDistributionMap.entrySet()) {
        log.info("Writing Transition Distribution Map Key, Value = ({}, {})",
            transitionEntry.getKey(), transitionEntry.getValue());
        writer.append(transitionEntry.getKey(), transitionEntry.getValue());
      }
      log.info("Wrote random Transition Distribution Map to {}", outFile);

      for (MapWritable.Entry<Writable, Writable> emissionEntry : emissionDistributionMap.entrySet()) {
        log.info("Writing Emission Distribution Map Key, Value = ({}, {})",
            emissionEntry.getKey(), emissionEntry.getValue());
        writer.append(emissionEntry.getKey(), emissionEntry.getValue());
      }
      log.info("Wrote random Emission Distribution Map to {}", outFile);
    } finally {
      Closeables.closeQuietly(writer);
    }
  }
}
From source file:org.apache.mahout.clustering.cdbw.CDbwMapper.java
License:Apache License
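Here, clusterId.get() unwraps the cluster key used to track the point most distant from the cluster's representative points.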
@Override
protected void map(IntWritable clusterId, WeightedVectorWritable point, Context context)
    throws IOException, InterruptedException {
  int key = clusterId.get();
  WeightedVectorWritable currentMDP = mostDistantPoints.get(key);

  List<VectorWritable> refPoints = representativePoints.get(key);
  double totalDistance = 0.0;
  for (VectorWritable refPoint : refPoints) {
    totalDistance += measure.distance(refPoint.get(), point.getVector().get());
  }
  if (currentMDP == null || currentMDP.getWeight() < totalDistance) {
    mostDistantPoints.put(key,
        new WeightedVectorWritable(totalDistance, new VectorWritable(point.getVector().get().clone())));
  }
}