List of usage examples for org.apache.hadoop.io IntWritable get
public int get()
From source file:net.broomie.reducer.TokenizeReducer.java
License:Apache License
/**
 * Adds up every count received for a key and emits the key with that total.
 *
 * @param key the key for reducer.
 * @param values the values for reducer.
 * @param context context object.
 * @exception IOException exception for reading data error.
 * @exception InterruptedException exception.
 */
public final void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int total = 0;
    for (IntWritable count : values) {
        total = total + count.get();
    }
    // `result` is declared outside this method; it is overwritten with the
    // total and then written out as the value for this key.
    result.set(total);
    context.write(key, result);
}
From source file:net.broomie.reducer.TokenizeReducerTFIDF.java
License:Apache License
/** * @param key Specify the key for reducer. * @param values Specify the values for reducer. * @param context Specify the hadoop Context object. * @exception IOException Exception for open input file. * @exception InterruptedException exception. *///from ww w .j a v a 2 s . c om public final void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { LinkedHashMap<String, Integer> counter = new LinkedHashMap<String, Integer>(hashMapInitSiz); String keyToken = key.toString(); for (IntWritable valBuf : values) { if (!counter.containsKey(keyToken)) { counter.put(keyToken, valBuf.get()); } else { counter.put(keyToken, counter.get(keyToken).intValue() + valBuf.get()); } } Iterator<String> counterItr = counter.keySet().iterator(); while (counterItr.hasNext()) { String token = counterItr.next(); double tf = counter.get(token); if (wordCount.containsKey(token)) { int df = wordCount.get(token); //double score = tf * Math.log10(lineNum / df); double score = tf / df; val.set(score); context.write(new Text(token), val); } } }
From source file:nl.gridline.free.taalserver.WordCountPerDocMap.java
License:Apache License
@Override protected void map(WordDocId key, IntWritable value, Context context) throws IOException, InterruptedException { long docid = key.getDocId(); String word = key.getWord();// ww w. ja v a 2 s. c om int count = value.get(); context.write(new LongWritable(docid), new WordCount(word, count)); context.progress(); }
From source file:nl.gridline.zieook.inx.movielens.hbase.RecommendationsImportMap.java
License:Apache License
@Override public void map(IntWritable key, VectorWritable value, Context context) throws IOException, InterruptedException { Vector similarityMatrixRow = value.get(); // Remove self similarity similarityMatrixRow.set(key.get(), Double.NEGATIVE_INFINITY); // from the equation // determine max non-zero element: (==item index) List<RecommendationElement> recommendations = new ArrayList<RecommendationElement>(); // collect non-zero items: Iterator<Element> it = similarityMatrixRow.iterateNonZero(); while (it.hasNext()) { Element e = it.next();//from ww w. ja v a2s .com recommendations.add(new RecommendationElement(e.index(), e.get())); // LOG.info("created new recommendation for " + e.index()); } // sorted list of recommendations: now we have an item id, and similarity value: Collections.sort(recommendations, new SimilarityComparator()); LOG.info("sorted: " + recommendations.size()); int rank = 1; Put put = new Put(RowKeys.getRecommendationKey(collection, recommender, key.get())); for (RecommendationElement el : recommendations) { // if (el.getSimilarityValue() > 0) // { byte[] data = Recommend.getRecommendation(el.getItemIndex(), rank, el.getSimilarityValue()); put.add(RECOMMENDATION_COLUMN, Bytes.toBytes(rank), data); rank++; // } } context.write(new LongWritable(key.get()), put); }
From source file:nl.gridline.zieook.inx.movielens.items.ItemBasedSortSimilaritiesMapper.java
License:Apache License
@Override protected void map(IntWritable key, VectorWritable value, Context context) throws IOException, InterruptedException { int maxIndex = -1; Vector similarityMatrixRow = value.get(); /* remove self similarity */ similarityMatrixRow.set(key.get(), Double.NEGATIVE_INFINITY); ///*ww w . j a v a 2 s.com*/ // determine maximum index // Iterator<Element> it = similarityMatrixRow.iterateNonZero(); while (it.hasNext()) { Element e = it.next(); // e.index() // == item id if (e.index() > maxIndex) { maxIndex = e.index(); } } // System.out.println(String.format("key: %d maxIndex: %d", key.get(), maxIndex)); if (maxIndex > 0) { RecommendationElement[] itemBasedRecommendations = new RecommendationElement[maxIndex]; for (int i = 0; i < maxIndex; i++) { Element element = similarityMatrixRow.getElement(i); double similarityValue = Double.NEGATIVE_INFINITY; if (element != null) { similarityValue = element.get(); } itemBasedRecommendations[i] = new RecommendationElement(i, similarityValue); } Arrays.sort(itemBasedRecommendations, new SimilarityComparator()); RecommendationElementArray array = new RecommendationElementArray(itemBasedRecommendations); context.write(new VarIntWritable(key.get()), array); } }
From source file:nutchIndexer.NutchMap.java
License:Open Source License
@Override public void map(LongWritable key, Text value, Mapper.Context context) throws IOException, InterruptedException { TrecOLParser document = new TrecOLParser(value.toString()); documentAnalyzed = new MapWritable(); if (document.isParsed()) { this.tokenizer.tokenize(document.getDocContent()); while (this.tokenizer.hasMoreTokens()) { IntWritable counter = CastingTypes.zero; String newTerm = this.tokenizer.nextToken(); Text term = new Text(newTerm); if (documentAnalyzed.containsKey(term)) { counter = CastingTypes.strToIntWr(documentAnalyzed.get(term).toString()); }//from w w w. j a v a 2 s . c o m documentAnalyzed.put(term, CastingTypes.intToIntWr(counter.get() + 1)); } if (!documentAnalyzed.isEmpty()) { context.write(CastingTypes.strToIntWr(document.getDocId()), documentAnalyzed); } } }
From source file:nutchIndexer.NutchReduce.java
License:Open Source License
/**
 * Adds one posting to the inverted index for every (term, frequency) entry
 * of every analyzed-document map received for the given document id.
 */
@Override
public void reduce(IntWritable docId, Iterable<MapWritable> documentsAnalyzed, Context context)
        throws IOException, InterruptedException {
    for (MapWritable termFrequencies : documentsAnalyzed) {
        for (MapWritable.Entry<Writable, Writable> posting : termFrequencies.entrySet()) {
            final Text term = (Text) posting.getKey();
            final IntWritable frequency = (IntWritable) posting.getValue();
            final Integer documentId = Integer.valueOf(docId.get());
            this.invertedIndex.addPosting(term, documentId, frequency);
        }
    }
}
From source file:org.adamr.tsponmapreduce.mr.BruteReducer.java
/**
 * Sums all integer values received for a key and emits the key with the
 * total wrapped in a new {@code IntWritable}.
 */
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int total = 0;
    for (IntWritable partial : values) {
        total = total + partial.get();
    }
    final IntWritable summed = new IntWritable(total);
    context.write(key, summed);
}
From source file:org.ankus.mapreduce.algorithms.clustering.kmeans.KMeansClusterUpdateReducer.java
License:Apache License
protected void reduce(IntWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException { Iterator<Text> iterator = values.iterator(); KMeansClusterInfoMgr cluster = new KMeansClusterInfoMgr(); cluster.setClusterID(key.get()); int dataCnt = 0; while (iterator.hasNext()) { dataCnt++;//from ww w . j av a 2 s. co m String tokens[] = iterator.next().toString().split(mDelimiter); for (int i = 0; i < tokens.length; i++) { if (CommonMethods.isContainIndex(mIndexArr, i, true) && !CommonMethods.isContainIndex(mExceptionIndexArr, i, false)) { if (CommonMethods.isContainIndex(mNominalIndexArr, i, false)) { cluster.addAttributeValue(i, tokens[i], ConfigurationVariable.NOMINAL_ATTRIBUTE); } else cluster.addAttributeValue(i, tokens[i], ConfigurationVariable.NUMERIC_ATTRIBUTE); } } } cluster.finalCompute(dataCnt); String writeStr = cluster.getClusterInfoString(mDelimiter, context.getConfiguration().get("subDelimiter", "@@")); context.write(NullWritable.get(), new Text(writeStr)); }
From source file:org.apache.accumulo.server.tabletserver.log.MultiReaderTest.java
License:Apache License
/**
 * Reads consecutive records from the reader and asserts that their keys run
 * from {@code start + 1} up to 999, with 10 left out.
 *
 * @param reader the reader to pull records from
 * @param start the key just before the first expected one
 * @throws IOException declared for {@code reader.next}
 */
private void scan(MultiReader reader, int start) throws IOException {
    final IntWritable key = new IntWritable();
    final BytesWritable value = new BytesWritable();
    int expected = start + 1;
    while (expected < 1000) {
        // 10 is skipped: no record is read or checked for it.
        if (expected != 10) {
            assertTrue(reader.next(key, value));
            assertEquals(expected, key.get());
        }
        expected++;
    }
}