Example usages of org.apache.hadoop.io.IntWritable.get()

Introduction

On this page you can find example usages of the org.apache.hadoop.io.IntWritable.get() method.

Prototype

public int get()

Source Link

Document

Return the value of this IntWritable.

Usage

From source file:net.broomie.reducer.TokenizeReducer.java

License:Apache License

/**
 * Adds up every value received for {@code key} and writes the total once.
 *
 * @param key the key for reducer.
 * @param values the values for reducer.
 * @param context context object the total is written to.
 * @exception IOException exception for reading data error.
 * @exception InterruptedException exception.
 */
public final void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int total = 0;
    for (IntWritable count : values) {
        total = total + count.get();
    }
    // 'result' is declared outside this method and is reused as the output holder.
    result.set(total);
    context.write(key, result);
}

From source file:net.broomie.reducer.TokenizeReducerTFIDF.java

License:Apache License

/**
 * Accumulates the values received for {@code key}, then, if the token is present
 * in {@code wordCount}, writes the accumulated count divided by that entry.
 *
 * @param key Specify the key for reducer.
 * @param values Specify the values for reducer.
 * @param context Specify the hadoop Context object.
 * @exception IOException Exception for open input file.
 * @exception InterruptedException exception.
 */
public final void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    LinkedHashMap<String, Integer> totals = new LinkedHashMap<String, Integer>(hashMapInitSiz);
    String keyToken = key.toString();

    // Every value is accumulated under the same token, the string form of the key.
    for (IntWritable count : values) {
        Integer soFar = totals.get(keyToken);
        if (soFar == null) {
            totals.put(keyToken, count.get());
        } else {
            totals.put(keyToken, soFar.intValue() + count.get());
        }
    }

    for (String token : totals.keySet()) {
        // Tokens missing from wordCount (declared outside this method) are not emitted.
        if (!wordCount.containsKey(token)) {
            continue;
        }
        double tf = totals.get(token);
        int df = wordCount.get(token);
        // tf is a double, so this is a floating-point division.
        double score = tf / df;
        val.set(score);
        context.write(new Text(token), val);
    }
}

From source file:nl.gridline.free.taalserver.WordCountPerDocMap.java

License:Apache License

/**
 * Re-keys a (word, document) count by document id: writes the document id as the
 * key and a {@code WordCount} of the word and its count as the value, then
 * reports progress.
 *
 * @param key the word / document id pair.
 * @param value the count for that pair.
 * @param context the context to write to.
 * @throws IOException if writing fails.
 * @throws InterruptedException if the task is interrupted.
 */
@Override
protected void map(WordDocId key, IntWritable value, Context context) throws IOException, InterruptedException {
    LongWritable outKey = new LongWritable(key.getDocId());
    WordCount outValue = new WordCount(key.getWord(), value.get());
    context.write(outKey, outValue);
    context.progress();
}

From source file:nl.gridline.zieook.inx.movielens.hbase.RecommendationsImportMap.java

License:Apache License

/**
 * Converts one row of the similarity matrix into a ranked set of recommendations
 * and writes them as a single {@code Put}, one column per rank.
 *
 * @param key index of the row (used as the item id).
 * @param value the similarity row for that item.
 * @param context the context the {@code Put} is written to.
 * @throws IOException if writing fails.
 * @throws InterruptedException if the task is interrupted.
 */
@Override
public void map(IntWritable key, VectorWritable value, Context context)
        throws IOException, InterruptedException {

    Vector row = value.get();
    final int itemId = key.get();

    // Remove self similarity by overwriting the item's own slot.
    row.set(itemId, Double.NEGATIVE_INFINITY);

    // Collect every non-zero entry as (item index, similarity value).
    List<RecommendationElement> ranked = new ArrayList<RecommendationElement>();
    for (Iterator<Element> nonZero = row.iterateNonZero(); nonZero.hasNext();) {
        Element entry = nonZero.next();
        ranked.add(new RecommendationElement(entry.index(), entry.get()));
    }

    // Order the candidates with the project's similarity comparator.
    Collections.sort(ranked, new SimilarityComparator());
    LOG.info("sorted: " + ranked.size());

    Put put = new Put(RowKeys.getRecommendationKey(collection, recommender, itemId));
    int rank = 0;
    for (RecommendationElement candidate : ranked) {
        // Ranks are 1-based and double as the column qualifier.
        rank++;
        byte[] payload = Recommend.getRecommendation(candidate.getItemIndex(), rank,
                candidate.getSimilarityValue());
        put.add(RECOMMENDATION_COLUMN, Bytes.toBytes(rank), payload);
    }
    context.write(new LongWritable(itemId), put);

}

From source file:nl.gridline.zieook.inx.movielens.items.ItemBasedSortSimilaritiesMapper.java

License:Apache License

/**
 * Builds a dense, sorted array of recommendation elements from one similarity
 * row and writes it under the row's key. Nothing is written when the largest
 * non-zero index is not positive.
 *
 * @param key index of the row (used as the item id).
 * @param value the similarity row for that item.
 * @param context the context the array is written to.
 * @throws IOException if writing fails.
 * @throws InterruptedException if the task is interrupted.
 */
@Override
protected void map(IntWritable key, VectorWritable value, Context context)
        throws IOException, InterruptedException {
    Vector row = value.get();

    // Remove self similarity.
    row.set(key.get(), Double.NEGATIVE_INFINITY);

    // Find the largest index that carries a non-zero entry (index == item id).
    int highestIndex = -1;
    for (Iterator<Element> nonZero = row.iterateNonZero(); nonZero.hasNext();) {
        highestIndex = Math.max(highestIndex, nonZero.next().index());
    }

    if (highestIndex <= 0) {
        return;
    }

    // NOTE(review): the array has highestIndex slots, so the entry at index
    // highestIndex itself is never included -- confirm this is intended.
    RecommendationElement[] candidates = new RecommendationElement[highestIndex];
    for (int i = 0; i < candidates.length; i++) {
        Element element = row.getElement(i);
        double similarity = (element == null) ? Double.NEGATIVE_INFINITY : element.get();
        candidates[i] = new RecommendationElement(i, similarity);
    }

    Arrays.sort(candidates, new SimilarityComparator());

    context.write(new VarIntWritable(key.get()), new RecommendationElementArray(candidates));
}

From source file:nutchIndexer.NutchMap.java

License:Open Source License

/**
 * Parses one input record, counts how often each token occurs in its content and
 * writes the per-term counts keyed by the document id. Records that fail to
 * parse, or that yield no tokens, produce no output.
 *
 * @param key input key (not read by this method).
 * @param value the raw record text.
 * @param context the context the term counts are written to.
 * @throws IOException if writing fails.
 * @throws InterruptedException if the task is interrupted.
 */
@Override
public void map(LongWritable key, Text value, Mapper.Context context) throws IOException, InterruptedException {
    TrecOLParser parsed = new TrecOLParser(value.toString());
    // The term map is reset for every record, even when parsing fails.
    documentAnalyzed = new MapWritable();
    if (!parsed.isParsed()) {
        return;
    }

    this.tokenizer.tokenize(parsed.getDocContent());
    while (this.tokenizer.hasMoreTokens()) {
        IntWritable seen = CastingTypes.zero;
        Text term = new Text(this.tokenizer.nextToken());
        if (documentAnalyzed.containsKey(term)) {
            // The stored count is read back through its string form.
            seen = CastingTypes.strToIntWr(documentAnalyzed.get(term).toString());
        }
        documentAnalyzed.put(term, CastingTypes.intToIntWr(seen.get() + 1));
    }

    if (documentAnalyzed.isEmpty()) {
        return;
    }
    context.write(CastingTypes.strToIntWr(parsed.getDocId()), documentAnalyzed);
}

From source file:nutchIndexer.NutchReduce.java

License:Open Source License

/**
 * Adds one posting to the inverted index for every (term, frequency) entry of
 * every map received for {@code docId}.
 *
 * @param docId the document id.
 * @param documentsAnalyzed the term-to-frequency maps for that document.
 * @param context reducer context (not written to by this method).
 * @throws IOException declared by the reducer contract.
 * @throws InterruptedException declared by the reducer contract.
 */
@Override
public void reduce(IntWritable docId, Iterable<MapWritable> documentsAnalyzed, Context context)
        throws IOException, InterruptedException {
    for (MapWritable termFrequencies : documentsAnalyzed) {
        for (MapWritable.Entry<Writable, Writable> posting : termFrequencies.entrySet()) {
            // Keys are cast to Text and values to IntWritable.
            Text term = (Text) posting.getKey();
            IntWritable frequency = (IntWritable) posting.getValue();
            Integer documentId = Integer.valueOf(docId.get());
            this.invertedIndex.addPosting(term, documentId, frequency);
        }
    }
}

From source file:org.adamr.tsponmapreduce.mr.BruteReducer.java

/**
 * Sums all values received for {@code key} and writes the sum as a new
 * {@code IntWritable}.
 *
 * @param key the key being reduced.
 * @param values the values grouped under {@code key}.
 * @param context the context the sum is written to.
 * @throws IOException if writing fails.
 * @throws InterruptedException if the task is interrupted.
 */
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int total = 0;
    for (IntWritable partial : values) {
        total = total + partial.get();
    }
    IntWritable summed = new IntWritable(total);
    context.write(key, summed);
}

From source file:org.ankus.mapreduce.algorithms.clustering.kmeans.KMeansClusterUpdateReducer.java

License:Apache License

/**
 * Feeds every row assigned to cluster {@code key} into a
 * {@code KMeansClusterInfoMgr}, finalizes it with the row count and writes the
 * resulting cluster description as a single text line.
 *
 * @param key the cluster id.
 * @param values the delimited rows assigned to that cluster.
 * @param context the context the description is written to; its configuration
 *        supplies "subDelimiter" (default "@@").
 * @throws IOException if writing fails.
 * @throws InterruptedException if the task is interrupted.
 */
protected void reduce(IntWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    KMeansClusterInfoMgr clusterInfo = new KMeansClusterInfoMgr();
    clusterInfo.setClusterID(key.get());

    int rowCount = 0;
    for (Text row : values) {
        rowCount++;
        String[] fields = row.toString().split(mDelimiter);

        for (int col = 0; col < fields.length; col++) {
            // A column is used only if it passes the index check and is not excepted.
            boolean selected = CommonMethods.isContainIndex(mIndexArr, col, true)
                    && !CommonMethods.isContainIndex(mExceptionIndexArr, col, false);
            if (!selected) {
                continue;
            }
            if (CommonMethods.isContainIndex(mNominalIndexArr, col, false)) {
                clusterInfo.addAttributeValue(col, fields[col], ConfigurationVariable.NOMINAL_ATTRIBUTE);
            } else {
                clusterInfo.addAttributeValue(col, fields[col], ConfigurationVariable.NUMERIC_ATTRIBUTE);
            }
        }
    }
    clusterInfo.finalCompute(rowCount);

    String subDelimiter = context.getConfiguration().get("subDelimiter", "@@");
    String description = clusterInfo.getClusterInfoString(mDelimiter, subDelimiter);
    context.write(NullWritable.get(), new Text(description));
}

From source file:org.apache.accumulo.server.tabletserver.log.MultiReaderTest.java

License:Apache License

/**
 * Reads entries from {@code reader} and asserts that their keys run from
 * {@code start + 1} up to 999, with 10 left out of the expected sequence.
 *
 * @param reader the reader to pull entries from.
 * @param start the key just before the first expected one.
 * @throws IOException if reading fails.
 */
private void scan(MultiReader reader, int start) throws IOException {
    IntWritable key = new IntWritable();
    BytesWritable value = new BytesWritable();

    for (int expected = start + 1; expected < 1000; expected++) {
        // Key 10 is never expected -- presumably absent from the test data; confirm against the fixture.
        if (expected != 10) {
            assertTrue(reader.next(key, value));
            assertEquals(expected, key.get());
        }
    }
}