BU.MET.CS755.SpeciesViewerMapper.java Source code

Introduction

Here is the source code for BU.MET.CS755.SpeciesViewerMapper.java
Source

//

// Author - Jack Hebert (jhebert@cs.washington.edu)
// Copyright 2007
// Distributed under GPLv3
//
// Modified - Dino Konstantopoulos (dinok@bu.edu)
// Copyright 2010, BU MET CS 755 Cloud Computing
// Distributed under the "If it works, remolded by Dino Konstantopoulos,
// otherwise no idea who did! And by the way, you're free to do whatever
// you want to with it" dinolicense
//
package BU.MET.CS755;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.io.*;

public class SpeciesViewerMapper extends MapReduceBase implements Mapper<WritableComparable, Writable, Text, Text> {
    public void map(WritableComparable key, Writable value, OutputCollector output, Reporter reporter)
            throws IOException {
        // get the current page
        String data = ((Text) value).toString();
        int index = data.indexOf(":");

        // Can happen if no outlinks
        if (index == -1) {
            index = data.length();
        }

        // split into title and PR (tab or variable number of blank spaces)
        String toParse = data.substring(0, index).trim();
        String[] splits = toParse.split("\t");

        if (splits.length == 0) {
            splits = toParse.split(" ");

            if (splits.length == 0) {
                return;
            }
        }

        String pagetitle = splits[0].trim();
        String pagerank = splits[splits.length - 1].trim();

        // parse score
        double currScore = 0.0;
        try {
            currScore = Double.parseDouble(pagerank);
        } catch (Exception e) {
            currScore = 0.0;
        }

        // collect
        //output.collect(new FloatWritable((float) - currScore), key);
        output.collect(new FloatWritable((float) -currScore * 1000), new Text(pagetitle));
    }
}