List of usage examples for org.apache.spark.api.java.function.PairFlatMapFunction
PairFlatMapFunction
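PairFlatMapFunction<T, K, V> is the functional interface the Spark Java API uses wherever one input element may expand into zero or more key-value pairs, most notably in JavaRDD.flatMapToPair and JavaRDD.mapPartitionsToPair. As a minimal sketch of the Spark 1.x form of the interface (the word-splitting logic and the wordPairs name are illustrative, not taken from the sources below):

import java.util.ArrayList;
import java.util.List;

import org.apache.spark.api.java.function.PairFlatMapFunction;

import scala.Tuple2;

// Emits one (token, 1) pair for every whitespace-separated token of a line.
PairFlatMapFunction<String, String, Integer> wordPairs =
        new PairFlatMapFunction<String, String, Integer>() {
            @Override
            public Iterable<Tuple2<String, Integer>> call(String line) {
                List<Tuple2<String, Integer>> pairs = new ArrayList<>();
                for (String token : line.split("\\s+")) {
                    pairs.add(new Tuple2<>(token, 1));
                }
                return pairs;
            }
        };

Applied as lines.flatMapToPair(wordPairs), this turns a JavaRDD<String> into a JavaPairRDD<String, Integer>. The examples below show the same pattern at full scale.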
From source file: com.cloudera.oryx.app.mllib.als.Evaluation.java
License: Open Source License
/**
 * Computes AUC (area under the ROC curve) as a recommender evaluation metric.
 * Really, it computes what might be described as "Mean AUC", as it computes AUC per
 * user and averages them.
 */
static double areaUnderCurve(JavaSparkContext sparkContext, MatrixFactorizationModel mfModel,
        JavaRDD<Rating> positiveData) {
    // This does not use Spark's BinaryClassificationMetrics.areaUnderROC because it
    // is intended to operate on one large set of (score,label) pairs. The computation
    // here is really many small AUC problems, for which a much faster direct computation
    // is available.

    // Extract all positive (user,product) pairs
    JavaPairRDD<Integer, Integer> positiveUserProducts = positiveData
            .mapToPair(new PairFunction<Rating, Integer, Integer>() {
                @Override
                public Tuple2<Integer, Integer> call(Rating rating) {
                    return new Tuple2<>(rating.user(), rating.product());
                }
            });

    JavaPairRDD<Integer, Iterable<Rating>> positivePredictions = predictAll(mfModel, positiveData,
            positiveUserProducts);

    // All distinct item IDs, to be broadcast
    final Broadcast<List<Integer>> allItemIDsBC = sparkContext
            .broadcast(positiveUserProducts.values().distinct().collect());

    JavaPairRDD<Integer, Integer> negativeUserProducts = positiveUserProducts.groupByKey()
            .flatMapToPair(new PairFlatMapFunction<Tuple2<Integer, Iterable<Integer>>, Integer, Integer>() {
                private final RandomGenerator random = RandomManager.getRandom();

                @Override
                public Iterable<Tuple2<Integer, Integer>> call(
                        Tuple2<Integer, Iterable<Integer>> userIDsAndItemIDs) {
                    Integer userID = userIDsAndItemIDs._1();
                    Collection<Integer> positiveItemIDs = Sets.newHashSet(userIDsAndItemIDs._2());
                    int numPositive = positiveItemIDs.size();
                    Collection<Tuple2<Integer, Integer>> negative = new ArrayList<>(numPositive);
                    List<Integer> allItemIDs = allItemIDsBC.value();
                    int numItems = allItemIDs.size();
                    // Sample about as many negative examples as positive
                    for (int i = 0; i < numItems && negative.size() < numPositive; i++) {
                        Integer itemID = allItemIDs.get(random.nextInt(numItems));
                        if (!positiveItemIDs.contains(itemID)) {
                            negative.add(new Tuple2<>(userID, itemID));
                        }
                    }
                    return negative;
                }
            });

    JavaPairRDD<Integer, Iterable<Rating>> negativePredictions = predictAll(mfModel, positiveData,
            negativeUserProducts);

    return positivePredictions.join(negativePredictions).values()
            .mapToDouble(new DoubleFunction<Tuple2<Iterable<Rating>, Iterable<Rating>>>() {
                @Override
                public double call(Tuple2<Iterable<Rating>, Iterable<Rating>> t) {
                    // AUC is also the probability that random positive examples
                    // rank higher than random examples at large. Here we compare all random negative
                    // examples to all positive examples and report the totals as an alternative
                    // computation for AUC
                    long correct = 0;
                    long total = 0;
                    for (Rating positive : t._1()) {
                        for (Rating negative : t._2()) {
                            if (positive.rating() > negative.rating()) {
                                correct++;
                            }
                            total++;
                        }
                    }
                    return (double) correct / total;
                }
            }).mean();
}
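Note: this and the other examples on this page are written against the Spark 1.x Java API, in which PairFlatMapFunction.call returns an Iterable<Tuple2<K, V>>. Since Spark 2.0 the method returns Iterator<Tuple2<K, V>> instead, so under 2.x the negative-sampling function above would end with return negative.iterator(); rather than return negative; (an adaptation sketch, not part of the source file).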
From source file: com.cloudera.oryx.lazarus.batch.ExampleBatchLayerUpdate.java
License: Open Source License
public static Map<String, Integer> countDistinctOtherWords(JavaPairRDD<String, String> data) {
    return data.values().flatMapToPair(new PairFlatMapFunction<String, String, String>() {
        @Override
        public Iterable<Tuple2<String, String>> call(String line) {
            List<Tuple2<String, String>> result = new ArrayList<>();
            Set<String> distinctTokens = new HashSet<>(Arrays.asList(line.split(" ")));
            for (String a : distinctTokens) {
                for (String b : distinctTokens) {
                    if (!a.equals(b)) {
                        result.add(new Tuple2<>(a, b));
                    }
                }
            }
            return result;
        }
    }).distinct().groupByKey().mapValues(new Function<Iterable<String>, Integer>() {
        @Override
        public Integer call(Iterable<String> values) {
            int count = 0;
            for (String v : values) {
                count++;
            }
            return count;
        }
    }).collectAsMap();
}
From source file: com.cloudera.oryx.ml.mllib.als.AUC.java
License: Open Source License
static double areaUnderCurve(JavaSparkContext sparkContext, MatrixFactorizationModel mfModel,
        JavaRDD<Rating> positiveData) {
    // This does not use Spark's BinaryClassificationMetrics.areaUnderROC because it
    // is intended to operate on one large set of (score,label) pairs. The computation
    // here is really many small AUC problems, for which a much faster direct computation
    // is available.

    // Extract all positive (user,product) pairs
    JavaPairRDD<Integer, Integer> positiveUserProducts = positiveData
            .mapToPair(new PairFunction<Rating, Integer, Integer>() {
                @Override
                public Tuple2<Integer, Integer> call(Rating rating) {
                    return new Tuple2<>(rating.user(), rating.product());
                }
            });

    JavaPairRDD<Integer, Iterable<Rating>> positivePredictions = predictAll(mfModel, positiveData,
            positiveUserProducts);

    // All distinct item IDs, to be broadcast
    final Broadcast<List<Integer>> allItemIDsBC = sparkContext
            .broadcast(positiveUserProducts.values().distinct().collect());

    JavaPairRDD<Integer, Integer> negativeUserProducts = positiveUserProducts.groupByKey()
            .flatMapToPair(new PairFlatMapFunction<Tuple2<Integer, Iterable<Integer>>, Integer, Integer>() {
                private final RandomGenerator random = RandomManager.getRandom();

                @Override
                public Iterable<Tuple2<Integer, Integer>> call(
                        Tuple2<Integer, Iterable<Integer>> userIDsAndItemIDs) {
                    Integer userID = userIDsAndItemIDs._1();
                    Collection<Integer> positiveItemIDs = Sets.newHashSet(userIDsAndItemIDs._2());
                    int numPositive = positiveItemIDs.size();
                    Collection<Tuple2<Integer, Integer>> negative = new ArrayList<>(numPositive);
                    List<Integer> allItemIDs = allItemIDsBC.value();
                    int numItems = allItemIDs.size();
                    // Sample about as many negative examples as positive
                    for (int i = 0; i < numItems && negative.size() < numPositive; i++) {
                        Integer itemID = allItemIDs.get(random.nextInt(numItems));
                        if (!positiveItemIDs.contains(itemID)) {
                            negative.add(new Tuple2<>(userID, itemID));
                        }
                    }
                    return negative;
                }
            });

    JavaPairRDD<Integer, Iterable<Rating>> negativePredictions = predictAll(mfModel, positiveData,
            negativeUserProducts);

    return positivePredictions.join(negativePredictions).values()
            .mapToDouble(new DoubleFunction<Tuple2<Iterable<Rating>, Iterable<Rating>>>() {
                @Override
                public double call(Tuple2<Iterable<Rating>, Iterable<Rating>> t) {
                    // AUC is also the probability that random positive examples
                    // rank higher than random examples at large. Here we compare all random negative
                    // examples to all positive examples and report the totals as an alternative
                    // computation for AUC
                    long correct = 0;
                    long total = 0;
                    for (Rating positive : t._1()) {
                        for (Rating negative : t._2()) {
                            if (positive.rating() > negative.rating()) {
                                correct++;
                            }
                            total++;
                        }
                    }
                    return (double) correct / total;
                }
            }).mean();
}
From source file: com.hxr.bigdata.spark.example141.JavaPageRank.java
License: Apache License
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaPageRank <file> <number_of_iterations>");
        System.exit(1);
    }

    showWarning();

    SparkConf sparkConf = new SparkConf().setAppName("JavaPageRank");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);

    // Loads in input file. It should be in format of:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = ctx.textFile(args[0], 1);

    // Loads all URLs from input file and initialize their neighbors.
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(new PairFunction<String, String, String>() {
        public Tuple2<String, String> call(String s) {
            String[] parts = SPACES.split(s);
            return new Tuple2<String, String>(parts[0], parts[1]);
        }
    }).distinct().groupByKey().cache();

    // Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<Iterable<String>, Double>() {
        public Double call(Iterable<String> rs) {
            return 1.0;
        }
    });

    // Calculates and updates URL ranks continuously using PageRank algorithm.
    for (int current = 0; current < Integer.parseInt(args[1]); current++) {
        // Calculates URL contributions to the rank of other URLs.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values()
                .flatMapToPair(new PairFlatMapFunction<Tuple2<Iterable<String>, Double>, String, Double>() {
                    public Iterable<Tuple2<String, Double>> call(Tuple2<Iterable<String>, Double> s) {
                        int urlCount = Iterables.size(s._1);
                        List<Tuple2<String, Double>> results = new ArrayList<Tuple2<String, Double>>();
                        for (String n : s._1) {
                            results.add(new Tuple2<String, Double>(n, s._2() / urlCount));
                        }
                        return results;
                    }
                });

        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
            public Double call(Double sum) {
                return 0.15 + sum * 0.85;
            }
        });
    }

    // Collects all URL ranks and dumps them to console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    ctx.stop();
}
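For reference, the per-iteration update here is the damped PageRank formula: rank(p) = 0.15 + 0.85 · Σ rank(q) / outdegree(q), summed over all pages q that link to p, with damping factor 0.85. Note this example variant does not divide the damping term by the total page count, so ranks sum to roughly the number of pages rather than to 1.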
From source file: com.jyz.study.hadoop.spark.examples.JavaPageRank.java
License: Apache License
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaPageRank <file> <number_of_iterations>");
        System.exit(1);
    }

    SparkConf sparkConf = new SparkConf().setAppName("JavaPageRank");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);

    // Loads in input file. It should be in format of:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = ctx.textFile(args[0], 1);

    // Loads all URLs from input file and initialize their neighbors.
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(new PairFunction<String, String, String>() {
        @Override
        public Tuple2<String, String> call(String s) {
            String[] parts = SPACES.split(s);
            return new Tuple2<String, String>(parts[0], parts[1]);
        }
    }).distinct().groupByKey().cache();

    // Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<Iterable<String>, Double>() {
        @Override
        public Double call(Iterable<String> rs) {
            return 1.0;
        }
    });

    // Calculates and updates URL ranks continuously using PageRank algorithm.
    for (int current = 0; current < Integer.parseInt(args[1]); current++) {
        // Calculates URL contributions to the rank of other URLs.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values()
                .flatMapToPair(new PairFlatMapFunction<Tuple2<Iterable<String>, Double>, String, Double>() {
                    @Override
                    public Iterable<Tuple2<String, Double>> call(Tuple2<Iterable<String>, Double> s) {
                        int urlCount = Iterables.size(s._1);
                        List<Tuple2<String, Double>> results = new ArrayList<Tuple2<String, Double>>();
                        for (String n : s._1) {
                            results.add(new Tuple2<String, Double>(n, s._2() / urlCount));
                        }
                        return results;
                    }
                });

        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
            @Override
            public Double call(Double sum) {
                return 0.15 + sum * 0.85;
            }
        });
    }

    // Collects all URL ranks and dumps them to console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    ctx.stop();
}
From source file: com.sdw.dream.spark.examples.JavaPageRank.java
License: Apache License
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaPageRank <file> <number_of_iterations>");
        System.exit(1);
    }

    showWarning();

    SparkConf sparkConf = new SparkConf().setAppName("JavaPageRank");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);

    // Loads in input file. It should be in format of:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = ctx.textFile(args[0], 1);

    // Loads all URLs from input file and initialize their neighbors.
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(new PairFunction<String, String, String>() {
        @Override
        public Tuple2<String, String> call(String s) {
            String[] parts = SPACES.split(s);
            return new Tuple2<String, String>(parts[0], parts[1]);
        }
    }).distinct().groupByKey().cache();

    // Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<Iterable<String>, Double>() {
        @Override
        public Double call(Iterable<String> rs) {
            return 1.0;
        }
    });

    // Calculates and updates URL ranks continuously using PageRank algorithm.
    for (int current = 0; current < Integer.parseInt(args[1]); current++) {
        // Calculates URL contributions to the rank of other URLs.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values()
                .flatMapToPair(new PairFlatMapFunction<Tuple2<Iterable<String>, Double>, String, Double>() {
                    @Override
                    public Iterable<Tuple2<String, Double>> call(Tuple2<Iterable<String>, Double> s) {
                        int urlCount = Iterables.size(s._1);
                        List<Tuple2<String, Double>> results = new ArrayList<Tuple2<String, Double>>();
                        for (String n : s._1) {
                            results.add(new Tuple2<String, Double>(n, s._2() / urlCount));
                        }
                        return results;
                    }
                });

        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
            @Override
            public Double call(Double sum) {
                return 0.15 + sum * 0.85;
            }
        });
    }

    // Collects all URL ranks and dumps them to console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    ctx.stop();
}
From source file: com.spark.cis833.extra.SparkPageRank.java
License: Apache License
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: SparkPageRank <input> <output>");
        System.exit(1);
    }

    SparkConf sparkConf = new SparkConf().setAppName("SparkPageRank");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);

    // Loads in input file. It should be in format of:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = ctx.textFile(args[0], 1);

    // Loads all URLs from input file and initialize their neighbors.
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(new PairFunction<String, String, String>() {
        @Override
        public Tuple2<String, String> call(String s) {
            String[] parts = SPACES.split(s);
            return new Tuple2<String, String>(parts[0], parts[1]);
        }
    }).distinct().groupByKey().cache();

    // Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<Iterable<String>, Double>() {
        @Override
        public Double call(Iterable<String> rs) {
            return 1.0;
        }
    });

    // Calculates and updates URL ranks continuously using PageRank algorithm.
    for (int current = 0; current < Integer.parseInt("10"); current++) {
        // Calculates URL contributions to the rank of other URLs.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values()
                .flatMapToPair(new PairFlatMapFunction<Tuple2<Iterable<String>, Double>, String, Double>() {
                    @Override
                    public Iterable<Tuple2<String, Double>> call(Tuple2<Iterable<String>, Double> s) {
                        int urlCount = Iterables.size(s._1);
                        List<Tuple2<String, Double>> results = new ArrayList<Tuple2<String, Double>>();
                        for (String n : s._1) {
                            results.add(new Tuple2<String, Double>(n, s._2() / urlCount));
                        }
                        return results;
                    }
                }).sortByKey(false);

        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
            @Override
            public Double call(Double sum) {
                return 0.15 + sum * 0.85;
            }
        }).sortByKey(false);
    }

    // Collects all URL ranks and dumps them to console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    JavaPairRDD<Double, String> swap1 = ranks
            .mapToPair(new PairFunction<Tuple2<String, Double>, Double, String>() {
                @Override
                public Tuple2<Double, String> call(Tuple2<String, Double> item) throws Exception {
                    return item.swap();
                }
            }).sortByKey(false);

    swap1.saveAsTextFile(args[1]);

    ctx.stop();
}
From source file: com.springdeveloper.spark.SparkHashtags.java
License: Apache License
public static void main(String[] args) {
    System.out.println("Spark Hashtags:");
    String fileName = "";
    if (args.length > 0) {
        fileName = args[0];
        System.out.println("processing: " + fileName);
    }

    SparkConf conf = new SparkConf().setAppName("spark-hashtags");
    JavaSparkContext sc = new JavaSparkContext(conf);

    JavaRDD<String> tweetData = sc.textFile(fileName).cache();

    JavaRDD<Map<String, Object>> tweets = tweetData.map(new Function<String, Map<String, Object>>() {
        public Map<String, Object> call(String s) throws Exception {
            return jsonMapper.readValue(s.toString(), new TypeReference<HashMap<String, Object>>() {
            });
        }
    });

    JavaPairRDD<String, Integer> hashTags = tweets
            .flatMapToPair(new PairFlatMapFunction<Map<String, Object>, String, Integer>() {
                public Iterable<Tuple2<String, Integer>> call(Map<String, Object> tweet) throws Exception {
                    Map<String, Object> entities = (Map<String, Object>) tweet.get("entities");
                    List<Map<String, Object>> hashTagEntries = null;
                    if (entities != null) {
                        hashTagEntries = (List<Map<String, Object>>) entities.get("hashtags");
                    }
                    List<Tuple2<String, Integer>> hashTags = new ArrayList<Tuple2<String, Integer>>();
                    if (hashTagEntries != null && hashTagEntries.size() > 0) {
                        for (Map<String, Object> hashTagEntry : hashTagEntries) {
                            String hashTag = hashTagEntry.get("text").toString();
                            hashTags.add(new Tuple2<String, Integer>(hashTag, 1));
                        }
                    }
                    return hashTags;
                }
            });

    JavaPairRDD<String, Integer> hashTagCounts = hashTags
            .reduceByKey(new Function2<Integer, Integer, Integer>() {
                public Integer call(Integer int1, Integer int2) throws Exception {
                    return int1 + int2;
                }
            });

    JavaPairRDD<String, Integer> hashTagCountsSorted = hashTagCounts
            .mapToPair(new PairFunction<Tuple2<String, Integer>, Integer, String>() {
                public Tuple2<Integer, String> call(Tuple2<String, Integer> in) throws Exception {
                    return new Tuple2<Integer, String>(in._2, in._1);
                }
            }).sortByKey(false).mapToPair(new PairFunction<Tuple2<Integer, String>, String, Integer>() {
                public Tuple2<String, Integer> call(Tuple2<Integer, String> in) throws Exception {
                    return new Tuple2<String, Integer>(in._2, in._1);
                }
            });

    List<Tuple2<String, Integer>> top10 = hashTagCountsSorted.take(10);

    System.out.println("Tweets: " + tweets.count());
    System.out.println("HashTags: " + top10);

    sc.stop();
}
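Rather than sorting the pair RDD by value directly, this example uses the common swap idiom: each (hashtag, count) pair is swapped to (count, hashtag), ranked with sortByKey(false) so the largest counts come first, then swapped back before taking the top ten.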
From source file: com.streamsets.spark.GetCreditCardType.java
License: Apache License
@Override
public TransformResult transform(JavaRDD<Record> records) {
    // Validate incoming records
    JavaPairRDD<Record, String> errors = records
            .mapPartitionsToPair(new PairFlatMapFunction<Iterator<Record>, Record, String>() {
                public Iterable<Tuple2<Record, String>> call(Iterator<Record> recordIterator) throws Exception {
                    List<Tuple2<Record, String>> errors = new LinkedList<>();
                    // Iterate through incoming records
                    while (recordIterator.hasNext()) {
                        Record record = recordIterator.next();
                        // Validate each record
                        if (!validateRecord(record)) {
                            // We have a problem - flag the record as an error
                            errors.add(new Tuple2<>(record, "Credit card number is missing"));
                        }
                    }
                    return errors;
                }
            });

    // Filter out invalid records before applying the map
    JavaRDD<Record> result = records.filter(new Function<Record, Boolean>() {
        // Only operate on valid records
        public Boolean call(Record record) throws Exception {
            return validateRecord(record);
        }
    }).map(new Function<Record, Record>() {
        public Record call(Record record) throws Exception {
            // Get the credit card number from the record
            String creditCard = record.get(VALUE_PATH).getValueAsString();
            // Look through the map of credit card types
            for (Map.Entry<String, String[]> entry : ccTypes.entrySet()) {
                // Find the first matching prefix
                for (String prefix : entry.getValue()) {
                    if (creditCard.startsWith(prefix)) {
                        // Set the credit card type
                        record.set(RESULT_PATH, Field.create(entry.getKey()));
                        return record;
                    }
                }
            }
            return record;
        }
    });

    return new TransformResult(result, errors);
}
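Unlike flatMapToPair, which invokes the function once per element, mapPartitionsToPair hands the PairFlatMapFunction a single Iterator over an entire partition, as above. This lets the function build one error list per partition instead of allocating state for every record.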
From source file: common.JavaPageRank.java
License: Apache License
public static void main(String[] args) throws Exception {
    if (args.length < 3) {
        System.err.println("Usage: JavaPageRank <master> <file> <number_of_iterations>");
        System.exit(1);
    }

    JavaSparkContext ctx = new JavaSparkContext(args[0], "JavaPageRank", System.getenv("SPARK_HOME"),
            System.getenv("SPARK_EXAMPLES_JAR"));

    // Loads in input file. It should be in format of:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = ctx.textFile(args[1], 1);

    // Loads all URLs from input file and initialize their neighbors.
    // Note: this listing targets the pre-1.0 Spark Java API, where map(PairFunction) and
    // flatMap(PairFlatMapFunction) returned a JavaPairRDD and groupByKey produced List
    // values; on Spark 1.x these become mapToPair/flatMapToPair with Iterable values.
    JavaPairRDD<String, List<String>> links = lines.map(new PairFunction<String, String, String>() {
        @Override
        public Tuple2<String, String> call(String s) {
            String[] parts = s.split("\\s+");
            return new Tuple2<String, String>(parts[0], parts[1]);
        }
    }).distinct().groupByKey().cache();

    // Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<List<String>, Double>() {
        @Override
        public Double call(List<String> rs) throws Exception {
            return 1.0;
        }
    });

    // Calculates and updates URL ranks continuously using PageRank algorithm.
    for (int current = 0; current < Integer.parseInt(args[2]); current++) {
        // Calculates URL contributions to the rank of other URLs.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values()
                .flatMap(new PairFlatMapFunction<Tuple2<List<String>, Double>, String, Double>() {
                    @Override
                    public Iterable<Tuple2<String, Double>> call(Tuple2<List<String>, Double> s) {
                        List<Tuple2<String, Double>> results = new ArrayList<Tuple2<String, Double>>();
                        for (String n : s._1) {
                            results.add(new Tuple2<String, Double>(n, s._2 / s._1.size()));
                        }
                        return results;
                    }
                });

        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
            @Override
            public Double call(Double sum) throws Exception {
                return 0.15 + sum * 0.85;
            }
        });
    }

    // Collects all URL ranks and dumps them to console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2 tuple : output) {
        System.out.println(tuple._1 + " has rank: " + tuple._2 + ".");
    }

    System.exit(0);
}