Example usage for com.google.common.collect Iterables size

List of usage examples for com.google.common.collect Iterables size

Introduction

In this page you can find the example usage for com.google.common.collect Iterables size.

Prototype

public static int size(Iterable<?> iterable) 

Source Link

Document

Returns the number of elements in iterable .

Usage

From source file:org.neo4j.nlp.impl.util.PageRankSpark.java

public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaPageRank <file> <number_of_iterations>");
        System.exit(1);/*  w w w. j  av a  2  s  .  c o m*/
    }
    SparkConf sparkConf = new SparkConf().setAppName("Graphify");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);

    JavaRDD<String> lines = ctx.textFile(args[0], 1);

    // Loads all URLs from input file and initialize their neighbors.
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(s -> {
        String[] parts = SPACES.split(s);
        return new Tuple2<>(parts[0], parts[1]);
    }).distinct().groupByKey().cache();

    // Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(rs -> 1.0);

    // Calculates and updates URL ranks continuously using PageRank algorithm.
    for (int current = 0; current < Integer.parseInt(args[1]); current++) {
        // Calculates URL contributions to the rank of other URLs.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values().flatMapToPair(s -> {
            int urlCount = Iterables.size(s._1());
            List<Tuple2<String, Double>> results = new ArrayList<>();
            for (String n : s._1()) {
                results.add(new Tuple2<>(n, s._2() / urlCount));
            }
            return results;
        });
        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(sum -> 0.15 + sum * 0.85);
    }

    // Collects all URL ranks and dump them to console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }
    ctx.stop();
}

From source file:org.apache.spark.examples.PageRank.java

public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: PageRank <file> <number_of_iterations>");
        System.exit(1);//from   w  ww  . j a v  a2 s.  co  m
    }

    final SparkConf sparkConf = new SparkConf().setAppName("PageRank");
    final JavaSparkContext ctx = new JavaSparkContext(sparkConf);

    // Loads in input file. It should be in format of:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    final JavaRDD<String> lines = ctx.textFile(args[0], 1);
    final int ITERATIONS = Integer.parseInt(args[1]);

    // Loads all URLs from input file and initialize their neighbors.
    final JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(s -> {
        String[] parts = SPACES.split(s);
        return new Tuple2<>(parts[0], parts[1]);
    }).distinct().groupByKey().cache();

    // Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(rs -> 1.0);

    // Calculates and updates URL ranks continuously using PageRank algorithm.
    for (int current = 0; current < ITERATIONS; current++) {
        // Calculates URL contributions to the rank of other URLs.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values().flatMapToPair(s -> {
            int urlCount = Iterables.size(s._1);
            List<Tuple2<String, Double>> results = new ArrayList<>();
            for (String n : s._1) {
                results.add(new Tuple2<>(n, s._2() / urlCount));
            }
            return results;
        });

        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey((a, b) -> a + b).mapValues(sum -> 0.15 + sum * 0.85);
    }

    // Collects all URL ranks and dump them to console.
    final List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2 tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    ctx.stop();
}

From source file:gtl.spark.java.example.apache.JavaPageRank.java

public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaPageRank <file> <number_of_iterations>");
        System.exit(1);//from   w w  w  .ja v  a 2  s .  c o m
    }

    showWarning();

    SparkSession spark = SparkSession.builder().appName("JavaPageRank").getOrCreate();

    // Loads in input file. It should be in format of:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = spark.read().textFile(args[0]).javaRDD();

    // Loads all URLs from input file and initialize their neighbors.
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(s -> {
        String[] parts = SPACES.split(s);
        return new Tuple2<>(parts[0], parts[1]);
    }).distinct().groupByKey().cache();

    // Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(rs -> 1.0);

    // Calculates and updates URL ranks continuously using PageRank algorithm.
    for (int current = 0; current < Integer.parseInt(args[1]); current++) {
        // Calculates URL contributions to the rank of other URLs.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values().flatMapToPair(s -> {
            int urlCount = Iterables.size(s._1());
            List<Tuple2<String, Double>> results = new ArrayList<>();
            for (String n : s._1) {
                results.add(new Tuple2<>(n, s._2() / urlCount));
            }
            return results.iterator();
        });

        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(sum -> 0.15 + sum * 0.85);
    }

    // Collects all URL ranks and dump them to console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    spark.stop();
}

From source file:com.spark.cis833.extra.SparkPageRank.java

public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: SparkPageRank <input> <output>");
        System.exit(1);/*from   w  w w. j av a 2  s . c o m*/
    }
    SparkConf sparkConf = new SparkConf().setAppName("SparkPageRank");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);
    // Loads in input file. It should be in format of:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = ctx.textFile(args[0], 1);
    // Loads all URLs from input file and initialize their neighbors.
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(new PairFunction<String, String, String>() {
        @Override
        public Tuple2<String, String> call(String s) {
            String[] parts = SPACES.split(s);
            return new Tuple2<String, String>(parts[0], parts[1]);
        }
    }).distinct().groupByKey().cache();
    // Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<Iterable<String>, Double>() {
        @Override
        public Double call(Iterable<String> rs) {
            return 1.0;
        }
    });
    // Calculates and updates URL ranks continuously using PageRank algorithm.
    for (int current = 0; current < Integer.parseInt("10"); current++) {
        // Calculates URL contributions to the rank of other URLs.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values()
                .flatMapToPair(new PairFlatMapFunction<Tuple2<Iterable<String>, Double>, String, Double>() {
                    @Override
                    public Iterable<Tuple2<String, Double>> call(Tuple2<Iterable<String>, Double> s) {
                        int urlCount = Iterables.size(s._1);
                        List<Tuple2<String, Double>> results = new ArrayList<Tuple2<String, Double>>();
                        for (String n : s._1) {
                            results.add(new Tuple2<String, Double>(n, s._2() / urlCount));
                        }
                        return results;
                    }
                }).sortByKey(false);
        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
            @Override
            public Double call(Double sum) {
                return 0.15 + sum * 0.85;
            }
        }).sortByKey(false);
    }

    // Collects all URL ranks and dump them to console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    JavaPairRDD<Double, String> swap1 = ranks
            .mapToPair(new PairFunction<Tuple2<String, Double>, Double, String>() {
                @Override
                public Tuple2<Double, String> call(Tuple2<String, Double> item) throws Exception {
                    return item.swap();
                }

            }).sortByKey(false);

    swap1.saveAsTextFile(args[1]);

    ctx.stop();
}

From source file:com.jyz.study.hadoop.spark.examples.JavaPageRank.java

public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaPageRank <file> <number_of_iterations>");
        System.exit(1);/*from   ww w  .  ja  v a 2s  .co m*/
    }

    SparkConf sparkConf = new SparkConf().setAppName("JavaPageRank");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);

    // Loads in input file. It should be in format of:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = ctx.textFile(args[0], 1);

    // Loads all URLs from input file and initialize their neighbors.
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(new PairFunction<String, String, String>() {
        @Override
        public Tuple2<String, String> call(String s) {
            String[] parts = SPACES.split(s);
            return new Tuple2<String, String>(parts[0], parts[1]);
        }
    }).distinct().groupByKey().cache();

    // Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<Iterable<String>, Double>() {
        @Override
        public Double call(Iterable<String> rs) {
            return 1.0;
        }
    });

    // Calculates and updates URL ranks continuously using PageRank algorithm.
    for (int current = 0; current < Integer.parseInt(args[1]); current++) {
        // Calculates URL contributions to the rank of other URLs.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values()
                .flatMapToPair(new PairFlatMapFunction<Tuple2<Iterable<String>, Double>, String, Double>() {
                    @Override
                    public Iterable<Tuple2<String, Double>> call(Tuple2<Iterable<String>, Double> s) {
                        int urlCount = Iterables.size(s._1);
                        List<Tuple2<String, Double>> results = new ArrayList<Tuple2<String, Double>>();
                        for (String n : s._1) {
                            results.add(new Tuple2<String, Double>(n, s._2() / urlCount));
                        }
                        return results;
                    }
                });

        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
            @Override
            public Double call(Double sum) {
                return 0.15 + sum * 0.85;
            }
        });
    }

    // Collects all URL ranks and dump them to console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    ctx.stop();
}

From source file:JavaPageRank.java

public static void main(String[] args) throws Exception {
    Integer iterationNum = 5;//from w w  w.  j a v  a  2 s.c o  m
    String path = JavaWordCountDF.class.getClassLoader().getResource("pagerank_data.txt").getPath();

    showWarning();

    SparkSession spark = SparkSession.builder().master("local").appName("JavaPageRank").getOrCreate();

    // Loads in input file. It should be in format of:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = spark.read().textFile(path).javaRDD();

    // Loads all URLs from input file and initialize their neighbors.
    // lambda type: (PairFunction<String, String, String>)
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(s -> {
        String[] parts = SPACES.split(s);
        return new Tuple2<>(parts[0], parts[1]);
    }).distinct().groupByKey().cache();

    // Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one.
    // Lambda type: (Function<Iterable<String>, Double>)
    JavaPairRDD<String, Double> ranks = links.mapValues(rs -> 1.0);

    // Calculates and updates URL ranks continuously using PageRank algorithm.
    for (int current = 0; current < iterationNum; current++) {
        // Calculates URL contributions to the rank of other URLs.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values()
                // (PairFlatMapFunction<Tuple2<Iterable<String>, Double>, String, Double>)
                .flatMapToPair(s -> {
                    int urlCount = Iterables.size(s._1);
                    List<Tuple2<String, Double>> results = new ArrayList<>();
                    for (String n : s._1) {
                        results.add(new Tuple2<>(n, s._2() / urlCount));
                    }
                    return results.iterator();
                });

        // Re-calculates URL ranks based on neighbor contributions.
        // (Function<Double, Double>) type of sum
        ranks = contribs.reduceByKey(new Sum()).mapValues(sum -> 0.15 + sum * 0.85);
    }

    // Collects all URL ranks and dump them to console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    spark.stop();
}

From source file:com.andado.spark.examples.JavaPageRank.java

public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaPageRank <file> <number_of_iterations>");
        System.exit(1);/*from w ww.j a  v  a  2 s  . co  m*/
    }

    showWarning();

    SparkSession spark = SparkSession.builder().appName("JavaPageRank").getOrCreate();

    // Loads in input file. It should be in format of:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = spark.read().textFile(args[0]).javaRDD();

    // Loads all URLs from input file and initialize their neighbors.
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(new PairFunction<String, String, String>() {
        @Override
        public Tuple2<String, String> call(String s) {
            String[] parts = SPACES.split(s);
            return new Tuple2<>(parts[0], parts[1]);
        }
    }).distinct().groupByKey().cache();

    // Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<Iterable<String>, Double>() {
        @Override
        public Double call(Iterable<String> rs) {
            return 1.0;
        }
    });

    // Calculates and updates URL ranks continuously using PageRank algorithm.
    for (int current = 0; current < Integer.parseInt(args[1]); current++) {
        // Calculates URL contributions to the rank of other URLs.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values()
                .flatMapToPair(new PairFlatMapFunction<Tuple2<Iterable<String>, Double>, String, Double>() {
                    @Override
                    public Iterator<Tuple2<String, Double>> call(Tuple2<Iterable<String>, Double> s) {
                        int urlCount = Iterables.size(s._1);
                        List<Tuple2<String, Double>> results = new ArrayList<>();
                        for (String n : s._1) {
                            results.add(new Tuple2<>(n, s._2() / urlCount));
                        }
                        return results.iterator();
                    }
                });

        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
            @Override
            public Double call(Double sum) {
                return 0.15 + sum * 0.85;
            }
        });
    }

    // Collects all URL ranks and dump them to console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    spark.stop();
}

From source file:com.hxr.bigdata.spark.example141.JavaPageRank.java

public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaPageRank <file> <number_of_iterations>");
        System.exit(1);//from w  w w  . j  a v a 2s  .  c  o  m
    }

    showWarning();

    SparkConf sparkConf = new SparkConf().setAppName("JavaPageRank");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);

    // Loads in input file. It should be in format of:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = ctx.textFile(args[0], 1);

    // Loads all URLs from input file and initialize their neighbors.
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(new PairFunction<String, String, String>() {

        public Tuple2<String, String> call(String s) {
            String[] parts = SPACES.split(s);
            return new Tuple2<String, String>(parts[0], parts[1]);
        }
    }).distinct().groupByKey().cache();

    // Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<Iterable<String>, Double>() {

        public Double call(Iterable<String> rs) {
            return 1.0;
        }
    });

    // Calculates and updates URL ranks continuously using PageRank algorithm.
    for (int current = 0; current < Integer.parseInt(args[1]); current++) {
        // Calculates URL contributions to the rank of other URLs.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values()
                .flatMapToPair(new PairFlatMapFunction<Tuple2<Iterable<String>, Double>, String, Double>() {

                    public Iterable<Tuple2<String, Double>> call(Tuple2<Iterable<String>, Double> s) {
                        int urlCount = Iterables.size(s._1);
                        List<Tuple2<String, Double>> results = new ArrayList<Tuple2<String, Double>>();
                        for (String n : s._1) {
                            results.add(new Tuple2<String, Double>(n, s._2() / urlCount));
                        }
                        return results;
                    }
                });

        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {

            public Double call(Double sum) {
                return 0.15 + sum * 0.85;
            }
        });
    }

    // Collects all URL ranks and dump them to console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    ctx.stop();
}

From source file:com.sdw.dream.spark.examples.JavaPageRank.java

public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaPageRank <file> <number_of_iterations>");
        System.exit(1);// w w  w  .j  av  a 2s  .  co m
    }

    showWarning();

    SparkConf sparkConf = new SparkConf().setAppName("JavaPageRank");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);

    // Loads in input file. It should be in format of:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = ctx.textFile(args[0], 1);

    // Loads all URLs from input file and initialize their neighbors.
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(new PairFunction<String, String, String>() {
        @Override
        public Tuple2<String, String> call(String s) {
            String[] parts = SPACES.split(s);
            return new Tuple2<String, String>(parts[0], parts[1]);
        }
    }).distinct().groupByKey().cache();

    // Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<Iterable<String>, Double>() {
        @Override
        public Double call(Iterable<String> rs) {
            return 1.0;
        }
    });

    // Calculates and updates URL ranks continuously using PageRank algorithm.
    for (int current = 0; current < Integer.parseInt(args[1]); current++) {
        // Calculates URL contributions to the rank of other URLs.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values()
                .flatMapToPair(new PairFlatMapFunction<Tuple2<Iterable<String>, Double>, String, Double>() {
                    @Override
                    public Iterable<Tuple2<String, Double>> call(Tuple2<Iterable<String>, Double> s) {
                        int urlCount = Iterables.size(s._1);
                        List<Tuple2<String, Double>> results = new ArrayList<Tuple2<String, Double>>();
                        for (String n : s._1) {
                            results.add(new Tuple2<String, Double>(n, s._2() / urlCount));
                        }
                        return results;
                    }
                });

        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
            @Override
            public Double call(Double sum) {
                return 0.15 + sum * 0.85;
            }
        });
    }

    // Collects all URL ranks and dump them to console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    ctx.stop();
}

From source file:org.biocaddie.citationanalysis.metrics.JavaPageRankInt.java

public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaPageRank <file> <number_of_iterations>");
        System.exit(1);//from   w  w w . j  a  v a 2s  .  c  o  m
    }

    double alpha = 0.5;
    JavaSparkContext ctx = SparkUtils.getJavaSparkContext("JavaPageRank");

    // Loads in input file. It should be in format of:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    //    JavaRDD<String> lines = ctx.textFile(args[0], 1);
    JavaRDD<String> lines = ctx.textFile(args[0]);

    // Loads all URLs from input file and initialize their neighbors.
    JavaPairRDD<Integer, Iterable<Integer>> links = lines
            .mapToPair(new PairFunction<String, Integer, Integer>() {
                @Override
                public Tuple2<Integer, Integer> call(String s) {
                    String[] parts = SPACES.split(s);
                    return new Tuple2<Integer, Integer>(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]));
                }
            }).distinct().groupByKey().cache();

    // Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one.

    JavaPairRDD<Integer, Double> ranks = links.mapValues(new Function<Iterable<Integer>, Double>() {
        @Override
        public Double call(Iterable<Integer> rs) {
            return 1.0;
        }
    });

    // Calculates and updates URL ranks continuously using PageRank algorithm.
    for (int current = 0; current < Integer.parseInt(args[2]); current++) {
        // Calculates URL contributions to the rank of other URLs.
        JavaPairRDD<Integer, Double> contribs = links.join(ranks).values()
                .flatMapToPair(new PairFlatMapFunction<Tuple2<Iterable<Integer>, Double>, Integer, Double>() {
                    @Override
                    public Iterable<Tuple2<Integer, Double>> call(Tuple2<Iterable<Integer>, Double> s) {
                        int urlCount = Iterables.size(s._1);
                        List<Tuple2<Integer, Double>> results = new ArrayList<Tuple2<Integer, Double>>();
                        for (Integer n : s._1) {
                            results.add(new Tuple2<Integer, Double>(n, s._2() / urlCount));
                        }
                        return results;
                    }
                });

        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
            @Override
            public Double call(Double sum) {
                return alpha + sum * 1 - alpha;
                //         return 0.15 + sum * 0.85;
            }
        });
    }

    JavaRDD<String> idLines = ctx.textFile(args[1]);
    JavaPairRDD<Integer, Integer> pmIds = idLines.mapToPair(new PairFunction<String, Integer, Integer>() {
        @Override
        public Tuple2<Integer, Integer> call(String s) {
            String[] parts = s.split(",");
            return new Tuple2<Integer, Integer>(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]));
        }
    });

    ranks = ranks.filter(t -> t._2 > 0.8);

    JavaPairRDD<Integer, Tuple2<Double, Integer>> join = ranks.join(pmIds);

    List<Tuple2<Integer, Tuple2<Double, Integer>>> collect = join.collect();
    for (Tuple2<Integer, Tuple2<Double, Integer>> t : collect) {
        System.out.println(t._1 + "," + t._2._2 + "," + t._2._1);
    }

    // Collects all URL ranks and dump them to console.
    //    List<Tuple2<Integer, Double>> output = ranks.collect();
    //    for (Tuple2<?,?> tuple : output) {
    //        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    //    }

    ctx.stop();
}