Example usage for org.apache.spark.api.java.Optional.get()

List of usage examples for org.apache.spark.api.java.Optional.get()

Introduction

On this page you can find example usages of org.apache.spark.api.java.Optional.get().

Prototype

public T get() 

Source Link

Usage

From source file:esiptestbed.mudrod.utils.MatrixUtil.java

License:Apache License

/**
 * Builds a sparse count matrix from tokenized entries.
 *
 * <p>Every distinct word found in the values of {@code uniqueDocRDD} receives a
 * numeric ID via {@code zipWithIndex}. Occurrences are counted per (key, word)
 * pair, and each key of {@code uniqueDocRDD} that has at least one word becomes
 * one sparse row whose column indices are word IDs and whose values are counts.
 *
 * <p>NOTE(review): {@code wordIDRDD} is evaluated by three separate actions
 * (join, count, collect) without being persisted, and the row keys are collected
 * in a different job than the one that feeds the {@code RowMatrix}. This relies
 * on Spark recomputing both RDDs in the same order each time - confirm, or
 * persist them at the call site.
 *
 * @param uniqueDocRDD pairs of (key, list of words)
 * @param sc Spark context; not used by this method
 * @return a {@code LabeledRowMatrix} whose {@code wordDocMatrix} holds the rows,
 *         whose {@code words} field holds the keys of {@code uniqueDocRDD} that
 *         produced a row, and whose {@code docs} field holds the distinct words.
 *         NOTE(review): the field names look swapped relative to the data
 *         assigned to them - verify against callers before renaming anything.
 * @throws ArithmeticException if the number of distinct words does not fit in an {@code int}
 */
public static LabeledRowMatrix createDocWordMatrix(JavaPairRDD<String, List<String>> uniqueDocRDD,
        JavaSparkContext sc) {
    // Index words with unique IDs.
    JavaPairRDD<String, Long> wordIDRDD = uniqueDocRDD.values()
            .flatMap(words -> words.iterator())
            .distinct()
            .zipWithIndex();

    // Count how often each word occurs under each key: ((key, word), count).
    JavaPairRDD<Tuple2<String, String>, Double> docWordCountRDD = uniqueDocRDD
            .flatMapToPair(docwords -> {
                List<Tuple2<Tuple2<String, String>, Double>> pairs = new ArrayList<>();
                for (String word : docwords._2) {
                    Tuple2<String, String> docWord = new Tuple2<>(docwords._1, word);
                    pairs.add(new Tuple2<>(docWord, 1.0));
                }
                return pairs.iterator();
            })
            .reduceByKey((first, second) -> first + second);

    // Re-key by word so the counts can be joined with the word IDs: (word, (key, count)).
    JavaPairRDD<String, Tuple2<String, Double>> wordDocCountRDD = docWordCountRDD
            .mapToPair(entry -> {
                Tuple2<String, Double> docAndCount = new Tuple2<>(entry._1._1, entry._2);
                return new Tuple2<>(entry._1._2, docAndCount);
            });

    JavaPairRDD<String, Tuple2<Tuple2<String, Double>, Optional<Long>>> joinedRDD = wordDocCountRDD
            .leftOuterJoin(wordIDRDD);

    // Fail fast instead of silently wrapping when the vocabulary exceeds the int range.
    int wordsize = Math.toIntExact(wordIDRDD.count());

    JavaPairRDD<String, Vector> docVectorRDD = joinedRDD
            .mapToPair(joined -> {
                Optional<Long> oid = joined._2._2;
                // Falls back to ID 0 when the join found no ID for the word.
                long wordId = 0L;
                if (oid.isPresent()) {
                    wordId = oid.get();
                }

                List<Long> ids = new ArrayList<>();
                ids.add(wordId);
                List<Double> counts = new ArrayList<>();
                counts.add(joined._2._1._2);

                Tuple2<List<Long>, List<Double>> wordcount = new Tuple2<>(ids, counts);
                Tuple2<String, Tuple2<List<Long>, List<Double>>> keyed =
                        new Tuple2<>(joined._2._1._1, wordcount);
                return keyed;
            })
            .reduceByKey((left, right) -> {
                // Concatenate the parallel ID/count lists of the same key.
                left._1.addAll(right._1);
                left._2.addAll(right._2);
                return left;
            })
            .mapToPair(row -> {
                List<Long> ids = row._2._1;
                List<Double> counts = row._2._2;
                List<Tuple2<Integer, Double>> entries = new ArrayList<>(ids.size());
                for (int i = 0; i < ids.size(); i++) {
                    // Keep the count as a double; it was previously truncated via intValue().
                    entries.add(new Tuple2<>(ids.get(i).intValue(), counts.get(i)));
                }
                // The merged lists arrive in no particular order, while the array overload of
                // Vectors.sparse requires strictly increasing indices. The Iterable overload
                // accepts unordered (index, value) pairs.
                Vector sv = Vectors.sparse(wordsize, entries);
                return new Tuple2<>(row._1, sv);
            });

    RowMatrix docwordMatrix = new RowMatrix(docVectorRDD.values().rdd());

    LabeledRowMatrix labeledRowMatrix = new LabeledRowMatrix();
    labeledRowMatrix.wordDocMatrix = docwordMatrix;
    labeledRowMatrix.words = docVectorRDD.keys().collect();
    labeledRowMatrix.docs = wordIDRDD.keys().collect();

    return labeledRowMatrix;
}

From source file:esiptestbed.mudrod.utils.SimilarityUtil.java

License:Apache License

/**
 * Converts a term similarity matrix into a list of linkage triples.
 *
 * <p>Each term is paired with its position as assigned by {@code zipWithIndex};
 * that position is then matched first against the row index and then against
 * the column index of every matrix entry to fill in the term names.
 *
 * @param keys
 *          each key is a term
 * @param simMatirx
 *          term similarity matrix, in which each row and column is a term and
 *          the cell value is the similarity between the two terms
 * @return one linkage triple per matrix entry, or {@code null} when the number
 *         of matrix columns differs from the number of keys
 */
public static List<LinkageTriple> MatrixtoTriples(JavaRDD<String> keys, CoordinateMatrix simMatirx) {
    long columnCount = simMatirx.numCols();
    long termCount = keys.count();
    if (columnCount != termCount) {
        return null;
    }

    // Lookup table: position -> term.
    JavaRDD<Tuple2<Long, String>> positionAndTerm = keys.zipWithIndex().map(pair -> pair.swap());
    JavaPairRDD<Long, String> keyIdRDD = JavaPairRDD.fromJavaRDD(positionAndTerm);

    // One triple per matrix entry, keyed by row index.
    JavaPairRDD<Long, LinkageTriple> triplesByRow = simMatirx.entries().toJavaRDD()
            .mapToPair(entry -> {
                LinkageTriple triple = new LinkageTriple();
                triple.keyAId = entry.i();
                triple.keyBId = entry.j();
                triple.weight = entry.value();
                return new Tuple2<>(triple.keyAId, triple);
            });

    // Resolve the row term, then re-key by column index.
    JavaPairRDD<Long, LinkageTriple> triplesByColumn = triplesByRow.leftOuterJoin(keyIdRDD).values()
            .mapToPair(joined -> {
                LinkageTriple triple = joined._1;
                Optional<String> rowTerm = joined._2;
                if (rowTerm.isPresent()) {
                    triple.keyA = rowTerm.get();
                }
                return new Tuple2<>(triple.keyBId, triple);
            });

    // Resolve the column term.
    JavaRDD<LinkageTriple> tripleRDD = triplesByColumn.leftOuterJoin(keyIdRDD).values()
            .map(joined -> {
                LinkageTriple triple = joined._1;
                Optional<String> columnTerm = joined._2;
                if (columnTerm.isPresent()) {
                    triple.keyB = columnTerm.get();
                }
                return triple;
            });

    return tripleRDD.collect();
}

From source file:esiptestbed.mudrod.weblog.structure.SessionExtractor.java

License:Apache License

public JavaPairRDD<String, Double> bulidSessionItermRDD(JavaRDD<ClickStream> clickstreamRDD, int filterOpt) {
    JavaPairRDD<String, String> sessionItemRDD = clickstreamRDD
            .mapToPair(new PairFunction<ClickStream, String, String>() {
                /**/*from   www  .  j a v  a  2  s.co m*/
                 * 
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, String> call(ClickStream click) throws Exception {

                    String sessionID = click.getSessionID();
                    return new Tuple2<String, String>(sessionID, click.getViewDataset());
                }
            }).distinct();

    // remove some sessions
    JavaPairRDD<String, Double> sessionItemNumRDD = sessionItemRDD.keys()
            .mapToPair(new PairFunction<String, String, Double>() {
                /**
                 * 
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, Double> call(String item) throws Exception {
                    return new Tuple2<String, Double>(item, 1.0);
                }
            }).reduceByKey(new Function2<Double, Double, Double>() {
                /**
                 * 
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Double call(Double v1, Double v2) throws Exception {
                    return v1 + v2;
                }
            }).filter(new Function<Tuple2<String, Double>, Boolean>() {
                /**
                 * 
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Boolean call(Tuple2<String, Double> arg0) throws Exception {
                    Boolean b = true;
                    if (arg0._2 < 2) {
                        b = false;
                    }
                    return b;
                }
            });

    JavaPairRDD<String, Double> filteredSessionItemRDD = sessionItemNumRDD.leftOuterJoin(sessionItemRDD)
            .mapToPair(new PairFunction<Tuple2<String, Tuple2<Double, Optional<String>>>, String, Double>() {
                /**
                 * 
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, Double> call(Tuple2<String, Tuple2<Double, Optional<String>>> arg0)
                        throws Exception {

                    Tuple2<Double, Optional<String>> test = arg0._2;
                    Optional<String> optStr = test._2;
                    String item = "";
                    if (optStr.isPresent()) {
                        item = optStr.get();
                    }
                    return new Tuple2<String, Double>(arg0._1 + "," + item, 1.0);
                }

            });

    return filteredSessionItemRDD;
}

From source file:gov.nasa.jpl.mudrod.utils.MatrixUtil.java

License:Apache License

/**
 * Builds a sparse count matrix from tokenized entries.
 *
 * <p>Every distinct word found in the values of {@code uniqueDocRDD} receives a
 * numeric ID via {@code zipWithIndex}. Occurrences are counted per (key, word)
 * pair, and each key of {@code uniqueDocRDD} that has at least one word becomes
 * one sparse row whose column indices are word IDs and whose values are counts.
 *
 * <p>NOTE(review): {@code wordIDRDD} is evaluated by three separate actions
 * (join, count, collect) without being persisted, and the row keys are collected
 * in a different job than the one that feeds the {@code RowMatrix}. This relies
 * on Spark recomputing both RDDs in the same order each time - confirm, or
 * persist them at the call site.
 *
 * @param uniqueDocRDD pairs of (key, list of words)
 * @param sc Spark context; not used by this method
 * @return a {@code LabeledRowMatrix} whose {@code rowMatrix} holds the rows,
 *         whose {@code rowkeys} holds the keys of {@code uniqueDocRDD} that
 *         produced a row, and whose {@code colkeys} holds the distinct words
 * @throws ArithmeticException if the number of distinct words does not fit in an {@code int}
 */
public static LabeledRowMatrix createDocWordMatrix(JavaPairRDD<String, List<String>> uniqueDocRDD,
        JavaSparkContext sc) {
    // Index words with unique IDs.
    JavaPairRDD<String, Long> wordIDRDD = uniqueDocRDD.values()
            .flatMap(words -> words.iterator())
            .distinct()
            .zipWithIndex();

    // Count how often each word occurs under each key: ((key, word), count).
    JavaPairRDD<Tuple2<String, String>, Double> docwordNumRDD = uniqueDocRDD
            .flatMapToPair(docwords -> {
                List<Tuple2<Tuple2<String, String>, Double>> pairs = new ArrayList<>();
                for (String word : docwords._2) {
                    Tuple2<String, String> docWord = new Tuple2<>(docwords._1, word);
                    pairs.add(new Tuple2<>(docWord, 1.0));
                }
                return pairs.iterator();
            })
            .reduceByKey((first, second) -> first + second);

    // Re-key by word so the counts can be joined with the word IDs: (word, (key, count)).
    JavaPairRDD<String, Tuple2<String, Double>> wordDocnumRDD = docwordNumRDD
            .mapToPair(entry -> {
                Tuple2<String, Double> docAndCount = new Tuple2<>(entry._1._1, entry._2);
                return new Tuple2<>(entry._1._2, docAndCount);
            });

    JavaPairRDD<String, Tuple2<Tuple2<String, Double>, Optional<Long>>> joinedRDD = wordDocnumRDD
            .leftOuterJoin(wordIDRDD);

    // Fail fast instead of silently wrapping when the vocabulary exceeds the int range.
    int wordsize = Math.toIntExact(wordIDRDD.count());

    JavaPairRDD<String, Vector> docVectorRDD = joinedRDD
            .mapToPair(joined -> {
                Optional<Long> oid = joined._2._2;
                // Falls back to ID 0 when the join found no ID for the word.
                long wordId = 0L;
                if (oid.isPresent()) {
                    wordId = oid.get();
                }

                List<Long> ids = new ArrayList<>();
                ids.add(wordId);
                List<Double> counts = new ArrayList<>();
                counts.add(joined._2._1._2);

                Tuple2<List<Long>, List<Double>> wordcount = new Tuple2<>(ids, counts);
                Tuple2<String, Tuple2<List<Long>, List<Double>>> keyed =
                        new Tuple2<>(joined._2._1._1, wordcount);
                return keyed;
            })
            .reduceByKey((left, right) -> {
                // Concatenate the parallel ID/count lists of the same key.
                left._1.addAll(right._1);
                left._2.addAll(right._2);
                return left;
            })
            .mapToPair(row -> {
                List<Long> ids = row._2._1;
                List<Double> counts = row._2._2;
                List<Tuple2<Integer, Double>> entries = new ArrayList<>(ids.size());
                for (int i = 0; i < ids.size(); i++) {
                    // Keep the count as a double; it was previously truncated via intValue().
                    entries.add(new Tuple2<>(ids.get(i).intValue(), counts.get(i)));
                }
                // The merged lists arrive in no particular order, while the array overload of
                // Vectors.sparse requires strictly increasing indices. The Iterable overload
                // accepts unordered (index, value) pairs.
                Vector sv = Vectors.sparse(wordsize, entries);
                return new Tuple2<>(row._1, sv);
            });

    RowMatrix docwordMatrix = new RowMatrix(docVectorRDD.values().rdd());

    LabeledRowMatrix labeledRowMatrix = new LabeledRowMatrix();
    labeledRowMatrix.rowMatrix = docwordMatrix;
    labeledRowMatrix.rowkeys = docVectorRDD.keys().collect();
    labeledRowMatrix.colkeys = wordIDRDD.keys().collect();

    return labeledRowMatrix;
}

From source file:gov.nasa.jpl.mudrod.utils.SimilarityUtil.java

License:Apache License

/**
 * MatrixtoTriples:Convert term similarity matrix to linkage triple list.
 *
 * @param keys      each key is a term/*  w  w  w. java 2  s.  c o  m*/
 * @param simMatirx term similarity matrix, in which each row and column is a term and
 *                  the cell value is the similarity between the two terms
 * @return linkage triple list
 */
public static List<LinkageTriple> matrixToTriples(JavaRDD<String> keys, CoordinateMatrix simMatirx) {
    if (simMatirx.numCols() != keys.count()) {
        return null;
    }

    // index words
    JavaPairRDD<Long, String> keyIdRDD = JavaPairRDD
            .fromJavaRDD(keys.zipWithIndex().map(new Function<Tuple2<String, Long>, Tuple2<Long, String>>() {
                /**
                 *
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<Long, String> call(Tuple2<String, Long> docId) {
                    return docId.swap();
                }
            }));

    JavaPairRDD<Long, LinkageTriple> entriesRowRDD = simMatirx.entries().toJavaRDD()
            .mapToPair(new PairFunction<MatrixEntry, Long, LinkageTriple>() {
                /**
                 *
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<Long, LinkageTriple> call(MatrixEntry t) throws Exception {
                    LinkageTriple triple = new LinkageTriple();
                    triple.keyAId = t.i();
                    triple.keyBId = t.j();
                    triple.weight = t.value();
                    return new Tuple2<>(triple.keyAId, triple);
                }
            });

    JavaPairRDD<Long, LinkageTriple> entriesColRDD = entriesRowRDD.leftOuterJoin(keyIdRDD).values()
            .mapToPair(new PairFunction<Tuple2<LinkageTriple, Optional<String>>, Long, LinkageTriple>() {
                /**
                 *
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<Long, LinkageTriple> call(Tuple2<LinkageTriple, Optional<String>> t)
                        throws Exception {
                    LinkageTriple triple = t._1;
                    Optional<String> stra = t._2;
                    if (stra.isPresent()) {
                        triple.keyA = stra.get();
                    }
                    return new Tuple2<>(triple.keyBId, triple);
                }
            });

    JavaRDD<LinkageTriple> tripleRDD = entriesColRDD.leftOuterJoin(keyIdRDD).values()
            .map(new Function<Tuple2<LinkageTriple, Optional<String>>, LinkageTriple>() {
                /**
                 *
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public LinkageTriple call(Tuple2<LinkageTriple, Optional<String>> t) throws Exception {
                    LinkageTriple triple = t._1;
                    Optional<String> strb = t._2;
                    if (strb.isPresent()) {
                        triple.keyB = strb.get();
                    }
                    return triple;
                }
            });
    return tripleRDD.collect();
}

From source file:gov.nasa.jpl.mudrod.weblog.structure.SessionExtractor.java

License:Apache License

public JavaPairRDD<String, Double> bulidSessionItermRDD(JavaRDD<ClickStream> clickstreamRDD) {
    JavaPairRDD<String, String> sessionItemRDD = clickstreamRDD
            .mapToPair(new PairFunction<ClickStream, String, String>() {
                /**// w  w  w. j av  a 2 s  .c o m
                 *
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, String> call(ClickStream click) throws Exception {

                    String sessionID = click.getSessionID();
                    return new Tuple2<>(sessionID, click.getViewDataset());
                }
            }).distinct();

    // remove some sessions
    JavaPairRDD<String, Double> sessionItemNumRDD = sessionItemRDD.keys()
            .mapToPair(new PairFunction<String, String, Double>() {
                /**
                 *
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, Double> call(String item) throws Exception {
                    return new Tuple2<>(item, 1.0);
                }
            }).reduceByKey(new Function2<Double, Double, Double>() {
                /**
                 *
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Double call(Double v1, Double v2) throws Exception {
                    return v1 + v2;
                }
            }).filter(new Function<Tuple2<String, Double>, Boolean>() {
                /**
                 *
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Boolean call(Tuple2<String, Double> arg0) throws Exception {
                    Boolean b = true;
                    if (arg0._2 < 2) {
                        b = false;
                    }
                    return b;
                }
            });

    return sessionItemNumRDD.leftOuterJoin(sessionItemRDD)
            .mapToPair(new PairFunction<Tuple2<String, Tuple2<Double, Optional<String>>>, String, Double>() {
                /**
                 *
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, Double> call(Tuple2<String, Tuple2<Double, Optional<String>>> arg0)
                        throws Exception {

                    Tuple2<Double, Optional<String>> test = arg0._2;
                    Optional<String> optStr = test._2;
                    String item = "";
                    if (optStr.isPresent()) {
                        item = optStr.get();
                    }
                    return new Tuple2<>(arg0._1 + "," + item, 1.0);
                }

            });
}