Example usage for org.apache.spark.api.java.Optional.isPresent()

List of usage examples for org.apache.spark.api.java.Optional.isPresent()

Introduction

On this page you can find example usages of org.apache.spark.api.java.Optional.isPresent().

Prototype

public boolean isPresent() 

Source Link

Usage

From source file:esiptestbed.mudrod.utils.MatrixUtil.java

License:Apache License

/**
 * Builds a document-by-word count matrix from documents given as word lists.
 *
 * <p>Processing steps:
 * <ol>
 *   <li>every distinct word is given a numeric ID with {@code zipWithIndex};</li>
 *   <li>the occurrences of each (document, word) pair are counted;</li>
 *   <li>the counts are joined with the word IDs and gathered per document;</li>
 *   <li>each document is turned into one sparse vector whose indices are word
 *       IDs and whose values are the occurrence counts.</li>
 * </ol>
 *
 * @param uniqueDocRDD pairs of (document key, words of that document)
 * @param sc           Spark context; not used by this method
 * @return a {@code LabeledRowMatrix} whose {@code wordDocMatrix} has one sparse
 *         row per document, whose {@code words} field receives the collected
 *         document keys and whose {@code docs} field receives the collected
 *         distinct words
 */
public static LabeledRowMatrix createDocWordMatrix(JavaPairRDD<String, List<String>> uniqueDocRDD,
        JavaSparkContext sc) {
    // Give every distinct word a numeric ID; the ID later serves as the vector index.
    JavaPairRDD<String, Long> wordIDRDD = uniqueDocRDD.values()
            .flatMap(new FlatMapFunction<List<String>, String>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Iterator<String> call(List<String> arg0) throws Exception {
                    return arg0.iterator();
                }
            }).distinct().zipWithIndex();

    // Count how often each (document, word) pair occurs.
    JavaPairRDD<Tuple2<String, String>, Double> docword_num_RDD = uniqueDocRDD.flatMapToPair(
            new PairFlatMapFunction<Tuple2<String, List<String>>, Tuple2<String, String>, Double>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Iterator<Tuple2<Tuple2<String, String>, Double>> call(
                        Tuple2<String, List<String>> docwords) throws Exception {
                    List<Tuple2<Tuple2<String, String>, Double>> pairs = new ArrayList<Tuple2<Tuple2<String, String>, Double>>();
                    for (String word : docwords._2) {
                        Tuple2<String, String> worddoc = new Tuple2<String, String>(docwords._1, word);
                        pairs.add(new Tuple2<Tuple2<String, String>, Double>(worddoc, 1.0));
                    }
                    return pairs.iterator();
                }
            }).reduceByKey(new Function2<Double, Double, Double>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Double call(Double first, Double second) throws Exception {
                    return first + second;
                }
            });

    // Re-key by word so the counts can be joined with the word IDs:
    // ((document, word), count) -> (word, (document, count)).
    JavaPairRDD<String, Tuple2<String, Double>> word_docnum_RDD = docword_num_RDD.mapToPair(
            new PairFunction<Tuple2<Tuple2<String, String>, Double>, String, Tuple2<String, Double>>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, Tuple2<String, Double>> call(Tuple2<Tuple2<String, String>, Double> arg0)
                        throws Exception {
                    Tuple2<String, Double> docCount = new Tuple2<String, Double>(arg0._1._1, arg0._2);
                    return new Tuple2<String, Tuple2<String, Double>>(arg0._1._2, docCount);
                }
            });

    // Attach the numeric word ID to every (word, (document, count)) record.
    JavaPairRDD<String, Tuple2<Tuple2<String, Double>, Optional<Long>>> testRDD = word_docnum_RDD
            .leftOuterJoin(wordIDRDD);

    // Number of distinct words = dimension of every document vector.
    int wordsize = (int) wordIDRDD.count();
    JavaPairRDD<String, Vector> doc_vectorRDD = testRDD.mapToPair(
            new PairFunction<Tuple2<String, Tuple2<Tuple2<String, Double>, Optional<Long>>>, String, Tuple2<List<Long>, List<Double>>>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, Tuple2<List<Long>, List<Double>>> call(
                        Tuple2<String, Tuple2<Tuple2<String, Double>, Optional<Long>>> arg0) throws Exception {
                    // A word without a matching ID falls back to index 0.
                    Optional<Long> oid = arg0._2._2;
                    Long wordId = (long) 0;
                    if (oid.isPresent()) {
                        wordId = oid.get();
                    }

                    // Single-element lists so that reduceByKey can concatenate them per document.
                    List<Long> word = new ArrayList<Long>();
                    word.add(wordId);

                    List<Double> count = new ArrayList<Double>();
                    count.add(arg0._2._1._2);

                    Tuple2<List<Long>, List<Double>> wordcount = new Tuple2<List<Long>, List<Double>>(word,
                            count);

                    return new Tuple2<String, Tuple2<List<Long>, List<Double>>>(arg0._2._1._1, wordcount);
                }

            }).reduceByKey(
                    new Function2<Tuple2<List<Long>, List<Double>>, Tuple2<List<Long>, List<Double>>, Tuple2<List<Long>, List<Double>>>() {
                        private static final long serialVersionUID = 1L;

                        @Override
                        public Tuple2<List<Long>, List<Double>> call(Tuple2<List<Long>, List<Double>> arg0,
                                Tuple2<List<Long>, List<Double>> arg1) throws Exception {
                            // Both lists grow in lock-step, so position i of one matches position i of the other.
                            arg0._1.addAll(arg1._1);
                            arg0._2.addAll(arg1._2);
                            return new Tuple2<List<Long>, List<Double>>(arg0._1, arg0._2);
                        }
                    })
            .mapToPair(new PairFunction<Tuple2<String, Tuple2<List<Long>, List<Double>>>, String, Vector>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, Vector> call(Tuple2<String, Tuple2<List<Long>, List<Double>>> arg0)
                        throws Exception {
                    List<Long> wordIds = arg0._2._1;
                    List<Double> counts = arg0._2._2;
                    int docsize = wordIds.size();

                    // The IDs arrive in whatever order reduceByKey concatenated them, while the
                    // array-based Vectors.sparse overload expects strictly increasing indices.
                    // The (index, value) pair overload accepts unordered input, and passing the
                    // Double through keeps the count from being truncated to an int.
                    List<Tuple2<Integer, Double>> elements = new ArrayList<Tuple2<Integer, Double>>(docsize);
                    for (int i = 0; i < docsize; i++) {
                        Integer index = Integer.valueOf(wordIds.get(i).intValue());
                        elements.add(new Tuple2<Integer, Double>(index, counts.get(i)));
                    }
                    Vector sv = Vectors.sparse(wordsize, elements);
                    return new Tuple2<String, Vector>(arg0._1, sv);
                }
            });

    RowMatrix docwordMatrix = new RowMatrix(doc_vectorRDD.values().rdd());

    LabeledRowMatrix labeledRowMatrix = new LabeledRowMatrix();
    labeledRowMatrix.wordDocMatrix = docwordMatrix;
    // NOTE(review): doc_vectorRDD is keyed by document and wordIDRDD by word, so the
    // field names look swapped relative to what they receive. The assignments are kept
    // unchanged because other code may rely on them - confirm against the callers.
    labeledRowMatrix.words = doc_vectorRDD.keys().collect();
    labeledRowMatrix.docs = wordIDRDD.keys().collect();

    return labeledRowMatrix;
}

From source file:esiptestbed.mudrod.utils.SimilarityUtil.java

License:Apache License

/**
 * MatrixtoTriples:Convert term similarity matrix to linkage triple list.
 *
 *
 * @param keys/*from  w  w  w .  j a  v a2s  .  c o  m*/
 *          each key is a term
 * @param simMatirx
 *          term similarity matrix, in which each row and column is a term and
 *          the cell value is the similarity between the two terms
 * @return linkage triple list
 */
public static List<LinkageTriple> MatrixtoTriples(JavaRDD<String> keys, CoordinateMatrix simMatirx) {
    if (simMatirx.numCols() != keys.count()) {
        return null;
    }

    // index words
    JavaPairRDD<Long, String> keyIdRDD = JavaPairRDD
            .fromJavaRDD(keys.zipWithIndex().map(new Function<Tuple2<String, Long>, Tuple2<Long, String>>() {
                /**
                 * 
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<Long, String> call(Tuple2<String, Long> doc_id) {
                    return doc_id.swap();
                }
            }));

    JavaPairRDD<Long, LinkageTriple> entries_rowRDD = simMatirx.entries().toJavaRDD()
            .mapToPair(new PairFunction<MatrixEntry, Long, LinkageTriple>() {
                /**
                 * 
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<Long, LinkageTriple> call(MatrixEntry t) throws Exception {
                    LinkageTriple triple = new LinkageTriple();
                    triple.keyAId = t.i();
                    triple.keyBId = t.j();
                    triple.weight = t.value();
                    return new Tuple2<Long, LinkageTriple>(triple.keyAId, triple);
                }
            });

    JavaPairRDD<Long, LinkageTriple> entries_colRDD = entries_rowRDD.leftOuterJoin(keyIdRDD).values()
            .mapToPair(new PairFunction<Tuple2<LinkageTriple, Optional<String>>, Long, LinkageTriple>() {
                /**
                 * 
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<Long, LinkageTriple> call(Tuple2<LinkageTriple, Optional<String>> t)
                        throws Exception {
                    LinkageTriple triple = t._1;
                    Optional<String> stra = t._2;
                    if (stra.isPresent()) {
                        triple.keyA = stra.get();
                    }
                    return new Tuple2<Long, LinkageTriple>(triple.keyBId, triple);
                }
            });

    JavaRDD<LinkageTriple> tripleRDD = entries_colRDD.leftOuterJoin(keyIdRDD).values()
            .map(new Function<Tuple2<LinkageTriple, Optional<String>>, LinkageTriple>() {
                /**
                 * 
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public LinkageTriple call(Tuple2<LinkageTriple, Optional<String>> t) throws Exception {
                    LinkageTriple triple = t._1;
                    Optional<String> strb = t._2;
                    if (strb.isPresent()) {
                        triple.keyB = strb.get();
                    }
                    return triple;
                }
            });

    List<LinkageTriple> triples = tripleRDD.collect();
    return triples;
}

From source file:esiptestbed.mudrod.weblog.structure.SessionExtractor.java

License:Apache License

public JavaPairRDD<String, Double> bulidSessionItermRDD(JavaRDD<ClickStream> clickstreamRDD, int filterOpt) {
    JavaPairRDD<String, String> sessionItemRDD = clickstreamRDD
            .mapToPair(new PairFunction<ClickStream, String, String>() {
                /**/*from w ww .  ja v a 2  s. co  m*/
                 * 
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, String> call(ClickStream click) throws Exception {

                    String sessionID = click.getSessionID();
                    return new Tuple2<String, String>(sessionID, click.getViewDataset());
                }
            }).distinct();

    // remove some sessions
    JavaPairRDD<String, Double> sessionItemNumRDD = sessionItemRDD.keys()
            .mapToPair(new PairFunction<String, String, Double>() {
                /**
                 * 
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, Double> call(String item) throws Exception {
                    return new Tuple2<String, Double>(item, 1.0);
                }
            }).reduceByKey(new Function2<Double, Double, Double>() {
                /**
                 * 
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Double call(Double v1, Double v2) throws Exception {
                    return v1 + v2;
                }
            }).filter(new Function<Tuple2<String, Double>, Boolean>() {
                /**
                 * 
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Boolean call(Tuple2<String, Double> arg0) throws Exception {
                    Boolean b = true;
                    if (arg0._2 < 2) {
                        b = false;
                    }
                    return b;
                }
            });

    JavaPairRDD<String, Double> filteredSessionItemRDD = sessionItemNumRDD.leftOuterJoin(sessionItemRDD)
            .mapToPair(new PairFunction<Tuple2<String, Tuple2<Double, Optional<String>>>, String, Double>() {
                /**
                 * 
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, Double> call(Tuple2<String, Tuple2<Double, Optional<String>>> arg0)
                        throws Exception {

                    Tuple2<Double, Optional<String>> test = arg0._2;
                    Optional<String> optStr = test._2;
                    String item = "";
                    if (optStr.isPresent()) {
                        item = optStr.get();
                    }
                    return new Tuple2<String, Double>(arg0._1 + "," + item, 1.0);
                }

            });

    return filteredSessionItemRDD;
}

From source file:gov.nasa.jpl.mudrod.utils.MatrixUtil.java

License:Apache License

/**
 * Builds a document-by-word count matrix from documents given as word lists.
 *
 * <p>Processing steps:
 * <ol>
 *   <li>every distinct word is given a numeric ID with {@code zipWithIndex};</li>
 *   <li>the occurrences of each (document, word) pair are counted;</li>
 *   <li>the counts are joined with the word IDs and gathered per document;</li>
 *   <li>each document is turned into one sparse vector whose indices are word
 *       IDs and whose values are the occurrence counts.</li>
 * </ol>
 *
 * @param uniqueDocRDD pairs of (document key, words of that document)
 * @param sc           Spark context; not used by this method
 * @return a {@code LabeledRowMatrix} with one sparse row per document, the
 *         collected document keys as {@code rowkeys} and the collected distinct
 *         words as {@code colkeys}
 */
public static LabeledRowMatrix createDocWordMatrix(JavaPairRDD<String, List<String>> uniqueDocRDD,
        JavaSparkContext sc) {
    // Give every distinct word a numeric ID; the ID later serves as the vector index.
    JavaPairRDD<String, Long> wordIDRDD = uniqueDocRDD.values()
            .flatMap(new FlatMapFunction<List<String>, String>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Iterator<String> call(List<String> arg0) throws Exception {
                    return arg0.iterator();
                }
            }).distinct().zipWithIndex();

    // Count how often each (document, word) pair occurs.
    JavaPairRDD<Tuple2<String, String>, Double> docwordNumRDD = uniqueDocRDD.flatMapToPair(
            new PairFlatMapFunction<Tuple2<String, List<String>>, Tuple2<String, String>, Double>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Iterator<Tuple2<Tuple2<String, String>, Double>> call(
                        Tuple2<String, List<String>> docwords) throws Exception {
                    List<Tuple2<Tuple2<String, String>, Double>> pairs = new ArrayList<>();
                    for (String word : docwords._2) {
                        Tuple2<String, String> worddoc = new Tuple2<>(docwords._1, word);
                        pairs.add(new Tuple2<Tuple2<String, String>, Double>(worddoc, 1.0));
                    }
                    return pairs.iterator();
                }
            }).reduceByKey(new Function2<Double, Double, Double>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Double call(Double first, Double second) throws Exception {
                    return first + second;
                }
            });

    // Re-key by word so the counts can be joined with the word IDs:
    // ((document, word), count) -> (word, (document, count)).
    JavaPairRDD<String, Tuple2<String, Double>> wordDocnumRDD = docwordNumRDD.mapToPair(
            new PairFunction<Tuple2<Tuple2<String, String>, Double>, String, Tuple2<String, Double>>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, Tuple2<String, Double>> call(Tuple2<Tuple2<String, String>, Double> arg0)
                        throws Exception {
                    Tuple2<String, Double> docCount = new Tuple2<>(arg0._1._1, arg0._2);
                    return new Tuple2<>(arg0._1._2, docCount);
                }
            });

    // Attach the numeric word ID to every (word, (document, count)) record.
    JavaPairRDD<String, Tuple2<Tuple2<String, Double>, Optional<Long>>> testRDD = wordDocnumRDD
            .leftOuterJoin(wordIDRDD);

    // Number of distinct words = dimension of every document vector.
    int wordsize = (int) wordIDRDD.count();
    JavaPairRDD<String, Vector> docVectorRDD = testRDD.mapToPair(
            new PairFunction<Tuple2<String, Tuple2<Tuple2<String, Double>, Optional<Long>>>, String, Tuple2<List<Long>, List<Double>>>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, Tuple2<List<Long>, List<Double>>> call(
                        Tuple2<String, Tuple2<Tuple2<String, Double>, Optional<Long>>> arg0) throws Exception {
                    // A word without a matching ID falls back to index 0.
                    Optional<Long> oid = arg0._2._2;
                    Long wordId = (long) 0;
                    if (oid.isPresent()) {
                        wordId = oid.get();
                    }

                    // Single-element lists so that reduceByKey can concatenate them per document.
                    List<Long> word = new ArrayList<>();
                    word.add(wordId);

                    List<Double> count = new ArrayList<>();
                    count.add(arg0._2._1._2);

                    Tuple2<List<Long>, List<Double>> wordcount = new Tuple2<>(word, count);

                    return new Tuple2<>(arg0._2._1._1, wordcount);
                }

            }).reduceByKey(
                    new Function2<Tuple2<List<Long>, List<Double>>, Tuple2<List<Long>, List<Double>>, Tuple2<List<Long>, List<Double>>>() {
                        private static final long serialVersionUID = 1L;

                        @Override
                        public Tuple2<List<Long>, List<Double>> call(Tuple2<List<Long>, List<Double>> arg0,
                                Tuple2<List<Long>, List<Double>> arg1) throws Exception {
                            // Both lists grow in lock-step, so position i of one matches position i of the other.
                            arg0._1.addAll(arg1._1);
                            arg0._2.addAll(arg1._2);
                            return new Tuple2<>(arg0._1, arg0._2);
                        }
                    })
            .mapToPair(new PairFunction<Tuple2<String, Tuple2<List<Long>, List<Double>>>, String, Vector>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, Vector> call(Tuple2<String, Tuple2<List<Long>, List<Double>>> arg0)
                        throws Exception {
                    List<Long> wordIds = arg0._2._1;
                    List<Double> counts = arg0._2._2;
                    int docsize = wordIds.size();

                    // The IDs arrive in whatever order reduceByKey concatenated them, while the
                    // array-based Vectors.sparse overload expects strictly increasing indices.
                    // The (index, value) pair overload accepts unordered input, and passing the
                    // Double through keeps the count from being truncated to an int.
                    List<Tuple2<Integer, Double>> elements = new ArrayList<>(docsize);
                    for (int i = 0; i < docsize; i++) {
                        Integer index = Integer.valueOf(wordIds.get(i).intValue());
                        elements.add(new Tuple2<Integer, Double>(index, counts.get(i)));
                    }
                    Vector sv = Vectors.sparse(wordsize, elements);
                    return new Tuple2<>(arg0._1, sv);
                }
            });

    RowMatrix docwordMatrix = new RowMatrix(docVectorRDD.values().rdd());

    LabeledRowMatrix labeledRowMatrix = new LabeledRowMatrix();
    labeledRowMatrix.rowMatrix = docwordMatrix;
    labeledRowMatrix.rowkeys = docVectorRDD.keys().collect();
    labeledRowMatrix.colkeys = wordIDRDD.keys().collect();

    return labeledRowMatrix;
}

From source file:gov.nasa.jpl.mudrod.utils.SimilarityUtil.java

License:Apache License

/**
 * Turns the entries of a term similarity matrix into linkage triples.
 *
 * <p>Every matrix entry yields one triple carrying the row index, the column
 * index and the entry value. Term names are attached by two successive left
 * outer joins against a position-to-term table built from {@code keys}: first
 * on the row index (filling {@code keyA}), then on the column index (filling
 * {@code keyB}). An index without a matching term leaves the respective name
 * field as it was.
 *
 * @param keys      each key is a term
 * @param simMatirx term similarity matrix, in which each row and column is a term and
 *                  the cell value is the similarity between the two terms
 * @return linkage triple list, or {@code null} if the matrix column count and
 *         the number of keys disagree
 */
public static List<LinkageTriple> matrixToTriples(JavaRDD<String> keys, CoordinateMatrix simMatirx) {
    final long columnCount = simMatirx.numCols();
    final long termCount = keys.count();
    if (columnCount != termCount) {
        return null;
    }

    // Lookup table: position of a term in `keys` -> the term itself.
    JavaRDD<Tuple2<Long, String>> indexedTerms = keys.zipWithIndex()
            .map(new Function<Tuple2<String, Long>, Tuple2<Long, String>>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<Long, String> call(Tuple2<String, Long> termWithIndex) {
                    return termWithIndex.swap();
                }
            });
    JavaPairRDD<Long, String> termsByIndex = JavaPairRDD.fromJavaRDD(indexedTerms);

    // One triple per matrix cell, keyed by the row index.
    JavaRDD<MatrixEntry> cells = simMatirx.entries().toJavaRDD();
    JavaPairRDD<Long, LinkageTriple> triplesByRow = cells
            .mapToPair(new PairFunction<MatrixEntry, Long, LinkageTriple>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<Long, LinkageTriple> call(MatrixEntry cell) throws Exception {
                    LinkageTriple link = new LinkageTriple();
                    link.keyAId = cell.i();
                    link.keyBId = cell.j();
                    link.weight = cell.value();
                    return new Tuple2<>(link.keyAId, link);
                }
            });

    // Resolve the row term, then switch the key to the column index.
    JavaRDD<Tuple2<LinkageTriple, Optional<String>>> rowMatches = triplesByRow.leftOuterJoin(termsByIndex)
            .values();
    JavaPairRDD<Long, LinkageTriple> triplesByColumn = rowMatches
            .mapToPair(new PairFunction<Tuple2<LinkageTriple, Optional<String>>, Long, LinkageTriple>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<Long, LinkageTriple> call(Tuple2<LinkageTriple, Optional<String>> matched)
                        throws Exception {
                    LinkageTriple link = matched._1;
                    Optional<String> rowTerm = matched._2;
                    if (rowTerm.isPresent()) {
                        link.keyA = rowTerm.get();
                    }
                    return new Tuple2<>(link.keyBId, link);
                }
            });

    // Resolve the column term.
    JavaRDD<Tuple2<LinkageTriple, Optional<String>>> columnMatches = triplesByColumn
            .leftOuterJoin(termsByIndex).values();
    JavaRDD<LinkageTriple> resolvedTriples = columnMatches
            .map(new Function<Tuple2<LinkageTriple, Optional<String>>, LinkageTriple>() {
                private static final long serialVersionUID = 1L;

                @Override
                public LinkageTriple call(Tuple2<LinkageTriple, Optional<String>> matched) throws Exception {
                    LinkageTriple link = matched._1;
                    Optional<String> columnTerm = matched._2;
                    if (columnTerm.isPresent()) {
                        link.keyB = columnTerm.get();
                    }
                    return link;
                }
            });

    return resolvedTriples.collect();
}

From source file:gov.nasa.jpl.mudrod.weblog.structure.SessionExtractor.java

License:Apache License

public JavaPairRDD<String, Double> bulidSessionItermRDD(JavaRDD<ClickStream> clickstreamRDD) {
    JavaPairRDD<String, String> sessionItemRDD = clickstreamRDD
            .mapToPair(new PairFunction<ClickStream, String, String>() {
                /**//ww  w .  j  a v  a  2s . c  o  m
                 *
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, String> call(ClickStream click) throws Exception {

                    String sessionID = click.getSessionID();
                    return new Tuple2<>(sessionID, click.getViewDataset());
                }
            }).distinct();

    // remove some sessions
    JavaPairRDD<String, Double> sessionItemNumRDD = sessionItemRDD.keys()
            .mapToPair(new PairFunction<String, String, Double>() {
                /**
                 *
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, Double> call(String item) throws Exception {
                    return new Tuple2<>(item, 1.0);
                }
            }).reduceByKey(new Function2<Double, Double, Double>() {
                /**
                 *
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Double call(Double v1, Double v2) throws Exception {
                    return v1 + v2;
                }
            }).filter(new Function<Tuple2<String, Double>, Boolean>() {
                /**
                 *
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Boolean call(Tuple2<String, Double> arg0) throws Exception {
                    Boolean b = true;
                    if (arg0._2 < 2) {
                        b = false;
                    }
                    return b;
                }
            });

    return sessionItemNumRDD.leftOuterJoin(sessionItemRDD)
            .mapToPair(new PairFunction<Tuple2<String, Tuple2<Double, Optional<String>>>, String, Double>() {
                /**
                 *
                 */
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, Double> call(Tuple2<String, Tuple2<Double, Optional<String>>> arg0)
                        throws Exception {

                    Tuple2<Double, Optional<String>> test = arg0._2;
                    Optional<String> optStr = test._2;
                    String item = "";
                    if (optStr.isPresent()) {
                        item = optStr.get();
                    }
                    return new Tuple2<>(arg0._1 + "," + item, 1.0);
                }

            });
}