Example usage for org.apache.spark.sql.types DataTypes createArrayType

List of usage examples for org.apache.spark.sql.types DataTypes createArrayType

Introduction

On this page you can find example usage for org.apache.spark.sql.types DataTypes.createArrayType.

Prototype

public static ArrayType createArrayType(DataType elementType) 

Source Link

Document

Creates an ArrayType by specifying the data type of its elements (elementType).

Usage

From source file:com.andado.spark.examples.ml.JavaNGramExample.java

License:Apache License

public static void main(String[] args) {
    // Spin up a Spark session for the n-gram demo.
    SparkSession spark = SparkSession.builder().appName("JavaNGramExample").getOrCreate();

    // $example on$
    // Three tokenized sample sentences, each paired with an integer id.
    List<Row> rows = Arrays.asList(
            RowFactory.create(0, Arrays.asList("Hi", "I", "heard", "about", "Spark")),
            RowFactory.create(1, Arrays.asList("I", "wish", "Java", "could", "use", "case", "classes")),
            RowFactory.create(2, Arrays.asList("Logistic", "regression", "models", "are", "neat")));

    // Schema: non-nullable integer "id" plus a non-nullable array-of-strings "words".
    StructField idField = new StructField("id", DataTypes.IntegerType, false, Metadata.empty());
    StructField wordsField = new StructField("words",
            DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty());
    StructType schema = new StructType(new StructField[] { idField, wordsField });

    Dataset<Row> wordDataFrame = spark.createDataFrame(rows, schema);

    // Turn the "words" column into bigrams (n = 2) in an "ngrams" column.
    NGram ngramTransformer = new NGram().setN(2).setInputCol("words").setOutputCol("ngrams");

    Dataset<Row> ngramDataFrame = ngramTransformer.transform(wordDataFrame);
    ngramDataFrame.select("ngrams").show(false);
    // $example off$

    spark.stop();
}

From source file:com.andado.spark.examples.ml.JavaStopWordsRemoverExample.java

License:Apache License

public static void main(String[] args) {
    // Spark session for the stop-words-removal demo.
    SparkSession spark = SparkSession.builder().appName("JavaStopWordsRemoverExample").getOrCreate();

    // $example on$
    // Strip stop words from the "raw" column into a "filtered" column.
    StopWordsRemover remover = new StopWordsRemover().setInputCol("raw").setOutputCol("filtered");

    // Two tokenized sample sentences.
    List<Row> rows = Arrays.asList(
            RowFactory.create(Arrays.asList("I", "saw", "the", "red", "balloon")),
            RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb")));

    // Single non-nullable array-of-strings column named "raw".
    StructType schema = new StructType(new StructField[] { new StructField("raw",
            DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty()) });

    Dataset<Row> dataset = spark.createDataFrame(rows, schema);
    remover.transform(dataset).show(false);
    // $example off$
    spark.stop();
}

From source file:com.bosscs.spark.commons.utils.CellsUtils.java

License:Apache License

/**
 * Maps a Java runtime value to the corresponding Spark SQL {@link DataType}.
 * <p>
 * Scalars map to their obvious Spark types; a {@link List} becomes an
 * ArrayType whose element type is inferred from its first element; a
 * {@link Map} becomes a MapType inferred from its first entry. Empty
 * collections default to String element/key/value types. Anything
 * unrecognized falls back to StringType.
 *
 * @param value a non-null value whose Spark type should be inferred
 * @return the inferred Spark SQL data type (never null)
 */
private static DataType getDataType(Object value) {
    Class<?> cls = value.getClass();
    DataType dataType;
    if (cls.equals(String.class)) {
        dataType = DataTypes.StringType;
    } else if (cls.equals(byte[].class) || cls.equals(Byte[].class)) {
        // BUG FIX: binary payloads arrive as primitive byte[] — value.getClass()
        // is byte[].class, never Byte[].class — so the old Byte[].class-only
        // check let real byte arrays fall through to the StringType default.
        dataType = DataTypes.BinaryType;
    } else if (cls.equals(Boolean.class)) {
        dataType = DataTypes.BooleanType;
    } else if (cls.equals(Timestamp.class)) {
        dataType = DataTypes.TimestampType;
    } else if (cls.equals(Double.class)) {
        dataType = DataTypes.DoubleType;
    } else if (cls.equals(Float.class)) {
        dataType = DataTypes.FloatType;
    } else if (cls.equals(Byte.class)) {
        dataType = DataTypes.ByteType;
    } else if (cls.equals(Integer.class)) {
        dataType = DataTypes.IntegerType;
    } else if (cls.equals(Long.class)) {
        dataType = DataTypes.LongType;
    } else if (cls.equals(Short.class)) {
        dataType = DataTypes.ShortType;
    } else if (value instanceof List) {
        List<?> listValue = (List<?>) value;
        if (listValue.isEmpty()) {
            // No element to inspect; assume string elements.
            dataType = DataTypes.createArrayType(DataTypes.StringType);
        } else {
            // Infer element type from the first element (assumes a homogeneous list).
            dataType = DataTypes.createArrayType(getDataType(listValue.get(0)));
        }
    } else if (value instanceof Map) {
        Map<?, ?> mapValue = (Map<?, ?>) value;
        if (mapValue.isEmpty()) {
            // No entry to inspect; assume string keys and values.
            dataType = DataTypes.createMapType(DataTypes.StringType, DataTypes.StringType);
        } else {
            // Infer key/value types from the first entry (assumes a homogeneous map).
            Map.Entry<?, ?> entry = mapValue.entrySet().iterator().next();
            dataType = DataTypes.createMapType(getDataType(entry.getKey()), getDataType(entry.getValue()));
        }
    } else {
        // Unknown types degrade to strings rather than failing.
        dataType = DataTypes.StringType;
    }
    return dataType;
}

From source file:com.ryft.spark.connector.examples.DataFrameJsonExampleJ.java

License:BSD License

public static void main(String[] args) {
    final SparkConf sparkConf = new SparkConf().setAppName("DataFrameJsonExampleJ");

    final SparkContext sc = new SparkContext(sparkConf);
    final SQLContext sqlContext = new SQLContext(sc);

    // Nested schema: "Actors" is an array of structs, each carrying a nullable "Name".
    final StructField actorName = DataTypes.createStructField("Name", DataTypes.StringType, true);
    final StructType actorStruct = DataTypes.createStructType(Collections.singletonList(actorName));
    final StructType schema = DataTypes.createStructType(Arrays.asList(
            DataTypes.createStructField("Actors", DataTypes.createArrayType(actorStruct), true),
            DataTypes.createStructField("AlterEgo", DataTypes.StringType, true),
            DataTypes.createStructField("Name", DataTypes.StringType, true)));

    // Load the JSON file through the Ryft connector with the explicit schema.
    final DataFrame crimes = sqlContext.read().format("com.ryft.spark.connector.sql").schema(schema)
            .option("files", "CitizensOfGotham.json").option("format", "json").load();

    crimes.registerTempTable("gotham");

    // Explode actor names for Batman rows, then keep only the Bale match.
    final DataFrame df = sqlContext.sql(""
            + "SELECT AlterEgo, Actor FROM"
            + " (SELECT AlterEgo, explode(Actors.Name) as Actor FROM gotham"
            + " WHERE AlterEgo LIKE '%Batman%') sub_table"
            + " WHERE Actor LIKE '%Bale%'");

    final Row result = df.head();
    logger.info("Result first: {}", result);
}

From source file:com.santacruzintegration.spark.StanfordNaiveBayesTextClassificationData.java

License:Open Source License

/**
 * Wraps an RDD of rows in a DataFrame with the fixed (id, label, words) schema.
 * All three columns are non-nullable; "words" is an array of strings.
 */
private DataFrame createData(JavaRDD<Row> rdd, SQLContext sqlContext) {
    StructType schema = new StructType(new StructField[] {
            new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
            new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("words", DataTypes.createArrayType(DataTypes.StringType), false,
                    Metadata.empty()) });
    return sqlContext.createDataFrame(rdd, schema);
}

From source file:com.sdw.dream.spark.examples.ml.JavaNGramExample.java

License:Apache License

public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("JavaNGramExample");
    JavaSparkContext jsc = new JavaSparkContext(conf);
    SQLContext sqlContext = new SQLContext(jsc);

    // $example on$
    // Labeled sample sentences already split into tokens.
    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
            RowFactory.create(0.0, Arrays.asList("Hi", "I", "heard", "about", "Spark")),
            RowFactory.create(1.0, Arrays.asList("I", "wish", "Java", "could", "use", "case", "classes")),
            RowFactory.create(2.0, Arrays.asList("Logistic", "regression", "models", "are", "neat"))));

    // Schema: non-nullable double "label" plus a non-nullable array-of-strings "words".
    StructField labelField = new StructField("label", DataTypes.DoubleType, false, Metadata.empty());
    StructField wordsField = new StructField("words",
            DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty());
    StructType schema = new StructType(new StructField[] { labelField, wordsField });

    DataFrame wordDataFrame = sqlContext.createDataFrame(jrdd, schema);

    NGram ngramTransformer = new NGram().setInputCol("words").setOutputCol("ngrams");

    DataFrame ngramDataFrame = ngramTransformer.transform(wordDataFrame);

    // Print the n-grams of the first three rows, " --- "-separated, one row per line.
    for (Row row : ngramDataFrame.select("ngrams", "label").take(3)) {
        java.util.List<String> ngrams = row.getList(0);
        for (String ngram : ngrams) {
            System.out.print(ngram + " --- ");
        }
        System.out.println();
    }
    // $example off$
    jsc.stop();
}

From source file:com.sdw.dream.spark.examples.ml.JavaStopWordsRemoverExample.java

License:Apache License

public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("JavaStopWordsRemoverExample");
    JavaSparkContext jsc = new JavaSparkContext(conf);
    SQLContext jsql = new SQLContext(jsc);

    // $example on$
    // Strip stop words from the "raw" column into a "filtered" column.
    StopWordsRemover remover = new StopWordsRemover().setInputCol("raw").setOutputCol("filtered");

    // Two tokenized sample sentences ("baloon" is spelled as in the upstream example data).
    JavaRDD<Row> rdd = jsc.parallelize(Arrays.asList(
            RowFactory.create(Arrays.asList("I", "saw", "the", "red", "baloon")),
            RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb"))));

    // Single non-nullable array-of-strings column named "raw".
    StructType schema = new StructType(new StructField[] { new StructField("raw",
            DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty()) });

    DataFrame dataset = jsql.createDataFrame(rdd, schema);
    remover.transform(dataset).show();
    // $example off$
    jsc.stop();
}

From source file:com.splicemachine.db.iapi.types.SQLArray.java

License:Apache License

/**
 * Builds the Spark SQL struct field describing this SQL array column.
 * The array's element type is taken from the wrapped element type's own
 * struct field; the resulting field is nullable.
 *
 * @param columnName name to give the resulting struct field
 * @return a nullable array-typed {@code StructField}
 * @throws IllegalStateException if the element type has not been set
 */
@Override
public StructField getStructField(String columnName) {
    if (type == null) {
        // IllegalStateException (still a RuntimeException, so callers are
        // unaffected) names the missing-precondition failure precisely.
        throw new IllegalStateException("type cannot be null");
    }
    return DataTypes.createStructField(columnName,
            DataTypes.createArrayType(type.getStructField("co").dataType()), true);
}

From source file:dbx.compute.spark.jobs.ml.JavaNGramExample.java

License:Apache License

public static void main(String[] args) {
    // Spark session for the n-gram demo.
    SparkSession spark = SparkSession.builder().appName("JavaNGramExample").getOrCreate();

    // $example on$
    // Labeled sample sentences already split into tokens.
    List<Row> rows = Arrays.asList(
            RowFactory.create(0.0, Arrays.asList("Hi", "I", "heard", "about", "Spark")),
            RowFactory.create(1.0, Arrays.asList("I", "wish", "Java", "could", "use", "case", "classes")),
            RowFactory.create(2.0, Arrays.asList("Logistic", "regression", "models", "are", "neat")));

    // Schema: non-nullable double "label" plus a non-nullable array-of-strings "words".
    StructField labelField = new StructField("label", DataTypes.DoubleType, false, Metadata.empty());
    StructField wordsField = new StructField("words",
            DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty());
    StructType schema = new StructType(new StructField[] { labelField, wordsField });

    Dataset<Row> wordDataFrame = spark.createDataFrame(rows, schema);

    NGram ngramTransformer = new NGram().setInputCol("words").setOutputCol("ngrams");

    Dataset<Row> ngramDataFrame = ngramTransformer.transform(wordDataFrame);

    // Print the n-grams of the first three rows, " --- "-separated, one row per line.
    for (Row row : ngramDataFrame.select("ngrams", "label").takeAsList(3)) {
        java.util.List<String> ngrams = row.getList(0);
        for (String ngram : ngrams) {
            System.out.print(ngram + " --- ");
        }
        System.out.println();
    }
    // $example off$
    spark.stop();
}

From source file:dbx.compute.spark.jobs.ml.JavaStopWordsRemoverExample.java

License:Apache License

public static void main(String[] args) {
    // Spark session for the stop-words-removal demo.
    SparkSession spark = SparkSession.builder().appName("JavaStopWordsRemoverExample").getOrCreate();

    // $example on$
    // Strip stop words from the "raw" column into a "filtered" column.
    StopWordsRemover remover = new StopWordsRemover().setInputCol("raw").setOutputCol("filtered");

    // Two tokenized sample sentences ("baloon" is spelled as in the upstream example data).
    List<Row> rows = Arrays.asList(
            RowFactory.create(Arrays.asList("I", "saw", "the", "red", "baloon")),
            RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb")));

    // Single non-nullable array-of-strings column named "raw".
    StructType schema = new StructType(new StructField[] { new StructField("raw",
            DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty()) });

    Dataset<Row> dataset = spark.createDataFrame(rows, schema);
    remover.transform(dataset).show();
    // $example off$
    spark.stop();
}