List of usage examples for the method org.apache.spark.sql.types.DataTypes#createArrayType
public static ArrayType createArrayType(DataType elementType)
From source file:com.andado.spark.examples.ml.JavaNGramExample.java
License:Apache License
public static void main(String[] args) { SparkSession spark = SparkSession.builder().appName("JavaNGramExample").getOrCreate(); // $example on$ List<Row> data = Arrays.asList(RowFactory.create(0, Arrays.asList("Hi", "I", "heard", "about", "Spark")), RowFactory.create(1, Arrays.asList("I", "wish", "Java", "could", "use", "case", "classes")), RowFactory.create(2, Arrays.asList("Logistic", "regression", "models", "are", "neat"))); StructType schema = new StructType(new StructField[] { new StructField("id", DataTypes.IntegerType, false, Metadata.empty()), new StructField("words", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty()) }); Dataset<Row> wordDataFrame = spark.createDataFrame(data, schema); NGram ngramTransformer = new NGram().setN(2).setInputCol("words").setOutputCol("ngrams"); Dataset<Row> ngramDataFrame = ngramTransformer.transform(wordDataFrame); ngramDataFrame.select("ngrams").show(false); // $example off$ spark.stop();/*from w w w . j ava 2s . c o m*/ }
From source file:com.andado.spark.examples.ml.JavaStopWordsRemoverExample.java
License:Apache License
public static void main(String[] args) { SparkSession spark = SparkSession.builder().appName("JavaStopWordsRemoverExample").getOrCreate(); // $example on$ StopWordsRemover remover = new StopWordsRemover().setInputCol("raw").setOutputCol("filtered"); List<Row> data = Arrays.asList(RowFactory.create(Arrays.asList("I", "saw", "the", "red", "balloon")), RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb"))); StructType schema = new StructType(new StructField[] { new StructField("raw", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty()) }); Dataset<Row> dataset = spark.createDataFrame(data, schema); remover.transform(dataset).show(false); // $example off$ spark.stop();//from w w w . j a v a 2 s .co m }
From source file:com.bosscs.spark.commons.utils.CellsUtils.java
License:Apache License
/**
 * Maps a runtime Java value to the corresponding Spark SQL {@link DataType}.
 *
 * <p>Recognized scalar classes map to their primitive Spark types; lists map
 * to {@code ArrayType} (element type inferred from the first element), maps
 * to {@code MapType} (key/value types inferred from one entry). Empty
 * collections and unknown classes fall back to {@code StringType}.
 *
 * @param value the value whose Spark type should be inferred; must be non-null
 * @return the inferred Spark SQL data type
 */
private static DataType getDataType(Object value) {
    final Class<?> cls = value.getClass();

    if (cls.equals(String.class)) {
        return DataTypes.StringType;
    }
    // NOTE(review): this matches only boxed Byte[] arrays; a primitive byte[]
    // has class byte[].class and would fall through to StringType — confirm intent.
    if (cls.equals(Byte[].class)) {
        return DataTypes.BinaryType;
    }
    if (cls.equals(Boolean.class)) {
        return DataTypes.BooleanType;
    }
    if (cls.equals(Timestamp.class)) {
        return DataTypes.TimestampType;
    }
    if (cls.equals(Double.class)) {
        return DataTypes.DoubleType;
    }
    if (cls.equals(Float.class)) {
        return DataTypes.FloatType;
    }
    if (cls.equals(Byte.class)) {
        return DataTypes.ByteType;
    }
    if (cls.equals(Integer.class)) {
        return DataTypes.IntegerType;
    }
    if (cls.equals(Long.class)) {
        return DataTypes.LongType;
    }
    if (cls.equals(Short.class)) {
        return DataTypes.ShortType;
    }

    if (value instanceof List) {
        List<?> list = (List<?>) value;
        // Empty list: element type is unknowable, default to String.
        return list.isEmpty()
                ? DataTypes.createArrayType(DataTypes.StringType)
                : DataTypes.createArrayType(getDataType(list.get(0)));
    }

    if (value instanceof Map) {
        Map<?, ?> map = (Map<?, ?>) value;
        if (map.isEmpty()) {
            // Empty map: default both key and value types to String.
            return DataTypes.createMapType(DataTypes.StringType, DataTypes.StringType);
        }
        Map.Entry<?, ?> sample = map.entrySet().iterator().next();
        return DataTypes.createMapType(getDataType(sample.getKey()), getDataType(sample.getValue()));
    }

    // Unknown class: represent as a string.
    return DataTypes.StringType;
}
From source file:com.ryft.spark.connector.examples.DataFrameJsonExampleJ.java
License:BSD License
/**
 * Loads a JSON file through the Ryft Spark connector with an explicit schema
 * (an array of structs for "Actors"), registers it as a temp table, and runs
 * a SQL query that explodes the actor names and filters the result.
 */
public static void main(String[] args) {
    final SparkConf conf = new SparkConf().setAppName("DataFrameJsonExampleJ");
    final SparkContext sparkContext = new SparkContext(conf);
    final SQLContext sql = new SQLContext(sparkContext);

    // Actors is an array of {Name: string} structs; all fields nullable.
    final StructField actorName =
            DataTypes.createStructField("Name", DataTypes.StringType, true);
    final StructField actors = DataTypes.createStructField("Actors",
            DataTypes.createArrayType(DataTypes.createStructType(Collections.singletonList(actorName))),
            true);
    final StructType schema = DataTypes.createStructType(Arrays.asList(
            actors,
            DataTypes.createStructField("AlterEgo", DataTypes.StringType, true),
            DataTypes.createStructField("Name", DataTypes.StringType, true)));

    final DataFrame crimes = sql.read()
            .format("com.ryft.spark.connector.sql")
            .schema(schema)
            .option("files", "CitizensOfGotham.json")
            .option("format", "json")
            .load();
    crimes.registerTempTable("gotham");

    final DataFrame df = sql.sql(
            "SELECT AlterEgo, Actor FROM"
                    + " (SELECT AlterEgo, explode(Actors.Name) as Actor FROM gotham"
                    + " WHERE AlterEgo LIKE '%Batman%') sub_table"
                    + " WHERE Actor LIKE '%Bale%'");

    final Row result = df.head();
    logger.info("Result first: {}", result);
}
From source file:com.santacruzintegration.spark.StanfordNaiveBayesTextClassificationData.java
License:Open Source License
private DataFrame createData(JavaRDD<Row> rdd, SQLContext sqlContext) { StructField id = null;//from w w w .j a v a 2 s . com id = new StructField("id", DataTypes.IntegerType, false, Metadata.empty()); StructField label = null; label = new StructField("label", DataTypes.DoubleType, false, Metadata.empty()); StructField words = null; words = new StructField("words", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty()); StructType schema = new StructType(new StructField[] { id, label, words }); DataFrame ret = sqlContext.createDataFrame(rdd, schema); return ret; }
From source file:com.sdw.dream.spark.examples.ml.JavaNGramExample.java
License:Apache License
public static void main(String[] args) { SparkConf conf = new SparkConf().setAppName("JavaNGramExample"); JavaSparkContext jsc = new JavaSparkContext(conf); SQLContext sqlContext = new SQLContext(jsc); // $example on$ JavaRDD<Row> jrdd = jsc.parallelize( Arrays.asList(RowFactory.create(0.0, Arrays.asList("Hi", "I", "heard", "about", "Spark")), RowFactory.create(1.0, Arrays.asList("I", "wish", "Java", "could", "use", "case", "classes")), RowFactory.create(2.0, Arrays.asList("Logistic", "regression", "models", "are", "neat")))); StructType schema = new StructType(new StructField[] { new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), new StructField("words", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty()) }); DataFrame wordDataFrame = sqlContext.createDataFrame(jrdd, schema); NGram ngramTransformer = new NGram().setInputCol("words").setOutputCol("ngrams"); DataFrame ngramDataFrame = ngramTransformer.transform(wordDataFrame); for (Row r : ngramDataFrame.select("ngrams", "label").take(3)) { java.util.List<String> ngrams = r.getList(0); for (String ngram : ngrams) System.out.print(ngram + " --- "); System.out.println();//from w w w. ja v a2 s .c o m } // $example off$ jsc.stop(); }
From source file:com.sdw.dream.spark.examples.ml.JavaStopWordsRemoverExample.java
License:Apache License
public static void main(String[] args) { SparkConf conf = new SparkConf().setAppName("JavaStopWordsRemoverExample"); JavaSparkContext jsc = new JavaSparkContext(conf); SQLContext jsql = new SQLContext(jsc); // $example on$ StopWordsRemover remover = new StopWordsRemover().setInputCol("raw").setOutputCol("filtered"); JavaRDD<Row> rdd = jsc//from ww w .j av a 2s . c o m .parallelize(Arrays.asList(RowFactory.create(Arrays.asList("I", "saw", "the", "red", "baloon")), RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb")))); StructType schema = new StructType(new StructField[] { new StructField("raw", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty()) }); DataFrame dataset = jsql.createDataFrame(rdd, schema); remover.transform(dataset).show(); // $example off$ jsc.stop(); }
From source file:com.splicemachine.db.iapi.types.SQLArray.java
License:Apache License
/**
 * Builds a nullable StructField for this SQL array column: an ArrayType whose
 * element type is taken from the wrapped element type's own struct field.
 *
 * @param columnName name to give the resulting struct field
 * @return a nullable array-typed struct field for this column
 * @throws RuntimeException if the element type has not been set
 */
@Override
public StructField getStructField(String columnName) {
    if (type == null) {
        throw new RuntimeException("type cannot be null");
    }
    // Element type is derived from the wrapped type's struct field;
    // "co" is the placeholder column name used only to extract the DataType.
    DataType elementType = type.getStructField("co").dataType();
    return DataTypes.createStructField(columnName, DataTypes.createArrayType(elementType), true);
}
From source file:dbx.compute.spark.jobs.ml.JavaNGramExample.java
License:Apache License
public static void main(String[] args) { SparkSession spark = SparkSession.builder().appName("JavaNGramExample").getOrCreate(); // $example on$ List<Row> data = Arrays.asList(RowFactory.create(0.0, Arrays.asList("Hi", "I", "heard", "about", "Spark")), RowFactory.create(1.0, Arrays.asList("I", "wish", "Java", "could", "use", "case", "classes")), RowFactory.create(2.0, Arrays.asList("Logistic", "regression", "models", "are", "neat"))); StructType schema = new StructType(new StructField[] { new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), new StructField("words", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty()) }); Dataset<Row> wordDataFrame = spark.createDataFrame(data, schema); NGram ngramTransformer = new NGram().setInputCol("words").setOutputCol("ngrams"); Dataset<Row> ngramDataFrame = ngramTransformer.transform(wordDataFrame); for (Row r : ngramDataFrame.select("ngrams", "label").takeAsList(3)) { java.util.List<String> ngrams = r.getList(0); for (String ngram : ngrams) System.out.print(ngram + " --- "); System.out.println();/* ww w. jav a 2 s .com*/ } // $example off$ spark.stop(); }
From source file:dbx.compute.spark.jobs.ml.JavaStopWordsRemoverExample.java
License:Apache License
public static void main(String[] args) { SparkSession spark = SparkSession.builder().appName("JavaStopWordsRemoverExample").getOrCreate(); // $example on$ StopWordsRemover remover = new StopWordsRemover().setInputCol("raw").setOutputCol("filtered"); List<Row> data = Arrays.asList(RowFactory.create(Arrays.asList("I", "saw", "the", "red", "baloon")), RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb"))); StructType schema = new StructType(new StructField[] { new StructField("raw", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty()) }); Dataset<Row> dataset = spark.createDataFrame(data, schema); remover.transform(dataset).show();//w ww.ja v a 2 s . c om // $example off$ spark.stop(); }