Usage examples for org.apache.spark.sql.types.DataTypes.FloatType (a DataType constant).
To view the source code for org.apache.spark.sql.types.DataTypes.FloatType, click the Source link accompanying each example.
From source file:com.bosscs.spark.commons.utils.CellsUtils.java
License:Apache License
private static DataType getDataType(Object value) { Class cls = value.getClass(); DataType dataType;//from w ww . j a va 2 s. c o m if (cls.equals(String.class)) { dataType = DataTypes.StringType; } else if (cls.equals(Byte[].class)) { dataType = DataTypes.BinaryType; } else if (cls.equals(Boolean.class)) { dataType = DataTypes.BooleanType; } else if (cls.equals(Timestamp.class)) { dataType = DataTypes.TimestampType; } else if (cls.equals(Double.class)) { dataType = DataTypes.DoubleType; } else if (cls.equals(Float.class)) { dataType = DataTypes.FloatType; } else if (cls.equals(Byte.class)) { dataType = DataTypes.ByteType; } else if (cls.equals(Integer.class)) { dataType = DataTypes.IntegerType; } else if (cls.equals(Long.class)) { dataType = DataTypes.LongType; } else if (cls.equals(Short.class)) { dataType = DataTypes.ShortType; } else if (value instanceof List) { List listValue = (List) value; if (listValue.isEmpty()) { dataType = DataTypes.createArrayType(DataTypes.StringType); } else { dataType = DataTypes.createArrayType(getDataType(listValue.get(0))); } } else if (value instanceof Map) { Map mapValue = (Map) value; if (mapValue.isEmpty()) { dataType = DataTypes.createMapType(DataTypes.StringType, DataTypes.StringType); } else { Map.Entry entry = (Map.Entry) mapValue.entrySet().iterator().next(); dataType = DataTypes.createMapType(getDataType(entry.getKey()), getDataType(entry.getValue())); } } else { dataType = DataTypes.StringType; } return dataType; }
From source file:com.cambitc.spark.streaming.KafkaDirectStreamGrouping.java
License:Apache License
/**
 * Entry point: consumes messages from Kafka via a direct stream, prints each
 * batch, and maintains a windowed word count (30s window, 10s slide).
 *
 * @param args args[0] = comma-separated Kafka broker list, args[1] = comma-separated topic list
 */
public static void main(String[] args) {
    if (args.length < 2) {
        System.err.println("Usage: KafkaDirectStream <brokers> <topics>\n"
                + " <brokers> is a list of one or more Kafka brokers\n"
                + " <topics> is a list of one or more kafka topics to consume from\n\n"
                + " KafkaDirectStream localhost:9092 OBDTopics");
        System.exit(1);
    }
    String brokers = args[0];
    String topics = args[1];

    // Local context with 5 threads and a 10-second batch interval.
    // NOTE(review): the comment in the original claimed "2 seconds" but the code uses 10.
    JavaSparkContext sparkConf = new JavaSparkContext("local[5]", "JavaDirectKafkaWordCount");
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(10));
    SQLContext sqlContext = new SQLContext(sparkConf);

    // Kafka connection parameters for the direct (receiver-less) stream.
    HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(topics.split(",")));
    HashMap<String, String> kafkaParams = new HashMap<String, String>();
    kafkaParams.put("metadata.broker.list", brokers);
    kafkaParams.put("zookeeper.connect", "localhost:2181");
    kafkaParams.put("group.id", "spark-app");
    System.out.println("Kafka parameters: " + kafkaParams);

    // Create direct kafka stream with brokers and topics.
    JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(jssc, String.class,
            String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet);

    // Schema for auction records (presumably CSV-encoded message bodies -- not parsed below; confirm intent).
    List<StructField> fields = new ArrayList<StructField>();
    fields.add(DataTypes.createStructField("auctionid", DataTypes.StringType, true));
    fields.add(DataTypes.createStructField("bid", DataTypes.FloatType, true));
    fields.add(DataTypes.createStructField("bidtime", DataTypes.FloatType, true));
    fields.add(DataTypes.createStructField("bidder", DataTypes.StringType, true));
    fields.add(DataTypes.createStructField("bidderrate", DataTypes.IntegerType, true));
    fields.add(DataTypes.createStructField("openbid", DataTypes.FloatType, true));
    fields.add(DataTypes.createStructField("price", DataTypes.FloatType, true));
    fields.add(DataTypes.createStructField("item", DataTypes.StringType, true));
    fields.add(DataTypes.createStructField("daystolive", DataTypes.IntegerType, true));
    StructType schema = DataTypes.createStructType(fields);

    // Extract the message value (payload) from each Kafka (key, value) pair.
    JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            System.out.println("*************MY OUTPUT: processing lines: tuple2._1() = " + tuple2._1()
                    + "; tuple2._2()=" + tuple2._2());
            return tuple2._2();
        }
    });
    lines.print();

    // NOTE(review): SQLContext.createDataFrame expects an RDD or List, not a
    // JavaDStream, and dFrame is never used afterwards -- verify this compiles
    // and whether the DataFrame creation was meant to happen per-batch.
    DataFrame dFrame = sqlContext.createDataFrame(lines, schema);

    // Split each payload into whitespace-separated words (SPACE is a field elsewhere in the class).
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            return Arrays.asList(SPACE.split(x));
        }
    });

    // Reduce function adding two integers, defined separately for clarity.
    Function2<Integer, Integer, Integer> reduceFunc = new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    };

    // Count each word in each batch.
    JavaPairDStream<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    });

    // Reduce last 30 seconds of data, every 10 seconds.
    JavaPairDStream<String, Integer> windowedWordCounts = pairs.reduceByKeyAndWindow(reduceFunc,
            Durations.seconds(30), Durations.seconds(10));
    windowedWordCounts.print();

    // Start the computation and block until terminated.
    jssc.start();
    jssc.awaitTermination();
}
From source file:com.estonteco.spark.frames.conf.factory.creator.impl.AbstractDataFrameCreator.java
protected Object cast(String value, DataType dataType) { if (value == null || dataType == null) { return null; }//w w w . ja v a 2s . c o m if (dataType == DataTypes.StringType) { return value; } if (dataType == DataTypes.BooleanType) { return Boolean.valueOf(value); } if (dataType == DataTypes.ByteType) { return Byte.valueOf(value); } if (dataType == DataTypes.DateType) { try { return DateUtils.fromJavaDate(Date.valueOf(value)); } catch (IllegalArgumentException ex) { ex.printStackTrace(); return null; } } if (dataType == DataTypes.DoubleType) { return Double.valueOf(value); } if (dataType == DataTypes.FloatType) { return Float.valueOf(value); } if (dataType == DataTypes.LongType) { return Long.valueOf(value); } if (dataType == DataTypes.ShortType) { return Short.valueOf(value); } if (dataType == DataTypes.TimestampType) { // Throw away extra if more than 9 decimal places int periodIdx = value.indexOf("."); String str = value; if (periodIdx != -1 && str.length() - periodIdx > 9) { str = str.substring(0, periodIdx + 10); } try { return java.sql.Timestamp.valueOf(str); } catch (IllegalArgumentException ex) { ex.printStackTrace(); return null; } } if (dataType == DataTypes.BinaryType) { return value.getBytes(); } return null; }
From source file:com.getcake.sparkjdbc.SparkJDBCServer.java
License:Apache License
private String loadSingleFileWithMeta(String registerTableName, String fullPathTableName, String metaFileName) throws IOException { DataFrame dynamicDataFrame;/*from w ww. j a v a 2 s . co m*/ long startTime, firstStartTime; float durSeconds, durMinutes; String respMsg; startTime = System.currentTimeMillis(); firstStartTime = startTime; try { dynamicDataFrame = hiveContext.table(registerTableName); respMsg = "table " + registerTableName + " at " + fullPathTableName + " was already loaded"; log(respMsg); return respMsg; } catch (Throwable exc) { // hiveContext.table does not declare that it throws NoSuchTableException, so cannot use it in catch clause and // have to check for it explicitly if (exc instanceof NoSuchTableException) { respMsg = "table " + registerTableName + " at " + fullPathTableName + " was not loaded => load it next"; log(respMsg); } else { throw exc; } } FileInputStream propFileInputStream; propFileInputStream = new FileInputStream(metaFileName); properties = new Properties(); properties.load(propFileInputStream); Stream<Entry<Object, Object>> stream = properties.entrySet().stream(); Map<String, String> options = stream.collect(Collectors.toMap(entry -> String.valueOf(entry.getKey()), entry -> String.valueOf(entry.getValue()))); int numColumns = Integer.parseInt(properties.getProperty("numColumns")); StructField structFields[] = new StructField[numColumns]; String colName, colType; StructField structField; for (int i = 1; i <= numColumns; i++) { colName = properties.getProperty("col" + i + ".name"); colType = properties.getProperty("col" + i + ".type"); switch (colType) { case "TimeStamp": structField = DataTypes.createStructField(colName, DataTypes.TimestampType, true); break; case "Date": structField = DataTypes.createStructField(colName, DataTypes.DateType, true); break; case "Float": structField = DataTypes.createStructField(colName, DataTypes.FloatType, true); break; case "Integer": structField = DataTypes.createStructField(colName, 
DataTypes.IntegerType, true); break; case "Long": structField = DataTypes.createStructField(colName, DataTypes.LongType, true); break; case "Short": structField = DataTypes.createStructField(colName, DataTypes.ShortType, true); break; case "Double": structField = DataTypes.createStructField(colName, DataTypes.DoubleType, true); break; case "Boolean": structField = DataTypes.createStructField(colName, DataTypes.BooleanType, true); break; case "Binary": structField = DataTypes.createStructField(colName, DataTypes.BinaryType, true); break; case "Byte": structField = DataTypes.createStructField(colName, DataTypes.ByteType, true); break; case "Null": structField = DataTypes.createStructField(colName, DataTypes.NullType, true); break; default: structField = DataTypes.createStructField(colName, DataTypes.StringType, true); } structFields[i - 1] = structField; } // dynamicDataFrame = hiveContext.read().format("com.databricks.spark.csv"). // option("header", Boolean.toString(headerInCSVFileFlag)).option("inferSchema", Boolean.toString(inferSchemaFlag)).load(fullPathTableName); // Map<String, String> options = new HashMap<>(properties); options.put("path", "file:///" + fullPathTableName); // options.put("header", "false"); // options.put("delimiter", ","); // DataType dataType = new DataType (); /* StructField structField1 = DataTypes.createStructField("LogType", DataTypes.StringType, false); StructField structField2 = DataTypes.createStructField("EntryTime", DataTypes.TimestampType, false); StructField structField3 = DataTypes.createStructField("Code_Class", DataTypes.StringType, false); StructField structField4 = DataTypes.createStructField("Code_Method", DataTypes.StringType, false); StructField structField5 = DataTypes.createStructField("Log_Message", DataTypes.StringType, false); structFields[0] = structField1; structFields[1] = structField2; structFields[2] = structField3; structFields[3] = structField4; structFields[4] = structField5; */ StructType schema = new 
StructType(structFields); dynamicDataFrame = hiveContext.load("com.databricks.spark.csv", schema, options); durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F; durMinutes = durSeconds / 60F; log("loaded table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes); schema = dynamicDataFrame.schema(); structFields = schema.fields(); for (StructField structFieldLocal : structFields) { DataType dataType = structFieldLocal.dataType(); logger.debug(structFieldLocal.name() + " - dataType: " + dataType.typeName()); } startTime = System.currentTimeMillis(); dynamicDataFrame.cache(); durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F; durMinutes = durSeconds / 60F; log("cache table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes); startTime = System.currentTimeMillis(); dynamicDataFrame.registerTempTable(registerTableName); durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F; durMinutes = durSeconds / 60F; log("registerTempTable table " + registerTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes); durSeconds = (float) (System.currentTimeMillis() - firstStartTime) / 1000F; durMinutes = durSeconds / 60F; respMsg = "Completed loading table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes; log(respMsg); return respMsg; }
From source file:com.getcake.sparkjdbc.SparkJDBCServer.java
License:Apache License
private String loadFilesWithMeta(String registerTableName, String fullPathTableName, String metaFileName, String fileListName) throws IOException { DataFrame combinedDynamicDataFrame = null, dynamicDataFrame = null; long startTime, firstStartTime; float durSeconds, durMinutes; String respMsg;/* w ww . j a va 2 s .c o m*/ startTime = System.currentTimeMillis(); firstStartTime = startTime; try { combinedDynamicDataFrame = hiveContext.table(registerTableName); respMsg = "table " + registerTableName + " at " + fullPathTableName + " was already loaded"; log(respMsg); return respMsg; } catch (Throwable exc) { // hiveContext.table does not declare that it throws NoSuchTableException, so cannot use it in catch clause and // have to check for it explicitly if (exc instanceof NoSuchTableException) { respMsg = "table " + registerTableName + " at " + fullPathTableName + " was not loaded => load it next"; log(respMsg); } else { throw exc; } } FileInputStream propFileInputStream; propFileInputStream = new FileInputStream(metaFileName); properties = new Properties(); properties.load(propFileInputStream); Stream<Entry<Object, Object>> stream = properties.entrySet().stream(); Map<String, String> options = stream.collect(Collectors.toMap(entry -> String.valueOf(entry.getKey()), entry -> String.valueOf(entry.getValue()))); int numColumns = Integer.parseInt(properties.getProperty("numColumns")); StructField structFields[] = new StructField[numColumns]; String colName, colType; StructField structField; // structField = DataTypes.createStructField("File_Source", DataTypes.StringType, true); // structFields[0] = structField; for (int i = 1; i <= numColumns; i++) { colName = properties.getProperty("col" + i + ".name"); colType = properties.getProperty("col" + i + ".type"); switch (colType) { case "TimeStamp": structField = DataTypes.createStructField(colName, DataTypes.TimestampType, true); break; case "Date": structField = DataTypes.createStructField(colName, DataTypes.DateType, true); 
break; case "Float": structField = DataTypes.createStructField(colName, DataTypes.FloatType, true); break; case "Integer": structField = DataTypes.createStructField(colName, DataTypes.IntegerType, true); break; case "Long": structField = DataTypes.createStructField(colName, DataTypes.LongType, true); break; case "Short": structField = DataTypes.createStructField(colName, DataTypes.ShortType, true); break; case "Double": structField = DataTypes.createStructField(colName, DataTypes.DoubleType, true); break; case "Boolean": structField = DataTypes.createStructField(colName, DataTypes.BooleanType, true); break; case "Binary": structField = DataTypes.createStructField(colName, DataTypes.BinaryType, true); break; case "Byte": structField = DataTypes.createStructField(colName, DataTypes.ByteType, true); break; case "Null": structField = DataTypes.createStructField(colName, DataTypes.NullType, true); break; default: structField = DataTypes.createStructField(colName, DataTypes.StringType, true); } structFields[i - 1] = structField; } StructType schema = new StructType(structFields); List<String> fileLlist = new ArrayList<>(); try (BufferedReader br = Files.newBufferedReader(Paths.get(fileListName))) { //br returns as stream and convert it into a List fileLlist = br.lines().collect(Collectors.toList()); } catch (IOException e) { e.printStackTrace(); } for (String file : fileLlist) { options.put("path", "file:///" + file); dynamicDataFrame = hiveContext.load("com.databricks.spark.csv", schema, options); if (combinedDynamicDataFrame == null) { combinedDynamicDataFrame = dynamicDataFrame; } else { combinedDynamicDataFrame = combinedDynamicDataFrame.unionAll(dynamicDataFrame); } } durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F; durMinutes = durSeconds / 60F; log("loaded table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes); schema = combinedDynamicDataFrame.schema(); structFields = schema.fields(); for (StructField 
structFieldLocal : structFields) { DataType dataType = structFieldLocal.dataType(); logger.debug(structFieldLocal.name() + " - dataType: " + dataType.typeName()); } startTime = System.currentTimeMillis(); combinedDynamicDataFrame.cache(); durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F; durMinutes = durSeconds / 60F; log("cache table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes); startTime = System.currentTimeMillis(); combinedDynamicDataFrame.registerTempTable(registerTableName); durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F; durMinutes = durSeconds / 60F; log("registerTempTable table " + registerTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes); durSeconds = (float) (System.currentTimeMillis() - firstStartTime) / 1000F; durMinutes = durSeconds / 60F; respMsg = "Completed loading table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes; log(respMsg); return respMsg; }
From source file:com.splicemachine.db.iapi.types.SQLReal.java
License:Apache License
/**
 * Builds the Spark SQL schema field for this SQL REAL column: a nullable
 * single-precision float.
 *
 * @param columnName the name to give the schema field
 * @return a nullable {@code FloatType} struct field with the given name
 */
@Override
public StructField getStructField(String columnName) {
    StructField floatField = DataTypes.createStructField(columnName, DataTypes.FloatType, true);
    return floatField;
}
From source file:com.thinkbiganalytics.spark.metadata.SqlTransformStage.java
License:Apache License
/** * Gets the Spark SQL data type for the specified JDBC data type. *///from w w w .j a v a 2s. co m @Nonnull @SuppressWarnings("squid:S1479") private DataType getCatalystType(final int sqlType, final int precision, final int scale, final boolean signed) { switch (sqlType) { case Types.BINARY: case Types.BLOB: case Types.LONGVARBINARY: case Types.VARBINARY: return DataTypes.BinaryType; case Types.BIT: case Types.BOOLEAN: return DataTypes.BooleanType; case Types.DATE: return DataTypes.DateType; case Types.DECIMAL: case Types.NUMERIC: if (precision != 0 || scale != 0) { return new DecimalType(precision, scale); } else { return DecimalType.SYSTEM_DEFAULT(); } case Types.DOUBLE: case Types.REAL: return DataTypes.DoubleType; case Types.FLOAT: return DataTypes.FloatType; case Types.SMALLINT: case Types.TINYINT: return DataTypes.IntegerType; case Types.INTEGER: return signed ? DataTypes.IntegerType : DataTypes.LongType; case Types.ROWID: return DataTypes.LongType; case Types.BIGINT: return signed ? DataTypes.LongType : new DecimalType(20, 0); case Types.CHAR: case Types.CLOB: case Types.LONGNVARCHAR: case Types.LONGVARCHAR: case Types.NCHAR: case Types.NCLOB: case Types.NVARCHAR: case Types.REF: case Types.SQLXML: case Types.STRUCT: case Types.VARCHAR: return DataTypes.StringType; case Types.TIME: case Types.TIMESTAMP: return DataTypes.TimestampType; default: log.debug("Unsupported SQL type: {}", sqlType); return DataTypes.StringType; } }
From source file:gaffer.accumulostore.operation.spark.handler.AccumuloStoreRelation.java
License:Apache License
private static DataType getType(final String className) { switch (className) { case "java.lang.String": return DataTypes.StringType; case "java.lang.Integer": return DataTypes.IntegerType; case "java.lang.Long": return DataTypes.LongType; case "java.lang.Boolean": return DataTypes.BooleanType; case "java.lang.Double": return DataTypes.DoubleType; case "java.lang.Float": return DataTypes.FloatType; case "java.lang.Byte": return DataTypes.ByteType; case "java.lang.Short": return DataTypes.ShortType; default://w w w. ja v a 2 s . co m return null; } }
From source file:org.apache.sysml.runtime.instructions.spark.utils.FrameRDDConverterUtils.java
License:Apache License
/**
 * Converts a DataFrame schema into SystemML frame column names and value types,
 * writing into the caller-provided {@code colnames} and {@code fschema} arrays.
 * <p>
 * NOTE: regarding the support of vector columns, we make the following
 * schema restriction: single vector column, which allows inference of
 * the vector length without data access and covers the common case.
 * A vector column is expanded into {@code lenVect} numbered columns
 * ("name" + "v0", "v1", ...), all typed DOUBLE.
 *
 * @param dfschema   schema as StructType
 * @param colnames   output array for column names (sized by the caller to the frame width)
 * @param fschema    output array of SystemML ValueTypes (same length as colnames)
 * @param containsID if true, the first DataFrame column is an ID column and is skipped
 * @return 0-based column index of the vector column, -1 if no vector
 * @throws RuntimeException if more than one vector column is present
 */
public static int convertDFSchemaToFrameSchema(StructType dfschema, String[] colnames, ValueType[] fschema,
        boolean containsID) {
    // Basic meta data: offset past the ID column, and the vector length derived
    // from the difference between the frame width and the DataFrame width
    // (valid precisely because at most one vector column is allowed).
    int off = containsID ? 1 : 0;
    boolean containsVect = false;
    int lenVect = fschema.length - (dfschema.fields().length - off) + 1;
    int colVect = -1;

    // Process individual columns; `pos` tracks the output (frame) position,
    // which can advance faster than `i` when a vector column is expanded.
    for (int i = off, pos = 0; i < dfschema.fields().length; i++) {
        StructField structType = dfschema.apply(i);
        colnames[pos] = structType.name();
        if (structType.dataType() == DataTypes.DoubleType || structType.dataType() == DataTypes.FloatType)
            fschema[pos++] = ValueType.DOUBLE;
        else if (structType.dataType() == DataTypes.LongType || structType.dataType() == DataTypes.IntegerType)
            fschema[pos++] = ValueType.INT;
        else if (structType.dataType() == DataTypes.BooleanType)
            fschema[pos++] = ValueType.BOOLEAN;
        else if (structType.dataType() instanceof VectorUDT) {
            // Enforce the single-vector-column restriction documented above.
            if (containsVect)
                throw new RuntimeException("Found invalid second vector column.");
            String name = colnames[pos];
            colVect = pos;
            // Expand the vector into lenVect DOUBLE columns named <name>v<j>.
            for (int j = 0; j < lenVect; j++) {
                colnames[pos] = name + "v" + j;
                fschema[pos++] = ValueType.DOUBLE;
            }
            containsVect = true;
        } else
            // Everything else (strings, unsupported types) is treated as STRING.
            fschema[pos++] = ValueType.STRING;
    }
    return colVect;
}
From source file:org.eclairjs.nashorn.sql.JSUDF.java
License:Apache License
public Object castValueToReturnType(Object value) { Object ret = value;//www . ja va 2s .co m /* Nashorn interprets numbers as java.lang.Double, java.lang.Long, or java.lang.Integer objects, depending on the computation performed So we need to force the "Number" from Nashorn to the correct type based on the schema. JavaScript parseInt returns a java.lang.Double and that seems to be the only java type we will need to convert. */ if (this.getReturnType() == DataTypes.IntegerType && value instanceof Double) { ret = ((Double) value).intValue(); } else if (this.getReturnType() == DataTypes.FloatType && value instanceof Double) { ret = ((Double) value).floatValue(); } else if (this.getReturnType() == DataTypes.DoubleType && value instanceof Integer) { ret = ((Integer) value).doubleValue(); } else if (this.getReturnType() == DataTypes.FloatType && value instanceof Integer) { ret = ((Integer) value).floatValue(); } else if (this.getReturnType() == DataTypes.TimestampType) { ret = ((SqlTimestamp) value).getJavaObject(); } else if (this.getReturnType() == DataTypes.DateType) { ret = ((SqlDate) value).getJavaObject(); } return ret; }