List of usage examples for org.apache.spark.sql.types DataTypes TimestampType
DataType TimestampType
To view the source code for org.apache.spark.sql.types.DataTypes.TimestampType, click the Source Link.
From source file:KafkaSparkMongo.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length < 2) { System.err.println("Usage: JavaDirectKafkaWordCount <brokers> <topics>\n" + " <brokers> is a list of one or more Kafka brokers\n" + " <topics> is a list of one or more kafka topics to consume from\n\n"); System.exit(1);//from w ww . java 2 s.c o m } String brokers = args[0]; String topics = args[1]; String UriMongo = "mongodb://localhost/streamSparkFinal.coll"; dropDatabase(UriMongo); // Create the context with a 1 second batch size SparkConf sparkConf = new SparkConf().setAppName("JavaNetworkWordCount") .set("spark.app.id", "MongoSparkConnectorTour").set("spark.mongodb.input.uri", UriMongo) .set("spark.mongodb.output.uri", UriMongo); JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(5)); /** Create a JavaReceiverInputDStream on target ip:port and count the * words in input stream of \n delimited text (eg. generated by 'nc') * Note that no duplication in storage level only for running locally. * Replication necessary in distributed scenario for fault tolerance. 
*/ Set<String> topicsSet = new HashSet<>(Arrays.asList(topics.split(","))); Map<String, String> kafkaParams = new HashMap<>(); kafkaParams.put("metadata.broker.list", brokers); // Create direct kafka stream with brokers and topics JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(ssc, String.class, String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet); messages.print(); JavaDStream<String> lines = messages.map(x -> x._2()); JavaDStream<Tuple7<String, String, String, String, String, String, String>> words = lines.map(y -> { String[] wordy = SPACE.split(y); return new Tuple7<>(wordy[0], wordy[1], wordy[2], wordy[3], wordy[4], wordy[5], wordy[6]); }); words.foreachRDD(rdd -> { List<StructField> subFields = new ArrayList<>(); subFields.add(DataTypes.createStructField("X", DataTypes.DoubleType, true)); subFields.add(DataTypes.createStructField("Y", DataTypes.DoubleType, true)); subFields.add(DataTypes.createStructField("z", DataTypes.DoubleType, true)); List<StructField> fields = new ArrayList<>(); fields.add(DataTypes.createStructField("Serial", DataTypes.StringType, true)); fields.add(DataTypes.createStructField("Zone", DataTypes.StringType, true)); fields.add(DataTypes.createStructField("Group", DataTypes.StringType, true)); fields.add(DataTypes.createStructField("coord", DataTypes.createStructType(subFields), true)); fields.add(DataTypes.createStructField("Time", DataTypes.TimestampType, true)); StructType schema = DataTypes.createStructType(fields); SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf()); JavaRDD<Row> rowRDD = rdd .map(palabra -> RowFactory.create(palabra._1(), palabra._2(), palabra._3(), RowFactory.create(Double.parseDouble(palabra._4()), Double.parseDouble(palabra._5()), Double.parseDouble(palabra._6())), Timestamp.from(Instant.parse(palabra._7())))); Dataset<Row> wordsDataFrame = spark.createDataFrame(rowRDD, schema); wordsDataFrame.show(); 
MongoSpark.write(wordsDataFrame).option("collection", "pruebaF").mode("append").save(); }); ssc.start(); ssc.awaitTermination(); }
From source file:com.bosscs.spark.commons.utils.CellsUtils.java
License:Apache License
private static DataType getDataType(Object value) { Class cls = value.getClass(); DataType dataType;// w w w .ja v a2s . c o m if (cls.equals(String.class)) { dataType = DataTypes.StringType; } else if (cls.equals(Byte[].class)) { dataType = DataTypes.BinaryType; } else if (cls.equals(Boolean.class)) { dataType = DataTypes.BooleanType; } else if (cls.equals(Timestamp.class)) { dataType = DataTypes.TimestampType; } else if (cls.equals(Double.class)) { dataType = DataTypes.DoubleType; } else if (cls.equals(Float.class)) { dataType = DataTypes.FloatType; } else if (cls.equals(Byte.class)) { dataType = DataTypes.ByteType; } else if (cls.equals(Integer.class)) { dataType = DataTypes.IntegerType; } else if (cls.equals(Long.class)) { dataType = DataTypes.LongType; } else if (cls.equals(Short.class)) { dataType = DataTypes.ShortType; } else if (value instanceof List) { List listValue = (List) value; if (listValue.isEmpty()) { dataType = DataTypes.createArrayType(DataTypes.StringType); } else { dataType = DataTypes.createArrayType(getDataType(listValue.get(0))); } } else if (value instanceof Map) { Map mapValue = (Map) value; if (mapValue.isEmpty()) { dataType = DataTypes.createMapType(DataTypes.StringType, DataTypes.StringType); } else { Map.Entry entry = (Map.Entry) mapValue.entrySet().iterator().next(); dataType = DataTypes.createMapType(getDataType(entry.getKey()), getDataType(entry.getValue())); } } else { dataType = DataTypes.StringType; } return dataType; }
From source file:com.estonteco.spark.frames.conf.factory.creator.impl.AbstractDataFrameCreator.java
protected Object cast(String value, DataType dataType) { if (value == null || dataType == null) { return null; }//from www . j a v a 2s. c o m if (dataType == DataTypes.StringType) { return value; } if (dataType == DataTypes.BooleanType) { return Boolean.valueOf(value); } if (dataType == DataTypes.ByteType) { return Byte.valueOf(value); } if (dataType == DataTypes.DateType) { try { return DateUtils.fromJavaDate(Date.valueOf(value)); } catch (IllegalArgumentException ex) { ex.printStackTrace(); return null; } } if (dataType == DataTypes.DoubleType) { return Double.valueOf(value); } if (dataType == DataTypes.FloatType) { return Float.valueOf(value); } if (dataType == DataTypes.LongType) { return Long.valueOf(value); } if (dataType == DataTypes.ShortType) { return Short.valueOf(value); } if (dataType == DataTypes.TimestampType) { // Throw away extra if more than 9 decimal places int periodIdx = value.indexOf("."); String str = value; if (periodIdx != -1 && str.length() - periodIdx > 9) { str = str.substring(0, periodIdx + 10); } try { return java.sql.Timestamp.valueOf(str); } catch (IllegalArgumentException ex) { ex.printStackTrace(); return null; } } if (dataType == DataTypes.BinaryType) { return value.getBytes(); } return null; }
From source file:com.getcake.sparkjdbc.SparkJDBCServer.java
License:Apache License
private String loadSingleFileWithMeta(String registerTableName, String fullPathTableName, String metaFileName) throws IOException { DataFrame dynamicDataFrame;//from w ww .j a va2s . co m long startTime, firstStartTime; float durSeconds, durMinutes; String respMsg; startTime = System.currentTimeMillis(); firstStartTime = startTime; try { dynamicDataFrame = hiveContext.table(registerTableName); respMsg = "table " + registerTableName + " at " + fullPathTableName + " was already loaded"; log(respMsg); return respMsg; } catch (Throwable exc) { // hiveContext.table does not declare that it throws NoSuchTableException, so cannot use it in catch clause and // have to check for it explicitly if (exc instanceof NoSuchTableException) { respMsg = "table " + registerTableName + " at " + fullPathTableName + " was not loaded => load it next"; log(respMsg); } else { throw exc; } } FileInputStream propFileInputStream; propFileInputStream = new FileInputStream(metaFileName); properties = new Properties(); properties.load(propFileInputStream); Stream<Entry<Object, Object>> stream = properties.entrySet().stream(); Map<String, String> options = stream.collect(Collectors.toMap(entry -> String.valueOf(entry.getKey()), entry -> String.valueOf(entry.getValue()))); int numColumns = Integer.parseInt(properties.getProperty("numColumns")); StructField structFields[] = new StructField[numColumns]; String colName, colType; StructField structField; for (int i = 1; i <= numColumns; i++) { colName = properties.getProperty("col" + i + ".name"); colType = properties.getProperty("col" + i + ".type"); switch (colType) { case "TimeStamp": structField = DataTypes.createStructField(colName, DataTypes.TimestampType, true); break; case "Date": structField = DataTypes.createStructField(colName, DataTypes.DateType, true); break; case "Float": structField = DataTypes.createStructField(colName, DataTypes.FloatType, true); break; case "Integer": structField = DataTypes.createStructField(colName, 
DataTypes.IntegerType, true); break; case "Long": structField = DataTypes.createStructField(colName, DataTypes.LongType, true); break; case "Short": structField = DataTypes.createStructField(colName, DataTypes.ShortType, true); break; case "Double": structField = DataTypes.createStructField(colName, DataTypes.DoubleType, true); break; case "Boolean": structField = DataTypes.createStructField(colName, DataTypes.BooleanType, true); break; case "Binary": structField = DataTypes.createStructField(colName, DataTypes.BinaryType, true); break; case "Byte": structField = DataTypes.createStructField(colName, DataTypes.ByteType, true); break; case "Null": structField = DataTypes.createStructField(colName, DataTypes.NullType, true); break; default: structField = DataTypes.createStructField(colName, DataTypes.StringType, true); } structFields[i - 1] = structField; } // dynamicDataFrame = hiveContext.read().format("com.databricks.spark.csv"). // option("header", Boolean.toString(headerInCSVFileFlag)).option("inferSchema", Boolean.toString(inferSchemaFlag)).load(fullPathTableName); // Map<String, String> options = new HashMap<>(properties); options.put("path", "file:///" + fullPathTableName); // options.put("header", "false"); // options.put("delimiter", ","); // DataType dataType = new DataType (); /* StructField structField1 = DataTypes.createStructField("LogType", DataTypes.StringType, false); StructField structField2 = DataTypes.createStructField("EntryTime", DataTypes.TimestampType, false); StructField structField3 = DataTypes.createStructField("Code_Class", DataTypes.StringType, false); StructField structField4 = DataTypes.createStructField("Code_Method", DataTypes.StringType, false); StructField structField5 = DataTypes.createStructField("Log_Message", DataTypes.StringType, false); structFields[0] = structField1; structFields[1] = structField2; structFields[2] = structField3; structFields[3] = structField4; structFields[4] = structField5; */ StructType schema = new 
StructType(structFields); dynamicDataFrame = hiveContext.load("com.databricks.spark.csv", schema, options); durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F; durMinutes = durSeconds / 60F; log("loaded table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes); schema = dynamicDataFrame.schema(); structFields = schema.fields(); for (StructField structFieldLocal : structFields) { DataType dataType = structFieldLocal.dataType(); logger.debug(structFieldLocal.name() + " - dataType: " + dataType.typeName()); } startTime = System.currentTimeMillis(); dynamicDataFrame.cache(); durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F; durMinutes = durSeconds / 60F; log("cache table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes); startTime = System.currentTimeMillis(); dynamicDataFrame.registerTempTable(registerTableName); durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F; durMinutes = durSeconds / 60F; log("registerTempTable table " + registerTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes); durSeconds = (float) (System.currentTimeMillis() - firstStartTime) / 1000F; durMinutes = durSeconds / 60F; respMsg = "Completed loading table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes; log(respMsg); return respMsg; }
From source file:com.getcake.sparkjdbc.SparkJDBCServer.java
License:Apache License
private String loadFilesWithMeta(String registerTableName, String fullPathTableName, String metaFileName, String fileListName) throws IOException { DataFrame combinedDynamicDataFrame = null, dynamicDataFrame = null; long startTime, firstStartTime; float durSeconds, durMinutes; String respMsg;/*from w ww . j a v a 2s .co m*/ startTime = System.currentTimeMillis(); firstStartTime = startTime; try { combinedDynamicDataFrame = hiveContext.table(registerTableName); respMsg = "table " + registerTableName + " at " + fullPathTableName + " was already loaded"; log(respMsg); return respMsg; } catch (Throwable exc) { // hiveContext.table does not declare that it throws NoSuchTableException, so cannot use it in catch clause and // have to check for it explicitly if (exc instanceof NoSuchTableException) { respMsg = "table " + registerTableName + " at " + fullPathTableName + " was not loaded => load it next"; log(respMsg); } else { throw exc; } } FileInputStream propFileInputStream; propFileInputStream = new FileInputStream(metaFileName); properties = new Properties(); properties.load(propFileInputStream); Stream<Entry<Object, Object>> stream = properties.entrySet().stream(); Map<String, String> options = stream.collect(Collectors.toMap(entry -> String.valueOf(entry.getKey()), entry -> String.valueOf(entry.getValue()))); int numColumns = Integer.parseInt(properties.getProperty("numColumns")); StructField structFields[] = new StructField[numColumns]; String colName, colType; StructField structField; // structField = DataTypes.createStructField("File_Source", DataTypes.StringType, true); // structFields[0] = structField; for (int i = 1; i <= numColumns; i++) { colName = properties.getProperty("col" + i + ".name"); colType = properties.getProperty("col" + i + ".type"); switch (colType) { case "TimeStamp": structField = DataTypes.createStructField(colName, DataTypes.TimestampType, true); break; case "Date": structField = DataTypes.createStructField(colName, DataTypes.DateType, 
true); break; case "Float": structField = DataTypes.createStructField(colName, DataTypes.FloatType, true); break; case "Integer": structField = DataTypes.createStructField(colName, DataTypes.IntegerType, true); break; case "Long": structField = DataTypes.createStructField(colName, DataTypes.LongType, true); break; case "Short": structField = DataTypes.createStructField(colName, DataTypes.ShortType, true); break; case "Double": structField = DataTypes.createStructField(colName, DataTypes.DoubleType, true); break; case "Boolean": structField = DataTypes.createStructField(colName, DataTypes.BooleanType, true); break; case "Binary": structField = DataTypes.createStructField(colName, DataTypes.BinaryType, true); break; case "Byte": structField = DataTypes.createStructField(colName, DataTypes.ByteType, true); break; case "Null": structField = DataTypes.createStructField(colName, DataTypes.NullType, true); break; default: structField = DataTypes.createStructField(colName, DataTypes.StringType, true); } structFields[i - 1] = structField; } StructType schema = new StructType(structFields); List<String> fileLlist = new ArrayList<>(); try (BufferedReader br = Files.newBufferedReader(Paths.get(fileListName))) { //br returns as stream and convert it into a List fileLlist = br.lines().collect(Collectors.toList()); } catch (IOException e) { e.printStackTrace(); } for (String file : fileLlist) { options.put("path", "file:///" + file); dynamicDataFrame = hiveContext.load("com.databricks.spark.csv", schema, options); if (combinedDynamicDataFrame == null) { combinedDynamicDataFrame = dynamicDataFrame; } else { combinedDynamicDataFrame = combinedDynamicDataFrame.unionAll(dynamicDataFrame); } } durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F; durMinutes = durSeconds / 60F; log("loaded table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes); schema = combinedDynamicDataFrame.schema(); structFields = schema.fields(); for 
(StructField structFieldLocal : structFields) { DataType dataType = structFieldLocal.dataType(); logger.debug(structFieldLocal.name() + " - dataType: " + dataType.typeName()); } startTime = System.currentTimeMillis(); combinedDynamicDataFrame.cache(); durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F; durMinutes = durSeconds / 60F; log("cache table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes); startTime = System.currentTimeMillis(); combinedDynamicDataFrame.registerTempTable(registerTableName); durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F; durMinutes = durSeconds / 60F; log("registerTempTable table " + registerTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes); durSeconds = (float) (System.currentTimeMillis() - firstStartTime) / 1000F; durMinutes = durSeconds / 60F; respMsg = "Completed loading table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes; log(respMsg); return respMsg; }
From source file:com.github.sadikovi.riff.tree.expression.TimestampExpression.java
License:Open Source License
/**
 * Reports the Spark SQL type of this expression.
 *
 * @return always {@code DataTypes.TimestampType}
 */
@Override
public DataType dataType() {
    final DataType timestampType = DataTypes.TimestampType;
    return timestampType;
}
From source file:com.splicemachine.db.iapi.types.SQLTimestamp.java
License:Apache License
/**
 * Builds the Spark struct field describing this value as a column.
 *
 * @param columnName name to give the field
 * @return a nullable field of type {@code DataTypes.TimestampType} with the given name
 */
@Override
public StructField getStructField(String columnName) {
    final boolean nullable = true;
    return DataTypes.createStructField(columnName, DataTypes.TimestampType, nullable);
}
From source file:com.thinkbiganalytics.kylo.catalog.spark.sources.AbstractJdbcDataSetProvider.java
License:Apache License
/** * Scans the specified field and updates the specified high water mark. *///w w w . j a v a2 s . c o m @Nonnull @VisibleForTesting T updateHighWaterMark(@Nonnull final T dataSet, @Nonnull final String fieldName, @Nonnull final JdbcHighWaterMark highWaterMark, @Nonnull final KyloCatalogClient<T> client) { // Determine function to convert column to Long final DataType fieldType = schema(dataSet).apply(fieldName).dataType(); final Function1<?, Long> toLong; if (fieldType == DataTypes.DateType) { toLong = new DateToLong(); } else if (fieldType == DataTypes.TimestampType) { toLong = new TimestampToLong(); } else { throw new KyloCatalogException("Unsupported column type for high water mark: " + fieldType); } // Create UDF and apply to field final String accumulableId = (highWaterMark.getName() != null) ? highWaterMark.getName() : UUID.randomUUID().toString(); final Accumulable<JdbcHighWaterMark, Long> accumulable = accumulable(highWaterMark, accumulableId, new JdbcHighWaterMarkAccumulableParam(), client); final JdbcHighWaterMarkVisitor<?> visitor = new JdbcHighWaterMarkVisitor<>(accumulable, toLong); return map(dataSet, fieldName, visitor, fieldType); }
From source file:com.thinkbiganalytics.spark.metadata.SqlTransformStage.java
License:Apache License
/** * Gets the Spark SQL data type for the specified JDBC data type. *///from w w w.java 2 s . c o m @Nonnull @SuppressWarnings("squid:S1479") private DataType getCatalystType(final int sqlType, final int precision, final int scale, final boolean signed) { switch (sqlType) { case Types.BINARY: case Types.BLOB: case Types.LONGVARBINARY: case Types.VARBINARY: return DataTypes.BinaryType; case Types.BIT: case Types.BOOLEAN: return DataTypes.BooleanType; case Types.DATE: return DataTypes.DateType; case Types.DECIMAL: case Types.NUMERIC: if (precision != 0 || scale != 0) { return new DecimalType(precision, scale); } else { return DecimalType.SYSTEM_DEFAULT(); } case Types.DOUBLE: case Types.REAL: return DataTypes.DoubleType; case Types.FLOAT: return DataTypes.FloatType; case Types.SMALLINT: case Types.TINYINT: return DataTypes.IntegerType; case Types.INTEGER: return signed ? DataTypes.IntegerType : DataTypes.LongType; case Types.ROWID: return DataTypes.LongType; case Types.BIGINT: return signed ? DataTypes.LongType : new DecimalType(20, 0); case Types.CHAR: case Types.CLOB: case Types.LONGNVARCHAR: case Types.LONGVARCHAR: case Types.NCHAR: case Types.NCLOB: case Types.NVARCHAR: case Types.REF: case Types.SQLXML: case Types.STRUCT: case Types.VARCHAR: return DataTypes.StringType; case Types.TIME: case Types.TIMESTAMP: return DataTypes.TimestampType; default: log.debug("Unsupported SQL type: {}", sqlType); return DataTypes.StringType; } }
From source file:org.apache.carbondata.converter.SparkDataTypeConverterImpl.java
License:Apache License
/**
 * Translates a CarbonData data type into the corresponding Spark SQL data type.
 *
 * <p>Any decimal type maps to Spark's default decimal type.
 *
 * @param carbonDataType the CarbonData type to translate
 * @return the Spark SQL type, or {@code null} when the CarbonData type has no mapping here
 */
private static org.apache.spark.sql.types.DataType convertCarbonToSparkDataType(DataType carbonDataType) {
    if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.STRING) {
        return DataTypes.StringType;
    }
    if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.SHORT) {
        return DataTypes.ShortType;
    }
    if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.INT) {
        return DataTypes.IntegerType;
    }
    if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.LONG) {
        return DataTypes.LongType;
    }
    if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.DOUBLE) {
        return DataTypes.DoubleType;
    }
    if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.BOOLEAN) {
        return DataTypes.BooleanType;
    }
    // Decimal types are recognized through the helper rather than by identity.
    if (org.apache.carbondata.core.metadata.datatype.DataTypes.isDecimal(carbonDataType)) {
        return DataTypes.createDecimalType();
    }
    if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.TIMESTAMP) {
        return DataTypes.TimestampType;
    }
    if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.DATE) {
        return DataTypes.DateType;
    }
    // No Spark equivalent is defined for the remaining CarbonData types.
    return null;
}