List of usage examples for org.apache.spark.sql.types DataTypes NullType
DataType NullType
To view the source code for org.apache.spark.sql.types DataTypes NullType, click the Source Link.
From source file:com.getcake.sparkjdbc.SparkJDBCServer.java
License:Apache License
private String loadSingleFileWithMeta(String registerTableName, String fullPathTableName, String metaFileName) throws IOException { DataFrame dynamicDataFrame;/*w w w. j a v a2s. c om*/ long startTime, firstStartTime; float durSeconds, durMinutes; String respMsg; startTime = System.currentTimeMillis(); firstStartTime = startTime; try { dynamicDataFrame = hiveContext.table(registerTableName); respMsg = "table " + registerTableName + " at " + fullPathTableName + " was already loaded"; log(respMsg); return respMsg; } catch (Throwable exc) { // hiveContext.table does not declare that it throws NoSuchTableException, so cannot use it in catch clause and // have to check for it explicitly if (exc instanceof NoSuchTableException) { respMsg = "table " + registerTableName + " at " + fullPathTableName + " was not loaded => load it next"; log(respMsg); } else { throw exc; } } FileInputStream propFileInputStream; propFileInputStream = new FileInputStream(metaFileName); properties = new Properties(); properties.load(propFileInputStream); Stream<Entry<Object, Object>> stream = properties.entrySet().stream(); Map<String, String> options = stream.collect(Collectors.toMap(entry -> String.valueOf(entry.getKey()), entry -> String.valueOf(entry.getValue()))); int numColumns = Integer.parseInt(properties.getProperty("numColumns")); StructField structFields[] = new StructField[numColumns]; String colName, colType; StructField structField; for (int i = 1; i <= numColumns; i++) { colName = properties.getProperty("col" + i + ".name"); colType = properties.getProperty("col" + i + ".type"); switch (colType) { case "TimeStamp": structField = DataTypes.createStructField(colName, DataTypes.TimestampType, true); break; case "Date": structField = DataTypes.createStructField(colName, DataTypes.DateType, true); break; case "Float": structField = DataTypes.createStructField(colName, DataTypes.FloatType, true); break; case "Integer": structField = DataTypes.createStructField(colName, 
DataTypes.IntegerType, true); break; case "Long": structField = DataTypes.createStructField(colName, DataTypes.LongType, true); break; case "Short": structField = DataTypes.createStructField(colName, DataTypes.ShortType, true); break; case "Double": structField = DataTypes.createStructField(colName, DataTypes.DoubleType, true); break; case "Boolean": structField = DataTypes.createStructField(colName, DataTypes.BooleanType, true); break; case "Binary": structField = DataTypes.createStructField(colName, DataTypes.BinaryType, true); break; case "Byte": structField = DataTypes.createStructField(colName, DataTypes.ByteType, true); break; case "Null": structField = DataTypes.createStructField(colName, DataTypes.NullType, true); break; default: structField = DataTypes.createStructField(colName, DataTypes.StringType, true); } structFields[i - 1] = structField; } // dynamicDataFrame = hiveContext.read().format("com.databricks.spark.csv"). // option("header", Boolean.toString(headerInCSVFileFlag)).option("inferSchema", Boolean.toString(inferSchemaFlag)).load(fullPathTableName); // Map<String, String> options = new HashMap<>(properties); options.put("path", "file:///" + fullPathTableName); // options.put("header", "false"); // options.put("delimiter", ","); // DataType dataType = new DataType (); /* StructField structField1 = DataTypes.createStructField("LogType", DataTypes.StringType, false); StructField structField2 = DataTypes.createStructField("EntryTime", DataTypes.TimestampType, false); StructField structField3 = DataTypes.createStructField("Code_Class", DataTypes.StringType, false); StructField structField4 = DataTypes.createStructField("Code_Method", DataTypes.StringType, false); StructField structField5 = DataTypes.createStructField("Log_Message", DataTypes.StringType, false); structFields[0] = structField1; structFields[1] = structField2; structFields[2] = structField3; structFields[3] = structField4; structFields[4] = structField5; */ StructType schema = new 
StructType(structFields); dynamicDataFrame = hiveContext.load("com.databricks.spark.csv", schema, options); durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F; durMinutes = durSeconds / 60F; log("loaded table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes); schema = dynamicDataFrame.schema(); structFields = schema.fields(); for (StructField structFieldLocal : structFields) { DataType dataType = structFieldLocal.dataType(); logger.debug(structFieldLocal.name() + " - dataType: " + dataType.typeName()); } startTime = System.currentTimeMillis(); dynamicDataFrame.cache(); durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F; durMinutes = durSeconds / 60F; log("cache table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes); startTime = System.currentTimeMillis(); dynamicDataFrame.registerTempTable(registerTableName); durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F; durMinutes = durSeconds / 60F; log("registerTempTable table " + registerTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes); durSeconds = (float) (System.currentTimeMillis() - firstStartTime) / 1000F; durMinutes = durSeconds / 60F; respMsg = "Completed loading table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes; log(respMsg); return respMsg; }
From source file:com.getcake.sparkjdbc.SparkJDBCServer.java
License:Apache License
private String loadFilesWithMeta(String registerTableName, String fullPathTableName, String metaFileName, String fileListName) throws IOException { DataFrame combinedDynamicDataFrame = null, dynamicDataFrame = null; long startTime, firstStartTime; float durSeconds, durMinutes; String respMsg;// w ww . ja v a2 s. c o m startTime = System.currentTimeMillis(); firstStartTime = startTime; try { combinedDynamicDataFrame = hiveContext.table(registerTableName); respMsg = "table " + registerTableName + " at " + fullPathTableName + " was already loaded"; log(respMsg); return respMsg; } catch (Throwable exc) { // hiveContext.table does not declare that it throws NoSuchTableException, so cannot use it in catch clause and // have to check for it explicitly if (exc instanceof NoSuchTableException) { respMsg = "table " + registerTableName + " at " + fullPathTableName + " was not loaded => load it next"; log(respMsg); } else { throw exc; } } FileInputStream propFileInputStream; propFileInputStream = new FileInputStream(metaFileName); properties = new Properties(); properties.load(propFileInputStream); Stream<Entry<Object, Object>> stream = properties.entrySet().stream(); Map<String, String> options = stream.collect(Collectors.toMap(entry -> String.valueOf(entry.getKey()), entry -> String.valueOf(entry.getValue()))); int numColumns = Integer.parseInt(properties.getProperty("numColumns")); StructField structFields[] = new StructField[numColumns]; String colName, colType; StructField structField; // structField = DataTypes.createStructField("File_Source", DataTypes.StringType, true); // structFields[0] = structField; for (int i = 1; i <= numColumns; i++) { colName = properties.getProperty("col" + i + ".name"); colType = properties.getProperty("col" + i + ".type"); switch (colType) { case "TimeStamp": structField = DataTypes.createStructField(colName, DataTypes.TimestampType, true); break; case "Date": structField = DataTypes.createStructField(colName, DataTypes.DateType, true); 
break; case "Float": structField = DataTypes.createStructField(colName, DataTypes.FloatType, true); break; case "Integer": structField = DataTypes.createStructField(colName, DataTypes.IntegerType, true); break; case "Long": structField = DataTypes.createStructField(colName, DataTypes.LongType, true); break; case "Short": structField = DataTypes.createStructField(colName, DataTypes.ShortType, true); break; case "Double": structField = DataTypes.createStructField(colName, DataTypes.DoubleType, true); break; case "Boolean": structField = DataTypes.createStructField(colName, DataTypes.BooleanType, true); break; case "Binary": structField = DataTypes.createStructField(colName, DataTypes.BinaryType, true); break; case "Byte": structField = DataTypes.createStructField(colName, DataTypes.ByteType, true); break; case "Null": structField = DataTypes.createStructField(colName, DataTypes.NullType, true); break; default: structField = DataTypes.createStructField(colName, DataTypes.StringType, true); } structFields[i - 1] = structField; } StructType schema = new StructType(structFields); List<String> fileLlist = new ArrayList<>(); try (BufferedReader br = Files.newBufferedReader(Paths.get(fileListName))) { //br returns as stream and convert it into a List fileLlist = br.lines().collect(Collectors.toList()); } catch (IOException e) { e.printStackTrace(); } for (String file : fileLlist) { options.put("path", "file:///" + file); dynamicDataFrame = hiveContext.load("com.databricks.spark.csv", schema, options); if (combinedDynamicDataFrame == null) { combinedDynamicDataFrame = dynamicDataFrame; } else { combinedDynamicDataFrame = combinedDynamicDataFrame.unionAll(dynamicDataFrame); } } durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F; durMinutes = durSeconds / 60F; log("loaded table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes); schema = combinedDynamicDataFrame.schema(); structFields = schema.fields(); for (StructField 
structFieldLocal : structFields) { DataType dataType = structFieldLocal.dataType(); logger.debug(structFieldLocal.name() + " - dataType: " + dataType.typeName()); } startTime = System.currentTimeMillis(); combinedDynamicDataFrame.cache(); durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F; durMinutes = durSeconds / 60F; log("cache table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes); startTime = System.currentTimeMillis(); combinedDynamicDataFrame.registerTempTable(registerTableName); durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F; durMinutes = durSeconds / 60F; log("registerTempTable table " + registerTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes); durSeconds = (float) (System.currentTimeMillis() - firstStartTime) / 1000F; durMinutes = durSeconds / 60F; respMsg = "Completed loading table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes; log(respMsg); return respMsg; }
From source file:org.icgc.dcc.release.job.export.util.SchemaGenerator.java
License:Open Source License
private static DataType parseDataType(String dataTypeString) { switch (dataTypeString) { case "StringType": return DataTypes.StringType; case "BinaryType": return DataTypes.BinaryType; case "BooleanType": return DataTypes.BooleanType; case "DateType": return DataTypes.DateType; case "TimestampType": return DataTypes.TimestampType; case "CalendarIntervalType": return DataTypes.CalendarIntervalType; case "DoubleType": return DataTypes.DoubleType; case "ByteType": return DataTypes.ByteType; case "IntegerType": return DataTypes.IntegerType; case "LongType": return DataTypes.LongType; case "ShortType": return DataTypes.ShortType; case "NullType": return DataTypes.NullType; default://from w w w.j a va 2 s. c o m throw new IllegalArgumentException(format("Unknown datatype %s", dataTypeString)); } }
From source file:org.wso2.carbon.analytics.spark.core.util.AnalyticsCommonUtils.java
License:Open Source License
public static DataType getDataType(Type returnType) throws AnalyticsUDFException { DataType udfReturnType = null;//ww w .j a v a 2 s. co m if (returnType == Integer.TYPE || returnType == Integer.class) { udfReturnType = DataTypes.IntegerType; } else if (returnType == Double.TYPE || returnType == Double.class) { udfReturnType = DataTypes.DoubleType; } else if (returnType == Float.TYPE || returnType == Float.class) { udfReturnType = DataTypes.FloatType; } else if (returnType == Long.TYPE || returnType == Long.class) { udfReturnType = DataTypes.LongType; } else if (returnType == Boolean.TYPE || returnType == Boolean.class) { udfReturnType = DataTypes.BooleanType; } else if (returnType == String.class) { udfReturnType = DataTypes.StringType; } else if (returnType == Short.TYPE || returnType == Short.class) { udfReturnType = DataTypes.ShortType; } else if (returnType == NullType.class) { udfReturnType = DataTypes.NullType; } else if (returnType == Byte.TYPE || returnType == Byte.class) { udfReturnType = DataTypes.ByteType; } else if (returnType == byte[].class || returnType == Byte[].class) { udfReturnType = DataTypes.BinaryType; } else if (returnType == Date.class) { udfReturnType = DataTypes.DateType; } else if (returnType == Timestamp.class) { udfReturnType = DataTypes.TimestampType; } else if (returnType == BigDecimal.class) { udfReturnType = DataTypes.createDecimalType(); } else if (returnType instanceof ParameterizedType) { ParameterizedType type = (ParameterizedType) returnType; /*if return type is a List types will contain only 1 element, if return type is Map it will have 2 elements types representing key and the value.*/ Type[] types = type.getActualTypeArguments(); if (types != null && types.length > 0) { switch (types.length) { case 1: { udfReturnType = DataTypes.createArrayType(getDataType(types[0])); break; } case 2: { udfReturnType = DataTypes.createMapType(getDataType(types[0]), getDataType(types[1])); break; } default: throw new 
AnalyticsUDFException("Cannot Map the return type either to ArrayType or MapType"); } } } else { throw new AnalyticsUDFException("Cannot determine the return DataType: " + returnType.toString()); } return udfReturnType; }