Usage examples for org.apache.spark.sql.types.DataTypes.ShortType

DataTypes.ShortType is the singleton DataType representing Spark SQL's 16-bit signed integer type (SQL SMALLINT, java.lang.Short on the JVM). The examples below, collected from open-source projects, show where it typically appears: inferring a DataType from a Java value, casting strings to typed values, building StructType schemas from metadata, and converting between external type systems and Spark SQL types.
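Before the project examples, here is a minimal, self-contained sketch (column and variable names are mine, assuming Spark 2.x's SparkSession API) that declares a ShortType column in a schema and builds rows for it:

import java.util.Arrays;
import java.util.List;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

public class ShortTypeExample {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("ShortTypeExample").master("local[*]").getOrCreate();

        // A ShortType column holds 2-byte signed integers (-32768..32767), SQL SMALLINT.
        StructType schema = new StructType()
                .add("id", DataTypes.ShortType, false)
                .add("name", DataTypes.StringType, true);

        List<Row> rows = Arrays.asList(
                RowFactory.create((short) 1, "alpha"),
                RowFactory.create((short) 2, "beta"));

        Dataset<Row> df = spark.createDataFrame(rows, schema);
        df.printSchema(); // id: short (nullable = false), name: string (nullable = true)
        df.show();
        spark.stop();
    }
}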
From source file:com.bosscs.spark.commons.utils.CellsUtils.java
License:Apache License
private static DataType getDataType(Object value) {
    Class cls = value.getClass();
    DataType dataType;
    if (cls.equals(String.class)) {
        dataType = DataTypes.StringType;
    } else if (cls.equals(Byte[].class)) {
        dataType = DataTypes.BinaryType;
    } else if (cls.equals(Boolean.class)) {
        dataType = DataTypes.BooleanType;
    } else if (cls.equals(Timestamp.class)) {
        dataType = DataTypes.TimestampType;
    } else if (cls.equals(Double.class)) {
        dataType = DataTypes.DoubleType;
    } else if (cls.equals(Float.class)) {
        dataType = DataTypes.FloatType;
    } else if (cls.equals(Byte.class)) {
        dataType = DataTypes.ByteType;
    } else if (cls.equals(Integer.class)) {
        dataType = DataTypes.IntegerType;
    } else if (cls.equals(Long.class)) {
        dataType = DataTypes.LongType;
    } else if (cls.equals(Short.class)) {
        dataType = DataTypes.ShortType;
    } else if (value instanceof List) {
        List listValue = (List) value;
        if (listValue.isEmpty()) {
            dataType = DataTypes.createArrayType(DataTypes.StringType);
        } else {
            dataType = DataTypes.createArrayType(getDataType(listValue.get(0)));
        }
    } else if (value instanceof Map) {
        Map mapValue = (Map) value;
        if (mapValue.isEmpty()) {
            dataType = DataTypes.createMapType(DataTypes.StringType, DataTypes.StringType);
        } else {
            Map.Entry entry = (Map.Entry) mapValue.entrySet().iterator().next();
            dataType = DataTypes.createMapType(getDataType(entry.getKey()), getDataType(entry.getValue()));
        }
    } else {
        dataType = DataTypes.StringType;
    }
    return dataType;
}
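getDataType is private to CellsUtils, so as a quick illustration here is a hypothetical standalone harness (names are mine) with a trimmed copy of the mapping, showing which branch a boxed Short hits:

import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;

public class InferenceDemo {
    // Trimmed, standalone copy of the mapping above (illustration only).
    static DataType infer(Object value) {
        Class<?> cls = value.getClass();
        if (cls.equals(Short.class)) return DataTypes.ShortType;
        if (cls.equals(Integer.class)) return DataTypes.IntegerType;
        if (cls.equals(Long.class)) return DataTypes.LongType;
        return DataTypes.StringType; // same fallback as the original
    }

    public static void main(String[] args) {
        System.out.println(infer((short) 7)); // ShortType: (short) 7 autoboxes to java.lang.Short
        System.out.println(infer(7));         // IntegerType
        System.out.println(infer("7"));       // StringType
    }
}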
From source file:com.estonteco.spark.frames.conf.factory.creator.impl.AbstractDataFrameCreator.java
protected Object cast(String value, DataType dataType) {
    if (value == null || dataType == null) {
        return null;
    }
    if (dataType == DataTypes.StringType) {
        return value;
    }
    if (dataType == DataTypes.BooleanType) {
        return Boolean.valueOf(value);
    }
    if (dataType == DataTypes.ByteType) {
        return Byte.valueOf(value);
    }
    if (dataType == DataTypes.DateType) {
        try {
            return DateUtils.fromJavaDate(Date.valueOf(value));
        } catch (IllegalArgumentException ex) {
            ex.printStackTrace();
            return null;
        }
    }
    if (dataType == DataTypes.DoubleType) {
        return Double.valueOf(value);
    }
    if (dataType == DataTypes.FloatType) {
        return Float.valueOf(value);
    }
    if (dataType == DataTypes.LongType) {
        return Long.valueOf(value);
    }
    if (dataType == DataTypes.ShortType) {
        return Short.valueOf(value);
    }
    if (dataType == DataTypes.TimestampType) {
        // Throw away extra digits if there are more than 9 decimal places
        int periodIdx = value.indexOf(".");
        String str = value;
        if (periodIdx != -1 && str.length() - periodIdx > 9) {
            str = str.substring(0, periodIdx + 10);
        }
        try {
            return java.sql.Timestamp.valueOf(str);
        } catch (IllegalArgumentException ex) {
            ex.printStackTrace();
            return null;
        }
    }
    if (dataType == DataTypes.BinaryType) {
        return value.getBytes();
    }
    return null;
}
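As a quick demonstration of the ShortType branch, the sketch below uses a standalone copy of it (the CastDemo class and castShort helper are mine, not part of the project):

import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;

public class CastDemo {
    // Standalone copy of the ShortType branch above, for illustration only.
    static Object castShort(String value, DataType dataType) {
        if (value == null || dataType == null) return null;
        if (dataType == DataTypes.ShortType) return Short.valueOf(value);
        return null;
    }

    public static void main(String[] args) {
        System.out.println(castShort("42", DataTypes.ShortType)); // 42, as a boxed Short
        System.out.println(castShort(null, DataTypes.ShortType)); // null
        // Note: Short.valueOf throws NumberFormatException for non-numeric input or
        // values beyond Short.MAX_VALUE (32767), which the original cast() does not catch.
    }
}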
From source file:com.getcake.sparkjdbc.SparkJDBCServer.java
License:Apache License
private String loadSingleFileWithMeta(String registerTableName, String fullPathTableName,
        String metaFileName) throws IOException {
    DataFrame dynamicDataFrame;
    long startTime, firstStartTime;
    float durSeconds, durMinutes;
    String respMsg;

    startTime = System.currentTimeMillis();
    firstStartTime = startTime;
    try {
        dynamicDataFrame = hiveContext.table(registerTableName);
        respMsg = "table " + registerTableName + " at " + fullPathTableName + " was already loaded";
        log(respMsg);
        return respMsg;
    } catch (Throwable exc) {
        // hiveContext.table does not declare that it throws NoSuchTableException, so it cannot be
        // caught directly and has to be checked for explicitly
        if (exc instanceof NoSuchTableException) {
            respMsg = "table " + registerTableName + " at " + fullPathTableName
                    + " was not loaded => load it next";
            log(respMsg);
        } else {
            throw exc;
        }
    }

    FileInputStream propFileInputStream = new FileInputStream(metaFileName);
    properties = new Properties();
    properties.load(propFileInputStream);
    Stream<Entry<Object, Object>> stream = properties.entrySet().stream();
    Map<String, String> options = stream.collect(Collectors.toMap(
            entry -> String.valueOf(entry.getKey()), entry -> String.valueOf(entry.getValue())));

    // Build the schema from the col<i>.name / col<i>.type entries in the metadata file
    int numColumns = Integer.parseInt(properties.getProperty("numColumns"));
    StructField structFields[] = new StructField[numColumns];
    String colName, colType;
    StructField structField;
    for (int i = 1; i <= numColumns; i++) {
        colName = properties.getProperty("col" + i + ".name");
        colType = properties.getProperty("col" + i + ".type");
        switch (colType) {
        case "TimeStamp":
            structField = DataTypes.createStructField(colName, DataTypes.TimestampType, true);
            break;
        case "Date":
            structField = DataTypes.createStructField(colName, DataTypes.DateType, true);
            break;
        case "Float":
            structField = DataTypes.createStructField(colName, DataTypes.FloatType, true);
            break;
        case "Integer":
            structField = DataTypes.createStructField(colName, DataTypes.IntegerType, true);
            break;
        case "Long":
            structField = DataTypes.createStructField(colName, DataTypes.LongType, true);
            break;
        case "Short":
            structField = DataTypes.createStructField(colName, DataTypes.ShortType, true);
            break;
        case "Double":
            structField = DataTypes.createStructField(colName, DataTypes.DoubleType, true);
            break;
        case "Boolean":
            structField = DataTypes.createStructField(colName, DataTypes.BooleanType, true);
            break;
        case "Binary":
            structField = DataTypes.createStructField(colName, DataTypes.BinaryType, true);
            break;
        case "Byte":
            structField = DataTypes.createStructField(colName, DataTypes.ByteType, true);
            break;
        case "Null":
            structField = DataTypes.createStructField(colName, DataTypes.NullType, true);
            break;
        default:
            structField = DataTypes.createStructField(colName, DataTypes.StringType, true);
        }
        structFields[i - 1] = structField;
    }

    options.put("path", "file:///" + fullPathTableName);
    StructType schema = new StructType(structFields);
    dynamicDataFrame = hiveContext.load("com.databricks.spark.csv", schema, options);
    durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F;
    durMinutes = durSeconds / 60F;
    log("loaded table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes);

    schema = dynamicDataFrame.schema();
    structFields = schema.fields();
    for (StructField structFieldLocal : structFields) {
        DataType dataType = structFieldLocal.dataType();
        logger.debug(structFieldLocal.name() + " - dataType: " + dataType.typeName());
    }

    startTime = System.currentTimeMillis();
    dynamicDataFrame.cache();
    durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F;
    durMinutes = durSeconds / 60F;
    log("cache table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes);

    startTime = System.currentTimeMillis();
    dynamicDataFrame.registerTempTable(registerTableName);
    durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F;
    durMinutes = durSeconds / 60F;
    log("registerTempTable table " + registerTableName + " in seconds: " + durSeconds
            + " / in minutes: " + durMinutes);

    durSeconds = (float) (System.currentTimeMillis() - firstStartTime) / 1000F;
    durMinutes = durSeconds / 60F;
    respMsg = "Completed loading table " + fullPathTableName + " in seconds: " + durSeconds
            + " / in minutes: " + durMinutes;
    log(respMsg);
    return respMsg;
}
From source file:com.getcake.sparkjdbc.SparkJDBCServer.java
License:Apache License
private String loadFilesWithMeta(String registerTableName, String fullPathTableName,
        String metaFileName, String fileListName) throws IOException {
    DataFrame combinedDynamicDataFrame = null, dynamicDataFrame = null;
    long startTime, firstStartTime;
    float durSeconds, durMinutes;
    String respMsg;

    startTime = System.currentTimeMillis();
    firstStartTime = startTime;
    try {
        combinedDynamicDataFrame = hiveContext.table(registerTableName);
        respMsg = "table " + registerTableName + " at " + fullPathTableName + " was already loaded";
        log(respMsg);
        return respMsg;
    } catch (Throwable exc) {
        // hiveContext.table does not declare that it throws NoSuchTableException, so it cannot be
        // caught directly and has to be checked for explicitly
        if (exc instanceof NoSuchTableException) {
            respMsg = "table " + registerTableName + " at " + fullPathTableName
                    + " was not loaded => load it next";
            log(respMsg);
        } else {
            throw exc;
        }
    }

    FileInputStream propFileInputStream = new FileInputStream(metaFileName);
    properties = new Properties();
    properties.load(propFileInputStream);
    Stream<Entry<Object, Object>> stream = properties.entrySet().stream();
    Map<String, String> options = stream.collect(Collectors.toMap(
            entry -> String.valueOf(entry.getKey()), entry -> String.valueOf(entry.getValue())));

    // Build the schema from the col<i>.name / col<i>.type entries in the metadata file
    int numColumns = Integer.parseInt(properties.getProperty("numColumns"));
    StructField structFields[] = new StructField[numColumns];
    String colName, colType;
    StructField structField;
    for (int i = 1; i <= numColumns; i++) {
        colName = properties.getProperty("col" + i + ".name");
        colType = properties.getProperty("col" + i + ".type");
        switch (colType) {
        case "TimeStamp":
            structField = DataTypes.createStructField(colName, DataTypes.TimestampType, true);
            break;
        case "Date":
            structField = DataTypes.createStructField(colName, DataTypes.DateType, true);
            break;
        case "Float":
            structField = DataTypes.createStructField(colName, DataTypes.FloatType, true);
            break;
        case "Integer":
            structField = DataTypes.createStructField(colName, DataTypes.IntegerType, true);
            break;
        case "Long":
            structField = DataTypes.createStructField(colName, DataTypes.LongType, true);
            break;
        case "Short":
            structField = DataTypes.createStructField(colName, DataTypes.ShortType, true);
            break;
        case "Double":
            structField = DataTypes.createStructField(colName, DataTypes.DoubleType, true);
            break;
        case "Boolean":
            structField = DataTypes.createStructField(colName, DataTypes.BooleanType, true);
            break;
        case "Binary":
            structField = DataTypes.createStructField(colName, DataTypes.BinaryType, true);
            break;
        case "Byte":
            structField = DataTypes.createStructField(colName, DataTypes.ByteType, true);
            break;
        case "Null":
            structField = DataTypes.createStructField(colName, DataTypes.NullType, true);
            break;
        default:
            structField = DataTypes.createStructField(colName, DataTypes.StringType, true);
        }
        structFields[i - 1] = structField;
    }
    StructType schema = new StructType(structFields);

    List<String> fileList = new ArrayList<>();
    try (BufferedReader br = Files.newBufferedReader(Paths.get(fileListName))) {
        // br.lines() returns a Stream<String>; collect it into a List
        fileList = br.lines().collect(Collectors.toList());
    } catch (IOException e) {
        e.printStackTrace();
    }

    // Load each file with the same schema and union the results into one DataFrame
    for (String file : fileList) {
        options.put("path", "file:///" + file);
        dynamicDataFrame = hiveContext.load("com.databricks.spark.csv", schema, options);
        if (combinedDynamicDataFrame == null) {
            combinedDynamicDataFrame = dynamicDataFrame;
        } else {
            combinedDynamicDataFrame = combinedDynamicDataFrame.unionAll(dynamicDataFrame);
        }
    }
    durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F;
    durMinutes = durSeconds / 60F;
    log("loaded table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes);

    schema = combinedDynamicDataFrame.schema();
    structFields = schema.fields();
    for (StructField structFieldLocal : structFields) {
        DataType dataType = structFieldLocal.dataType();
        logger.debug(structFieldLocal.name() + " - dataType: " + dataType.typeName());
    }

    startTime = System.currentTimeMillis();
    combinedDynamicDataFrame.cache();
    durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F;
    durMinutes = durSeconds / 60F;
    log("cache table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes);

    startTime = System.currentTimeMillis();
    combinedDynamicDataFrame.registerTempTable(registerTableName);
    durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F;
    durMinutes = durSeconds / 60F;
    log("registerTempTable table " + registerTableName + " in seconds: " + durSeconds
            + " / in minutes: " + durMinutes);

    durSeconds = (float) (System.currentTimeMillis() - firstStartTime) / 1000F;
    durMinutes = durSeconds / 60F;
    respMsg = "Completed loading table " + fullPathTableName + " in seconds: " + durSeconds
            + " / in minutes: " + durMinutes;
    log(respMsg);
    return respMsg;
}
From source file:com.github.sadikovi.riff.tree.expression.ShortExpression.java
License:Open Source License
@Override
public DataType dataType() {
    return DataTypes.ShortType;
}
From source file:com.splicemachine.db.iapi.types.SQLSmallint.java
License:Apache License
@Override
public StructField getStructField(String columnName) {
    return DataTypes.createStructField(columnName, DataTypes.ShortType, true);
}
From source file:gaffer.accumulostore.operation.spark.handler.AccumuloStoreRelation.java
License:Apache License
private static DataType getType(final String className) {
    switch (className) {
    case "java.lang.String":
        return DataTypes.StringType;
    case "java.lang.Integer":
        return DataTypes.IntegerType;
    case "java.lang.Long":
        return DataTypes.LongType;
    case "java.lang.Boolean":
        return DataTypes.BooleanType;
    case "java.lang.Double":
        return DataTypes.DoubleType;
    case "java.lang.Float":
        return DataTypes.FloatType;
    case "java.lang.Byte":
        return DataTypes.ByteType;
    case "java.lang.Short":
        return DataTypes.ShortType;
    default:
        return null;
    }
}
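A minimal sketch of the same lookup expressed as a static Map, which avoids the switch and keeps the supported set easy to extend (the TypeLookup class is mine, not Gaffer's):

import java.util.HashMap;
import java.util.Map;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;

public class TypeLookup {
    // Hypothetical alternative to the switch above: fully qualified class name -> Spark DataType.
    private static final Map<String, DataType> TYPES = new HashMap<>();
    static {
        TYPES.put("java.lang.String", DataTypes.StringType);
        TYPES.put("java.lang.Integer", DataTypes.IntegerType);
        TYPES.put("java.lang.Long", DataTypes.LongType);
        TYPES.put("java.lang.Boolean", DataTypes.BooleanType);
        TYPES.put("java.lang.Double", DataTypes.DoubleType);
        TYPES.put("java.lang.Float", DataTypes.FloatType);
        TYPES.put("java.lang.Byte", DataTypes.ByteType);
        TYPES.put("java.lang.Short", DataTypes.ShortType);
    }

    static DataType getType(final String className) {
        return TYPES.get(className); // null for unsupported classes, as in the original
    }
}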
From source file:org.apache.carbondata.converter.SparkDataTypeConverterImpl.java
License:Apache License
private static org.apache.spark.sql.types.DataType convertCarbonToSparkDataType(DataType carbonDataType) {
    if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.STRING) {
        return DataTypes.StringType;
    } else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.SHORT) {
        return DataTypes.ShortType;
    } else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.INT) {
        return DataTypes.IntegerType;
    } else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.LONG) {
        return DataTypes.LongType;
    } else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.DOUBLE) {
        return DataTypes.DoubleType;
    } else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.BOOLEAN) {
        return DataTypes.BooleanType;
    } else if (org.apache.carbondata.core.metadata.datatype.DataTypes.isDecimal(carbonDataType)) {
        return DataTypes.createDecimalType();
    } else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.TIMESTAMP) {
        return DataTypes.TimestampType;
    } else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.DATE) {
        return DataTypes.DateType;
    } else {
        return null;
    }
}
From source file:org.apache.carbondata.spark.util.Util.java
License:Apache License
public static org.apache.spark.sql.types.DataType convertCarbonToSparkDataType(DataType carbonDataType) {
    if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.STRING) {
        return DataTypes.StringType;
    } else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.SHORT) {
        return DataTypes.ShortType;
    } else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.INT) {
        return DataTypes.IntegerType;
    } else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.LONG) {
        return DataTypes.LongType;
    } else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.DOUBLE) {
        return DataTypes.DoubleType;
    } else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.BOOLEAN) {
        return DataTypes.BooleanType;
    } else if (org.apache.carbondata.core.metadata.datatype.DataTypes.isDecimal(carbonDataType)) {
        return DataTypes.createDecimalType();
    } else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.TIMESTAMP) {
        return DataTypes.TimestampType;
    } else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.DATE) {
        return DataTypes.DateType;
    } else {
        return null;
    }
}
From source file:org.icgc.dcc.release.job.export.util.SchemaGenerator.java
License:Open Source License
private static DataType parseDataType(String dataTypeString) {
    switch (dataTypeString) {
    case "StringType":
        return DataTypes.StringType;
    case "BinaryType":
        return DataTypes.BinaryType;
    case "BooleanType":
        return DataTypes.BooleanType;
    case "DateType":
        return DataTypes.DateType;
    case "TimestampType":
        return DataTypes.TimestampType;
    case "CalendarIntervalType":
        return DataTypes.CalendarIntervalType;
    case "DoubleType":
        return DataTypes.DoubleType;
    case "ByteType":
        return DataTypes.ByteType;
    case "IntegerType":
        return DataTypes.IntegerType;
    case "LongType":
        return DataTypes.LongType;
    case "ShortType":
        return DataTypes.ShortType;
    case "NullType":
        return DataTypes.NullType;
    default:
        throw new IllegalArgumentException(format("Unknown datatype %s", dataTypeString));
    }
}