Example usage for org.apache.spark.sql.types DataTypes BooleanType

List of usage examples for org.apache.spark.sql.types DataTypes BooleanType

Introduction

In this page you can find the example usage for org.apache.spark.sql.types DataTypes BooleanType.

Prototype

DataType BooleanType

To view the source code for org.apache.spark.sql.types DataTypes BooleanType, click the Source Link.

Document

Gets the BooleanType object.

Usage

From source file:com.bosscs.spark.commons.utils.CellsUtils.java

License:Apache License

/**
 * Infers the Spark SQL {@link DataType} that corresponds to a Java value.
 *
 * <p>Scalars are matched on their exact runtime class. A list is typed from its first element and
 * a map from its first entry; empty collections default to string element types. Any value whose
 * class is not recognised, and {@code null}, is treated as a string.
 *
 * @param value the value to inspect; may be {@code null}
 * @return the matching Spark SQL data type, never {@code null}
 */
private static DataType getDataType(Object value) {
    if (value == null) {
        // There is no runtime class to inspect; use the same fallback as for unknown classes
        // instead of failing with a NullPointerException (e.g. a list whose first element is null).
        return DataTypes.StringType;
    }
    Class<?> cls = value.getClass();
    if (cls.equals(String.class)) {
        return DataTypes.StringType;
    }
    // Spark's Java representation of BinaryType is the primitive byte[]; the boxed Byte[] check
    // is kept so callers that relied on it keep working.
    if (cls.equals(byte[].class) || cls.equals(Byte[].class)) {
        return DataTypes.BinaryType;
    }
    if (cls.equals(Boolean.class)) {
        return DataTypes.BooleanType;
    }
    if (cls.equals(Timestamp.class)) {
        return DataTypes.TimestampType;
    }
    if (cls.equals(Double.class)) {
        return DataTypes.DoubleType;
    }
    if (cls.equals(Float.class)) {
        return DataTypes.FloatType;
    }
    if (cls.equals(Byte.class)) {
        return DataTypes.ByteType;
    }
    if (cls.equals(Integer.class)) {
        return DataTypes.IntegerType;
    }
    if (cls.equals(Long.class)) {
        return DataTypes.LongType;
    }
    if (cls.equals(Short.class)) {
        return DataTypes.ShortType;
    }
    if (value instanceof List) {
        List<?> listValue = (List<?>) value;
        // The element type is taken from the first element only.
        DataType elementType = listValue.isEmpty() ? DataTypes.StringType : getDataType(listValue.get(0));
        return DataTypes.createArrayType(elementType);
    }
    if (value instanceof Map) {
        Map<?, ?> mapValue = (Map<?, ?>) value;
        if (mapValue.isEmpty()) {
            return DataTypes.createMapType(DataTypes.StringType, DataTypes.StringType);
        }
        // Key and value types are taken from the first entry only.
        Map.Entry<?, ?> entry = mapValue.entrySet().iterator().next();
        return DataTypes.createMapType(getDataType(entry.getKey()), getDataType(entry.getValue()));
    }
    return DataTypes.StringType;
}

From source file:com.estonteco.spark.frames.conf.factory.creator.impl.AbstractDataFrameCreator.java

/**
 * Converts a raw string into the Java object used for a column of the given Spark SQL type.
 *
 * <p>Dates and timestamps that cannot be parsed yield {@code null}. Types that are not handled
 * here also yield {@code null}.
 *
 * @param value the textual value; may be {@code null}
 * @param dataType the target Spark SQL type; may be {@code null}
 * @return the converted value, or {@code null} if either argument is {@code null}, the type is
 *         not supported, or a date/timestamp could not be parsed
 * @throws NumberFormatException if {@code value} is not a valid number for a numeric target type
 */
protected Object cast(String value, DataType dataType) {
    if (value == null || dataType == null) {
        return null;
    }
    if (dataType == DataTypes.StringType) {
        return value;
    }
    if (dataType == DataTypes.BooleanType) {
        return Boolean.valueOf(value);
    }
    if (dataType == DataTypes.ByteType) {
        return Byte.valueOf(value);
    }
    if (dataType == DataTypes.DateType) {
        try {
            return DateUtils.fromJavaDate(Date.valueOf(value));
        } catch (IllegalArgumentException ex) {
            ex.printStackTrace();
            return null;
        }
    }
    if (dataType == DataTypes.DoubleType) {
        return Double.valueOf(value);
    }
    if (dataType == DataTypes.FloatType) {
        return Float.valueOf(value);
    }
    // Integer columns are parsed like the other numeric types rather than falling through to
    // the "unsupported type" null at the end of the method.
    if (dataType == DataTypes.IntegerType) {
        return Integer.valueOf(value);
    }
    if (dataType == DataTypes.LongType) {
        return Long.valueOf(value);
    }
    if (dataType == DataTypes.ShortType) {
        return Short.valueOf(value);
    }
    if (dataType == DataTypes.TimestampType) {
        // Throw away extra if more than 9 decimal places
        int periodIdx = value.indexOf(".");
        String str = value;
        if (periodIdx != -1 && str.length() - periodIdx > 9) {
            str = str.substring(0, periodIdx + 10);
        }
        try {
            return java.sql.Timestamp.valueOf(str);
        } catch (IllegalArgumentException ex) {
            ex.printStackTrace();
            return null;
        }
    }
    if (dataType == DataTypes.BinaryType) {
        // NOTE(review): uses the platform default charset — confirm whether UTF-8 is intended.
        return value.getBytes();
    }
    return null;
}

From source file:com.getcake.sparkjdbc.SparkJDBCServer.java

License:Apache License

/**
 * Loads a single CSV file as a cached temp table, using a properties file to describe its schema.
 *
 * <p>If {@code registerTableName} can already be resolved through the Hive context, nothing is
 * loaded. Otherwise the metadata file is read: {@code numColumns} gives the column count and
 * {@code col<i>.name} / {@code col<i>.type} (1-based) describe each column. Unrecognised type
 * names are treated as strings. Every property is also forwarded as an option to the
 * {@code com.databricks.spark.csv} data source.
 *
 * @param registerTableName name under which the temp table is registered
 * @param fullPathTableName path of the CSV file to load
 * @param metaFileName path of the properties file describing the columns and load options
 * @return a human-readable status message
 * @throws IOException if the metadata file cannot be opened or read
 */
private String loadSingleFileWithMeta(String registerTableName, String fullPathTableName, String metaFileName)
        throws IOException {
    DataFrame dynamicDataFrame;
    long startTime, firstStartTime;
    float durSeconds, durMinutes;
    String respMsg;

    startTime = System.currentTimeMillis();
    firstStartTime = startTime;
    try {
        dynamicDataFrame = hiveContext.table(registerTableName);
        respMsg = "table " + registerTableName + " at " + fullPathTableName + " was already loaded";
        log(respMsg);
        return respMsg;
    } catch (Throwable exc) {
        // hiveContext.table does not declare that it throws NoSuchTableException, so cannot use it in catch clause and
        // have to check for it explicitly
        if (exc instanceof NoSuchTableException) {
            respMsg = "table " + registerTableName + " at " + fullPathTableName
                    + " was not loaded => load it next";
            log(respMsg);
        } else {
            throw exc;
        }
    }

    // try-with-resources releases the metadata file handle even when load() fails.
    try (FileInputStream propFileInputStream = new FileInputStream(metaFileName)) {
        properties = new Properties();
        properties.load(propFileInputStream);
    }

    Stream<Entry<Object, Object>> stream = properties.entrySet().stream();
    Map<String, String> options = stream.collect(Collectors.toMap(entry -> String.valueOf(entry.getKey()),
            entry -> String.valueOf(entry.getValue())));

    int numColumns = Integer.parseInt(properties.getProperty("numColumns"));
    StructField structFields[] = new StructField[numColumns];
    String colName, colType;
    DataType columnType;

    // Column properties are 1-based (col1.name, col1.type, ...); the array is 0-based.
    for (int i = 1; i <= numColumns; i++) {
        colName = properties.getProperty("col" + i + ".name");
        colType = properties.getProperty("col" + i + ".type");
        switch (colType) {
        case "TimeStamp":
            columnType = DataTypes.TimestampType;
            break;
        case "Date":
            columnType = DataTypes.DateType;
            break;
        case "Float":
            columnType = DataTypes.FloatType;
            break;
        case "Integer":
            columnType = DataTypes.IntegerType;
            break;
        case "Long":
            columnType = DataTypes.LongType;
            break;
        case "Short":
            columnType = DataTypes.ShortType;
            break;
        case "Double":
            columnType = DataTypes.DoubleType;
            break;
        case "Boolean":
            columnType = DataTypes.BooleanType;
            break;
        case "Binary":
            columnType = DataTypes.BinaryType;
            break;
        case "Byte":
            columnType = DataTypes.ByteType;
            break;
        case "Null":
            columnType = DataTypes.NullType;
            break;
        default:
            // Any other type name is loaded as a string column.
            columnType = DataTypes.StringType;
        }

        structFields[i - 1] = DataTypes.createStructField(colName, columnType, true);
    }

    options.put("path", "file:///" + fullPathTableName);

    StructType schema = new StructType(structFields);

    dynamicDataFrame = hiveContext.load("com.databricks.spark.csv", schema, options);

    durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F;
    durMinutes = durSeconds / 60F;
    log("loaded table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes);

    // Log the schema as reported by the loaded frame.
    schema = dynamicDataFrame.schema();
    structFields = schema.fields();
    for (StructField structFieldLocal : structFields) {
        DataType dataType = structFieldLocal.dataType();
        logger.debug(structFieldLocal.name() + " - dataType: " + dataType.typeName());
    }

    startTime = System.currentTimeMillis();
    dynamicDataFrame.cache();
    durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F;
    durMinutes = durSeconds / 60F;
    log("cache table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes);

    startTime = System.currentTimeMillis();
    dynamicDataFrame.registerTempTable(registerTableName);

    durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F;
    durMinutes = durSeconds / 60F;
    log("registerTempTable table " + registerTableName + " in seconds: " + durSeconds + " / in minutes: "
            + durMinutes);

    // Total elapsed time since the method was entered.
    durSeconds = (float) (System.currentTimeMillis() - firstStartTime) / 1000F;
    durMinutes = durSeconds / 60F;
    respMsg = "Completed loading table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: "
            + durMinutes;
    log(respMsg);
    return respMsg;
}

From source file:com.getcake.sparkjdbc.SparkJDBCServer.java

License:Apache License

/**
 * Loads several CSV files that share one schema, unions them and registers the result as a
 * cached temp table.
 *
 * <p>If {@code registerTableName} can already be resolved through the Hive context, nothing is
 * loaded. Otherwise the metadata file is read: {@code numColumns} gives the column count and
 * {@code col<i>.name} / {@code col<i>.type} (1-based) describe each column. Unrecognised type
 * names are treated as strings. Every property is also forwarded as an option to the
 * {@code com.databricks.spark.csv} data source. The files to load are listed one per line in
 * {@code fileListName}.
 *
 * @param registerTableName name under which the temp table is registered
 * @param fullPathTableName table location, used only in log and status messages
 * @param metaFileName path of the properties file describing the columns and load options
 * @param fileListName path of a text file listing the CSV files to load, one per line
 * @return a human-readable status message
 * @throws IOException if the metadata file or the file list cannot be read, or the file list
 *         contains no entries
 */
private String loadFilesWithMeta(String registerTableName, String fullPathTableName, String metaFileName,
        String fileListName) throws IOException {
    DataFrame combinedDynamicDataFrame = null;
    long startTime, firstStartTime;
    float durSeconds, durMinutes;
    String respMsg;

    startTime = System.currentTimeMillis();
    firstStartTime = startTime;
    try {
        combinedDynamicDataFrame = hiveContext.table(registerTableName);
        respMsg = "table " + registerTableName + " at " + fullPathTableName + " was already loaded";
        log(respMsg);
        return respMsg;
    } catch (Throwable exc) {
        // hiveContext.table does not declare that it throws NoSuchTableException, so cannot use it in catch clause and
        // have to check for it explicitly
        if (exc instanceof NoSuchTableException) {
            respMsg = "table " + registerTableName + " at " + fullPathTableName
                    + " was not loaded => load it next";
            log(respMsg);
        } else {
            throw exc;
        }
    }

    // try-with-resources releases the metadata file handle even when load() fails.
    try (FileInputStream propFileInputStream = new FileInputStream(metaFileName)) {
        properties = new Properties();
        properties.load(propFileInputStream);
    }

    Stream<Entry<Object, Object>> stream = properties.entrySet().stream();
    Map<String, String> options = stream.collect(Collectors.toMap(entry -> String.valueOf(entry.getKey()),
            entry -> String.valueOf(entry.getValue())));

    int numColumns = Integer.parseInt(properties.getProperty("numColumns"));
    StructField structFields[] = new StructField[numColumns];
    String colName, colType;
    DataType columnType;

    // Column properties are 1-based (col1.name, col1.type, ...); the array is 0-based.
    for (int i = 1; i <= numColumns; i++) {
        colName = properties.getProperty("col" + i + ".name");
        colType = properties.getProperty("col" + i + ".type");
        switch (colType) {
        case "TimeStamp":
            columnType = DataTypes.TimestampType;
            break;
        case "Date":
            columnType = DataTypes.DateType;
            break;
        case "Float":
            columnType = DataTypes.FloatType;
            break;
        case "Integer":
            columnType = DataTypes.IntegerType;
            break;
        case "Long":
            columnType = DataTypes.LongType;
            break;
        case "Short":
            columnType = DataTypes.ShortType;
            break;
        case "Double":
            columnType = DataTypes.DoubleType;
            break;
        case "Boolean":
            columnType = DataTypes.BooleanType;
            break;
        case "Binary":
            columnType = DataTypes.BinaryType;
            break;
        case "Byte":
            columnType = DataTypes.ByteType;
            break;
        case "Null":
            columnType = DataTypes.NullType;
            break;
        default:
            // Any other type name is loaded as a string column.
            columnType = DataTypes.StringType;
        }

        structFields[i - 1] = DataTypes.createStructField(colName, columnType, true);
    }

    StructType schema = new StructType(structFields);

    // A missing or unreadable list is reported to the caller; continuing with no files would
    // only fail later with a NullPointerException on the never-assigned combined frame.
    List<String> fileList = Files.readAllLines(Paths.get(fileListName));
    if (fileList.isEmpty()) {
        throw new IOException("No files listed in " + fileListName + " for table " + registerTableName);
    }

    for (String file : fileList) {
        options.put("path", "file:///" + file);
        DataFrame dynamicDataFrame = hiveContext.load("com.databricks.spark.csv", schema, options);
        if (combinedDynamicDataFrame == null) {
            combinedDynamicDataFrame = dynamicDataFrame;
        } else {
            combinedDynamicDataFrame = combinedDynamicDataFrame.unionAll(dynamicDataFrame);
        }
    }

    durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F;
    durMinutes = durSeconds / 60F;
    log("loaded table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes);

    // Log the schema as reported by the combined frame.
    schema = combinedDynamicDataFrame.schema();
    structFields = schema.fields();
    for (StructField structFieldLocal : structFields) {
        DataType dataType = structFieldLocal.dataType();
        logger.debug(structFieldLocal.name() + " - dataType: " + dataType.typeName());
    }

    startTime = System.currentTimeMillis();
    combinedDynamicDataFrame.cache();
    durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F;
    durMinutes = durSeconds / 60F;
    log("cache table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: " + durMinutes);

    startTime = System.currentTimeMillis();
    combinedDynamicDataFrame.registerTempTable(registerTableName);

    durSeconds = (float) (System.currentTimeMillis() - startTime) / 1000F;
    durMinutes = durSeconds / 60F;
    log("registerTempTable table " + registerTableName + " in seconds: " + durSeconds + " / in minutes: "
            + durMinutes);

    // Total elapsed time since the method was entered.
    durSeconds = (float) (System.currentTimeMillis() - firstStartTime) / 1000F;
    durMinutes = durSeconds / 60F;
    respMsg = "Completed loading table " + fullPathTableName + " in seconds: " + durSeconds + " / in minutes: "
            + durMinutes;
    log(respMsg);
    return respMsg;
}

From source file:com.github.sadikovi.riff.tree.expression.BooleanExpression.java

License:Open Source License

/**
 * Returns the Spark SQL type of this expression, which is always {@link DataTypes#BooleanType}.
 */
@Override
public DataType dataType() {
    return DataTypes.BooleanType;
}

From source file:com.ryft.spark.connector.examples.DataFrameExampleJ.java

License:BSD License

/**
 * Example entry point: reads {@code *.pcrime} files through the
 * {@code com.ryft.spark.connector.sql} data source with an explicit schema, registers them as
 * the temp table {@code crimes}, runs a filtered SQL query and logs the number of matching rows.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    final SparkConf sparkConf = new SparkConf().setAppName("SimplePairRDDExampleJ").setMaster("local[2]");
    final SparkContext sc = new SparkContext(sparkConf);

    try {
        final SQLContext sqlContext = new SQLContext(sc);

        // Nested struct used for the "_index" column of the schema below.
        final List<StructField> index = Arrays.asList(
                DataTypes.createStructField("file", DataTypes.IntegerType, true),
                DataTypes.createStructField("offset", DataTypes.IntegerType, true),
                DataTypes.createStructField("length", DataTypes.IntegerType, true),
                DataTypes.createStructField("fuzziness", DataTypes.IntegerType, true));

        // NOTE(review): FBICode is declared Boolean while Arrest is String and Domestic is
        // Integer — confirm these column types against the actual data set.
        final List<StructField> columns = Arrays.asList(
                DataTypes.createStructField("Arrest", DataTypes.StringType, true),
                DataTypes.createStructField("Beat", DataTypes.IntegerType, true),
                DataTypes.createStructField("Block", DataTypes.StringType, true),
                DataTypes.createStructField("CaseNumber", DataTypes.StringType, true),
                DataTypes.createStructField("CommunityArea", DataTypes.IntegerType, true),
                DataTypes.createStructField("Date", DataTypes.StringType, true),
                DataTypes.createStructField("Description", DataTypes.StringType, true),
                DataTypes.createStructField("Domestic", DataTypes.IntegerType, true),
                DataTypes.createStructField("FBICode", DataTypes.BooleanType, true),
                DataTypes.createStructField("ID", DataTypes.StringType, true),
                DataTypes.createStructField("IUCR", DataTypes.IntegerType, true),
                DataTypes.createStructField("Latitude", DataTypes.DoubleType, true),
                DataTypes.createStructField("Location", DataTypes.StringType, true),
                DataTypes.createStructField("LocationDescription", DataTypes.StringType, true),
                DataTypes.createStructField("Longitude", DataTypes.DoubleType, true),
                DataTypes.createStructField("PrimaryType", DataTypes.StringType, true),
                DataTypes.createStructField("UpdatedOn", DataTypes.StringType, true),
                DataTypes.createStructField("Ward", DataTypes.IntegerType, true),
                DataTypes.createStructField("XCoordinate", DataTypes.IntegerType, true),
                DataTypes.createStructField("YCoordinate", DataTypes.IntegerType, true),
                DataTypes.createStructField("Year", DataTypes.IntegerType, true),
                DataTypes.createStructField("_index", DataTypes.createStructType(index), true));
        final StructType schema = DataTypes.createStructType(columns);

        final DataFrame crimes = sqlContext.read().format("com.ryft.spark.connector.sql").schema(schema)
                .option("files", "*.pcrime").load();

        crimes.registerTempTable("crimes");

        final DataFrame df = sqlContext
                .sql("select Date, ID, Description, Arrest from crimes" + " where Description LIKE '%VEHICLE%'"
                        + " AND (Date LIKE '%04/15/2015%' OR Date LIKE '%04/14/2015%' OR Date LIKE '%04/13/2015%')"
                        + " ORDER BY Date");

        final int result = df.collect().length;
        logger.info("Result count: {}", result);
    } finally {
        // Release the context even when the query fails.
        sc.stop();
    }
}

From source file:com.splicemachine.db.iapi.types.SQLBoolean.java

License:Apache License

/**
 * Builds the Spark {@link StructField} that describes this value as a column: a nullable field
 * of {@link DataTypes#BooleanType} with the given name.
 *
 * @param columnName name to give the field
 * @return a nullable boolean struct field named {@code columnName}
 */
@Override
public StructField getStructField(String columnName) {
    return DataTypes.createStructField(columnName, DataTypes.BooleanType, true);
}

From source file:com.thinkbiganalytics.spark.metadata.SqlTransformStage.java

License:Apache License

/**
 * Maps a JDBC type code to the Spark SQL data type used to represent it.
 *
 * <p>Type codes without an explicit mapping are logged at debug level and treated as strings.
 *
 * @param sqlType   a type code from {@link java.sql.Types}
 * @param precision precision reported for the column; used for DECIMAL and NUMERIC only
 * @param scale     scale reported for the column; used for DECIMAL and NUMERIC only
 * @param signed    whether the column is signed; unsigned INTEGER is widened to long and
 *                  unsigned BIGINT to a decimal with precision 20 and scale 0
 * @return the corresponding Spark SQL data type
 */
@Nonnull
@SuppressWarnings("squid:S1479")
private DataType getCatalystType(final int sqlType, final int precision, final int scale,
        final boolean signed) {
    final DataType catalystType;

    switch (sqlType) {
    // Binary data
    case Types.BINARY:
    case Types.BLOB:
    case Types.LONGVARBINARY:
    case Types.VARBINARY:
        catalystType = DataTypes.BinaryType;
        break;

    // Booleans
    case Types.BIT:
    case Types.BOOLEAN:
        catalystType = DataTypes.BooleanType;
        break;

    // Dates and times
    case Types.DATE:
        catalystType = DataTypes.DateType;
        break;

    case Types.TIME:
    case Types.TIMESTAMP:
        catalystType = DataTypes.TimestampType;
        break;

    // Exact numerics: fall back to the system default when neither precision nor scale is set
    case Types.DECIMAL:
    case Types.NUMERIC:
        if (precision == 0 && scale == 0) {
            catalystType = DecimalType.SYSTEM_DEFAULT();
        } else {
            catalystType = new DecimalType(precision, scale);
        }
        break;

    // Floating point
    case Types.DOUBLE:
    case Types.REAL:
        catalystType = DataTypes.DoubleType;
        break;

    case Types.FLOAT:
        catalystType = DataTypes.FloatType;
        break;

    // Integers
    case Types.SMALLINT:
    case Types.TINYINT:
        catalystType = DataTypes.IntegerType;
        break;

    case Types.INTEGER:
        if (signed) {
            catalystType = DataTypes.IntegerType;
        } else {
            catalystType = DataTypes.LongType;
        }
        break;

    case Types.ROWID:
        catalystType = DataTypes.LongType;
        break;

    case Types.BIGINT:
        if (signed) {
            catalystType = DataTypes.LongType;
        } else {
            catalystType = new DecimalType(20, 0);
        }
        break;

    // Character data and types represented as text
    case Types.CHAR:
    case Types.CLOB:
    case Types.LONGNVARCHAR:
    case Types.LONGVARCHAR:
    case Types.NCHAR:
    case Types.NCLOB:
    case Types.NVARCHAR:
    case Types.REF:
    case Types.SQLXML:
    case Types.STRUCT:
    case Types.VARCHAR:
        catalystType = DataTypes.StringType;
        break;

    default:
        log.debug("Unsupported SQL type: {}", sqlType);
        catalystType = DataTypes.StringType;
        break;
    }

    return catalystType;
}

From source file:gaffer.accumulostore.operation.spark.handler.AccumuloStoreRelation.java

License:Apache License

/**
 * Looks up the Spark SQL data type for a fully qualified Java class name.
 *
 * <p>Only {@code java.lang} String, Integer, Long, Boolean, Double, Float, Byte and Short are
 * mapped.
 *
 * @param className fully qualified class name, e.g. {@code "java.lang.Long"}
 * @return the matching Spark SQL data type, or {@code null} if the class name is not mapped
 */
private static DataType getType(final String className) {
    final DataType sparkType;
    switch (className) {
    case "java.lang.String":
        sparkType = DataTypes.StringType;
        break;
    case "java.lang.Integer":
        sparkType = DataTypes.IntegerType;
        break;
    case "java.lang.Long":
        sparkType = DataTypes.LongType;
        break;
    case "java.lang.Boolean":
        sparkType = DataTypes.BooleanType;
        break;
    case "java.lang.Double":
        sparkType = DataTypes.DoubleType;
        break;
    case "java.lang.Float":
        sparkType = DataTypes.FloatType;
        break;
    case "java.lang.Byte":
        sparkType = DataTypes.ByteType;
        break;
    case "java.lang.Short":
        sparkType = DataTypes.ShortType;
        break;
    default:
        // No mapping for this class name.
        sparkType = null;
        break;
    }
    return sparkType;
}

From source file:org.apache.carbondata.converter.SparkDataTypeConverterImpl.java

License:Apache License

/**
 * Translates a CarbonData data type into the equivalent Spark SQL data type.
 *
 * <p>Any Carbon decimal type maps to the result of the no-argument
 * {@code DataTypes.createDecimalType()}; the Carbon precision and scale are not passed on.
 *
 * @param carbonDataType the CarbonData type to translate
 * @return the Spark SQL data type, or {@code null} if there is no mapping for the given type
 */
private static org.apache.spark.sql.types.DataType convertCarbonToSparkDataType(DataType carbonDataType) {
    if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.STRING) {
        return DataTypes.StringType;
    }
    if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.SHORT) {
        return DataTypes.ShortType;
    }
    if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.INT) {
        return DataTypes.IntegerType;
    }
    if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.LONG) {
        return DataTypes.LongType;
    }
    if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.DOUBLE) {
        return DataTypes.DoubleType;
    }
    if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.BOOLEAN) {
        return DataTypes.BooleanType;
    }
    // Decimals are recognised through a predicate rather than by identity.
    if (org.apache.carbondata.core.metadata.datatype.DataTypes.isDecimal(carbonDataType)) {
        return DataTypes.createDecimalType();
    }
    if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.TIMESTAMP) {
        return DataTypes.TimestampType;
    }
    if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.DATE) {
        return DataTypes.DateType;
    }
    // No Spark equivalent for this Carbon type.
    return null;
}