gobblin.converter.avro.JsonElementConversionFactory.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.converter.avro.JsonElementConversionFactory.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.converter.avro;

import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.util.Utf8;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

import sun.util.calendar.ZoneInfo;

import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;

import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.WorkUnitState;

/**
 * <p>
 * Creates a JsonElement to Avro converter for all supported data types.
 * </p>
 *
 * @author kgoodhop
 *
 */
public class JsonElementConversionFactory {

    /**
     * Source type names accepted by {@code getConvertor}; the incoming type string is upper-cased and
     * matched against these constants.
     */
    public enum Type {
        // date/time values
        DATE,
        TIMESTAMP,
        TIME,
        // declared, but rejected as unsupported by getConvertor
        FIXED,
        // text and binary
        STRING,
        BYTES,
        // numeric and boolean
        INT,
        LONG,
        FLOAT,
        DOUBLE,
        BOOLEAN,
        // types that need the schema node
        ARRAY,
        MAP,
        ENUM
    }

    /**
     * Creates the converter for a single field of the source schema.
     *
     * <p>
     * The type name is matched case-insensitively against {@link Type}. The case mapping uses
     * {@link Locale#ROOT} so that the lookup does not depend on the JVM default locale (under a Turkish
     * default locale, {@code "int".toUpperCase()} does not produce {@code "INT"}).
     * </p>
     *
     * @param fieldName name of the field the converter is created for
     * @param fieldType source type name; must match one of the {@link Type} constants, ignoring case
     * @param schemaNode schema node of the field; handed to the array, map and enum converters
     * @param state work unit state supplying the date/time formats, the time zone and the binary charset
     * @param nullable whether the field may hold a null value
     * @return converter for the given field type
     * @throws UnsupportedDateTypeException if {@code fieldType} is not a known {@link Type}, or is
     *         {@code FIXED}
     */
    public static JsonElementConverter getConvertor(String fieldName, String fieldType, JsonObject schemaNode,
            WorkUnitState state, boolean nullable) throws UnsupportedDateTypeException {

        Type type;
        try {
            // Locale.ROOT keeps the enum lookup independent of the default locale.
            type = Type.valueOf(fieldType.toUpperCase(Locale.ROOT));
        } catch (IllegalArgumentException e) {
            throw new UnsupportedDateTypeException(fieldType + " is unsupported");
        }

        // Resolved for every field type, so an unrecognized time zone setting fails here
        // even when the field itself is not a date.
        DateTimeZone timeZone = getTimeZone(state.getProp(ConfigurationKeys.CONVERTER_AVRO_DATE_TIMEZONE, "UTC"));
        String sourceType = type.toString();

        switch (type) {
        case DATE:
            return new DateConverter(fieldName, nullable, sourceType,
                    state.getProp(ConfigurationKeys.CONVERTER_AVRO_DATE_FORMAT, "yyyy-MM-dd HH:mm:ss"), timeZone,
                    state);

        case TIMESTAMP:
            return new DateConverter(fieldName, nullable, sourceType,
                    state.getProp(ConfigurationKeys.CONVERTER_AVRO_TIMESTAMP_FORMAT, "yyyy-MM-dd HH:mm:ss"),
                    timeZone, state);

        case TIME:
            return new DateConverter(fieldName, nullable, sourceType,
                    state.getProp(ConfigurationKeys.CONVERTER_AVRO_TIME_FORMAT, "HH:mm:ss"), timeZone, state);

        case STRING:
            return new StringConverter(fieldName, nullable, sourceType);

        case BYTES:
            return new BinaryConverter(fieldName, nullable, sourceType,
                    state.getProp(ConfigurationKeys.CONVERTER_AVRO_BINARY_CHARSET, "UTF8"));

        case INT:
            return new IntConverter(fieldName, nullable, sourceType);

        case LONG:
            return new LongConverter(fieldName, nullable, sourceType);

        case FLOAT:
            return new FloatConverter(fieldName, nullable, sourceType);

        case DOUBLE:
            return new DoubleConverter(fieldName, nullable, sourceType);

        case BOOLEAN:
            return new BooleanConverter(fieldName, nullable, sourceType);

        case ARRAY:
            return new ArrayConverter(fieldName, nullable, sourceType, schemaNode, state);

        case MAP:
            return new MapConverter(fieldName, nullable, sourceType, schemaNode, state);

        case ENUM:
            return new EnumConverter(fieldName, nullable, sourceType, schemaNode);

        case FIXED:
        default:
            // FIXED is a declared type name but has no converter.
            throw new UnsupportedDateTypeException(fieldType + " is unsupported");
        }
    }

    /**
     * Resolves a time zone id to a Joda {@link DateTimeZone}.
     *
     * <p>
     * The id is first looked up through Joda. If Joda rejects it, the id is looked up through the public
     * {@link TimeZone} API and the result is converted to a Joda zone. No JDK-internal classes are used.
     * </p>
     *
     * @param id time zone id to resolve
     * @return the matching Joda time zone
     * @throws IllegalArgumentException if neither Joda nor the JDK recognizes {@code id}
     */
    private static DateTimeZone getTimeZone(String id) {
        try {
            return DateTimeZone.forID(id);
        } catch (IllegalArgumentException e) {
            TimeZone timeZone = TimeZone.getTimeZone(id);

            // TimeZone.getTimeZone never returns null: an id it cannot understand yields the GMT zone.
            // A returned zone whose id differs from the requested one therefore means "not recognized".
            if (!timeZone.getID().equals(id)) {
                throw new IllegalArgumentException("TimeZone " + id + " not recognized", e);
            }
            return DateTimeZone.forTimeZone(timeZone);
        }
    }

    /**
     * Base class for converters that turn a {@link JsonElement} into a value of a supported Avro type.
     *
     * @author kgoodhop
     *
     */
    public static abstract class JsonElementConverter {
        private final String name;
        private final boolean nullable;
        private final String sourceType;

        /**
         *
         * @param fieldName name of the field this converter handles
         * @param nullable whether null values are accepted for the field
         * @param sourceType source type name; recorded in lower case as the {@code source.type}
         *        property of the schema built by {@link #schema()}
         */
        public JsonElementConverter(String fieldName, boolean nullable, String sourceType) {
            this.name = fieldName;
            this.nullable = nullable;
            this.sourceType = sourceType;
        }

        /**
         * Field name from schema
         * @return the field name given at construction
         */
        public String getName() {
            return this.name;
        }

        /**
         * is field nullable
         * @return {@code true} if null values are accepted
         */
        public boolean isNullable() {
            return this.nullable;
        }

        /**
         * avro schema for the converted type
         * @return {@link #schema()} as is for a non-nullable field; for a nullable field, a union of
         *         the null schema and {@link #schema()}, in that order
         */
        public Schema getSchema() {
            if (this.nullable) {
                List<Schema> list = new ArrayList<>();
                list.add(Schema.create(Schema.Type.NULL));
                list.add(schema());
                return Schema.createUnion(list);
            }
            return schema();
        }

        /**
         * Builds the schema of the target type, tagged with the lower-cased source type.
         * @return a newly created schema carrying the {@code source.type} property
         */
        protected Schema schema() {
            Schema schema = Schema.create(getTargetType());
            // Locale.ROOT: with a Turkish default locale "INT".toLowerCase() would yield a dotless i.
            schema.addProp("source.type", this.sourceType.toLowerCase(Locale.ROOT));
            return schema;
        }

        /**
         * Convert value
         * @param value element to convert; a JSON null (or a Java {@code null}) yields {@code null}
         *        when the field is nullable
         * @return Avro safe type
         * @throws RuntimeException if the value is null and the field is not nullable
         */
        public Object convert(JsonElement value) {
            // A Java null is handled like JSON null so that it is reported with the field name
            // instead of surfacing as a bare NullPointerException.
            if (value == null || value.isJsonNull()) {
                if (this.nullable) {
                    return null;
                }
                throw new RuntimeException("Field: " + getName() + " is not nullable and contains a null value");
            }
            return convertField(value);
        }

        /**
         * Convert JsonElement to Avro type
         * @param value element to convert; {@link #convert(JsonElement)} only passes non-null elements
         * @return the converted value
         */
        abstract Object convertField(JsonElement value);

        /**
         * Avro data type after conversion
         * @return the Avro type used by {@link #schema()}
         */
        public abstract Schema.Type getTargetType();
    }

    /**
     * Converter producing Avro {@link Utf8} strings.
     */
    public static class StringConverter extends JsonElementConverter {

        public StringConverter(String fieldName, boolean nullable, String sourceType) {
            super(fieldName, nullable, sourceType);
        }

        /**
         * Reads the element as a string and wraps it in a {@link Utf8}.
         */
        @Override
        Object convertField(JsonElement value) {
            String text = value.getAsString();
            return new Utf8(text);
        }

        /**
         * @return {@code Schema.Type.STRING}
         */
        @Override
        public Schema.Type getTargetType() {
            return Schema.Type.STRING;
        }
    }

    /**
     * Converter producing Avro ints.
     */
    public static class IntConverter extends JsonElementConverter {

        public IntConverter(String fieldName, boolean nullable, String sourceType) {
            super(fieldName, nullable, sourceType);
        }

        /**
         * Reads the element as an int and returns it boxed.
         */
        @Override
        Object convertField(JsonElement value) {
            int number = value.getAsInt();
            return Integer.valueOf(number);
        }

        /**
         * @return {@code Schema.Type.INT}
         */
        @Override
        public Schema.Type getTargetType() {
            return Schema.Type.INT;
        }
    }

    /**
     * Converter producing Avro longs.
     */
    public static class LongConverter extends JsonElementConverter {

        public LongConverter(String fieldName, boolean nullable, String sourceType) {
            super(fieldName, nullable, sourceType);
        }

        /**
         * Reads the element as a long and returns it boxed.
         */
        @Override
        Object convertField(JsonElement value) {
            long number = value.getAsLong();
            return Long.valueOf(number);
        }

        /**
         * @return {@code Schema.Type.LONG}
         */
        @Override
        public Schema.Type getTargetType() {
            return Schema.Type.LONG;
        }
    }

    /**
     * Converter producing Avro doubles.
     */
    public static class DoubleConverter extends JsonElementConverter {

        public DoubleConverter(String fieldName, boolean nullable, String sourceType) {
            super(fieldName, nullable, sourceType);
        }

        /**
         * Reads the element as a double and returns it boxed.
         */
        @Override
        Object convertField(JsonElement value) {
            double number = value.getAsDouble();
            return Double.valueOf(number);
        }

        /**
         * @return {@code Schema.Type.DOUBLE}
         */
        @Override
        public Schema.Type getTargetType() {
            return Schema.Type.DOUBLE;
        }
    }

    /**
     * Converter producing Avro floats.
     */
    public static class FloatConverter extends JsonElementConverter {

        public FloatConverter(String fieldName, boolean nullable, String sourceType) {
            super(fieldName, nullable, sourceType);
        }

        /**
         * Reads the element as a float and returns it boxed.
         */
        @Override
        Object convertField(JsonElement value) {
            float number = value.getAsFloat();
            return Float.valueOf(number);
        }

        /**
         * @return {@code Schema.Type.FLOAT}
         */
        @Override
        public Schema.Type getTargetType() {
            return Schema.Type.FLOAT;
        }
    }

    /**
     * Converter producing Avro booleans.
     */
    public static class BooleanConverter extends JsonElementConverter {

        public BooleanConverter(String fieldName, boolean nullable, String sourceType) {
            super(fieldName, nullable, sourceType);
        }

        /**
         * Reads the element as a boolean and returns it boxed.
         */
        @Override
        Object convertField(JsonElement value) {
            boolean flag = value.getAsBoolean();
            return Boolean.valueOf(flag);
        }

        /**
         * @return {@code Schema.Type.BOOLEAN}
         */
        @Override
        public Schema.Type getTargetType() {
            return Schema.Type.BOOLEAN;
        }
    }

    /**
     * Converts a date/time string into epoch time stored as an Avro long.
     *
     * <p>
     * The configured pattern string may hold several comma-separated patterns; they are tried in order
     * and the first one that parses the value wins. The string is split on {@code ","} as is, so
     * whitespace around a comma becomes part of the pattern.
     * </p>
     */
    public static class DateConverter extends JsonElementConverter {
        private final String inputPatterns;
        private final DateTimeZone timeZone;
        private final WorkUnitState state;

        /**
         * @param fieldName name of the field this converter handles
         * @param nullable whether null values are accepted for the field
         * @param sourceType source type name recorded on the schema
         * @param pattern one or more comma-separated date/time patterns
         * @param zone time zone applied to the formatter when parsing
         * @param state work unit state; read for {@code CONVERTER_IS_EPOCH_TIME_IN_SECONDS}
         */
        public DateConverter(String fieldName, boolean nullable, String sourceType, String pattern,
                DateTimeZone zone, WorkUnitState state) {
            super(fieldName, nullable, sourceType);
            this.inputPatterns = pattern;
            this.timeZone = zone;
            this.state = state;
        }

        /**
         * Parses the value with the first matching pattern.
         *
         * @param value element holding the date/time text
         * @return epoch milliseconds as a {@code Long}, or epoch seconds when
         *         {@code CONVERTER_IS_EPOCH_TIME_IN_SECONDS} is set to true
         * @throws RuntimeException if no pattern can parse the value; the failure of the last pattern
         *         tried is attached as the cause
         */
        @Override
        Object convertField(JsonElement value) {
            RuntimeException lastFailure = null;

            for (String pattern : this.inputPatterns.split(",")) {
                // An invalid pattern is a configuration error and is deliberately left to propagate.
                DateTimeFormatter dtf = DateTimeFormat.forPattern(pattern).withZone(this.timeZone);
                try {
                    // Epoch millis are the same in every zone, so no conversion to UTC is needed.
                    long millis = dtf.parseDateTime(value.getAsString()).getMillis();
                    if (Boolean.valueOf(this.state.getProp(ConfigurationKeys.CONVERTER_IS_EPOCH_TIME_IN_SECONDS))) {
                        return millis / 1000;
                    }
                    return millis;
                } catch (RuntimeException e) {
                    // Remember why this pattern failed and move on to the next one.
                    lastFailure = e;
                }
            }

            throw new RuntimeException("Failed to parse the date for field: " + getName()
                    + " using pattern(s): " + this.inputPatterns, lastFailure);
        }

        /**
         * @return {@code Schema.Type.LONG}
         */
        @Override
        public org.apache.avro.Schema.Type getTargetType() {
            return Schema.Type.LONG;
        }
    }

    /**
     * Converter producing Avro bytes from the string form of a JSON value.
     */
    public static class BinaryConverter extends JsonElementConverter {
        private String charSet;

        /**
         * @param fieldName name of the field this converter handles
         * @param nullable whether null values are accepted for the field
         * @param sourceType source type name recorded on the schema
         * @param charSet name of the charset used to encode the string into bytes
         */
        public BinaryConverter(String fieldName, boolean nullable, String sourceType, String charSet) {
            super(fieldName, nullable, sourceType);
            this.charSet = charSet;
        }

        /**
         * Encodes the element's string value with the configured charset and wraps the bytes in a
         * {@link ByteBuffer}. An unsupported charset name is rethrown as a {@link RuntimeException}.
         */
        @Override
        Object convertField(JsonElement value) {
            byte[] encoded;
            try {
                encoded = value.getAsString().getBytes(this.charSet);
            } catch (UnsupportedEncodingException unsupported) {
                throw new RuntimeException(unsupported);
            }
            return ByteBuffer.wrap(encoded);
        }

        /**
         * @return {@code Schema.Type.BYTES}
         */
        @Override
        public Schema.Type getTargetType() {
            return Schema.Type.BYTES;
        }
    }

    /**
     * Base class for converters that hold a second converter for their contained elements.
     */
    public static abstract class ComplexConverter extends JsonElementConverter {
        private JsonElementConverter elementConverter;

        public ComplexConverter(String fieldName, boolean nullable, String sourceType) {
            super(fieldName, nullable, sourceType);
        }

        /**
         * @return the converter last stored with {@link #setElementConverter(JsonElementConverter)},
         *         or {@code null} if none has been stored
         */
        public JsonElementConverter getElementConverter() {
            return this.elementConverter;
        }

        /**
         * Stores the converter used for the contained elements.
         *
         * @param elementConverter converter for the contained elements
         */
        protected void setElementConverter(JsonElementConverter elementConverter) {
            this.elementConverter = elementConverter;
        }
    }

    /**
     * Converter producing an Avro array; each item is converted by the element converter.
     */
    public static class ArrayConverter extends ComplexConverter {

        /**
         * Creates the element converter from the {@code items} entry of the schema node's
         * {@code dataType} object. The element converter gets the same field name and nullability.
         *
         * @throws UnsupportedDateTypeException if the item type is not supported
         */
        public ArrayConverter(String fieldName, boolean nullable, String sourceType, JsonObject schemaNode,
                WorkUnitState state) throws UnsupportedDateTypeException {
            super(fieldName, nullable, sourceType);
            JsonObject dataType = schemaNode.get("dataType").getAsJsonObject();
            String itemType = dataType.get("items").getAsString();
            super.setElementConverter(getConvertor(fieldName, itemType, dataType, state, isNullable()));
        }

        /**
         * Converts every item of the JSON array. Items go straight to the element converter's
         * {@code convertField}, so its null check in {@code convert} is not applied to them.
         */
        @Override
        Object convertField(JsonElement value) {
            List<Object> converted = new ArrayList<>();
            JsonArray items = (JsonArray) value;

            for (int i = 0; i < items.size(); i++) {
                Object item = getElementConverter().convertField(items.get(i));
                converted.add(item);
            }

            return new GenericData.Array<>(schema(), converted);
        }

        /**
         * @return {@code Schema.Type.ARRAY}
         */
        @Override
        public Schema.Type getTargetType() {
            return Schema.Type.ARRAY;
        }

        /**
         * @return a new array schema over the element converter's schema, tagged
         *         {@code source.type=array}
         */
        @Override
        public Schema schema() {
            Schema itemSchema = getElementConverter().schema();
            Schema arraySchema = Schema.createArray(itemSchema);
            arraySchema.addProp("source.type", "array");
            return arraySchema;
        }
    }

    /**
     * Converter producing a map with string keys; each value is converted by the element converter.
     */
    public static class MapConverter extends ComplexConverter {

        /**
         * Creates the element converter from the {@code values} entry of the schema node's
         * {@code dataType} object. The element converter gets the same field name and nullability.
         *
         * @throws UnsupportedDateTypeException if the value type is not supported
         */
        public MapConverter(String fieldName, boolean nullable, String sourceType, JsonObject schemaNode,
                WorkUnitState state) throws UnsupportedDateTypeException {
            super(fieldName, nullable, sourceType);
            JsonObject dataType = schemaNode.get("dataType").getAsJsonObject();
            String valueType = dataType.get("values").getAsString();
            super.setElementConverter(getConvertor(fieldName, valueType, dataType, state, isNullable()));
        }

        /**
         * Converts every member of the JSON object. Values go straight to the element converter's
         * {@code convertField}, so its null check in {@code convert} is not applied to them.
         */
        @Override
        Object convertField(JsonElement value) {
            Map<String, Object> converted = new HashMap<>();
            JsonObject members = (JsonObject) value;

            for (Map.Entry<String, JsonElement> member : members.entrySet()) {
                Object memberValue = getElementConverter().convertField(member.getValue());
                converted.put(member.getKey(), memberValue);
            }

            return converted;
        }

        /**
         * @return {@code Schema.Type.MAP}
         */
        @Override
        public Schema.Type getTargetType() {
            return Schema.Type.MAP;
        }

        /**
         * @return a new map schema over the element converter's schema, tagged {@code source.type=map}
         */
        @Override
        public Schema schema() {
            Schema valueSchema = getElementConverter().schema();
            Schema mapSchema = Schema.createMap(valueSchema);
            mapSchema.addProp("source.type", "map");
            return mapSchema;
        }
    }

    /**
     * Converter producing Avro enum symbols. The enum name and its symbols are read from the
     * {@code dataType} object of the schema node.
     */
    public static class EnumConverter extends JsonElementConverter {
        String enumName;
        List<String> enumSet = new ArrayList<>();
        // Most recent schema built by schema(); null until schema() has run at least once.
        Schema schema;

        /**
         * @param fieldName name of the field this converter handles
         * @param nullable whether null values are accepted for the field
         * @param sourceType source type name passed on to the base converter
         * @param schemaNode schema node whose {@code dataType} object holds {@code symbols} and {@code name}
         */
        public EnumConverter(String fieldName, boolean nullable, String sourceType, JsonObject schemaNode) {
            super(fieldName, nullable, sourceType);

            JsonObject dataType = schemaNode.get("dataType").getAsJsonObject();
            for (JsonElement elem : dataType.get("symbols").getAsJsonArray()) {
                this.enumSet.add(elem.getAsString());
            }
            this.enumName = dataType.get("name").getAsString();
        }

        /**
         * Wraps the element's string value in an enum symbol bound to the enum schema.
         *
         * @param value element holding the symbol text
         * @return the enum symbol
         */
        @Override
        Object convertField(JsonElement value) {
            // The schema field is only populated by schema(); build it on demand so a conversion
            // that happens before any schema request does not create a symbol with a null schema.
            Schema enumSchema = this.schema;
            if (enumSchema == null) {
                enumSchema = schema();
            }
            return new GenericData.EnumSymbol(enumSchema, value.getAsString());
        }

        /**
         * @return {@code Schema.Type.ENUM}
         */
        @Override
        public org.apache.avro.Schema.Type getTargetType() {
            return Schema.Type.ENUM;
        }

        /**
         * Builds a new enum schema from the name and symbols, tags it {@code source.type=enum} and
         * remembers it for later conversions.
         *
         * @return the newly built enum schema
         */
        @Override
        public Schema schema() {
            this.schema = Schema.createEnum(this.enumName, "", "", this.enumSet);
            this.schema.addProp("source.type", "enum");
            return this.schema;
        }
    }
}