// Java tutorial
/**
 * Copyright 2016 Otto (GmbH & Co KG)
 * <p>
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.schedoscope.export.kafka.avro;

import com.fasterxml.jackson.databind.JsonNode;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.schedoscope.export.utils.HCatRecordJsonSerializer;
import org.schedoscope.export.utils.HCatUtils;

import java.io.IOException;
import java.util.*;

/**
 * This class converts an HCatRecord to an AvroRecord. It uses the Avro schema
 * to recursively construct the record, requesting the fields from the JsonNode
 * as needed.
 * <p>
 * Field, map-value and array-element schemas may be either a union (every
 * branch is tried, the last convertible branch wins) or a plain, non-union
 * schema. Branch types other than STRING, INT, LONG, BOOLEAN, DOUBLE, FLOAT,
 * RECORD, ARRAY and MAP are ignored.
 * <p>
 * NOTE(review): a JSON null that reaches a scalar branch of a record field is
 * still coerced by Jackson's {@code as*()} accessors (per the JsonNode Javadoc
 * {@code asInt()} yields 0 and {@code asText()} yields "null") - confirm
 * whether such fields should rather be left null.
 */
public class HCatToAvroRecordConverter {

    /**
     * Marker returned by {@link #convertValue} when no branch of the schema
     * has a supported type, so callers can tell "nothing to store" apart
     * from a legitimately converted {@code null}.
     */
    private static final Object NOT_CONVERTED = new Object();

    private final HCatRecordJsonSerializer serializer;

    private final Set<String> anonFields;

    private final String salt;

    /**
     * Create a new record converter instance, pass in a json serializer and a
     * list with field names to anonymize.
     *
     * @param serializer A Json serializer
     * @param anonFields A list with fields to anonymize
     * @param salt       An optional salt to use when anonymizing fields
     */
    public HCatToAvroRecordConverter(HCatRecordJsonSerializer serializer, Set<String> anonFields, String salt) {
        this.serializer = serializer;
        this.anonFields = anonFields;
        this.salt = salt;
    }

    /**
     * Create a new record converter instance, pass in a json serializer. No
     * field is anonymized and the salt is the empty string.
     *
     * @param serializer A Json serializer.
     */
    public HCatToAvroRecordConverter(HCatRecordJsonSerializer serializer) {
        this(serializer, new HashSet<String>(0), "");
    }

    /**
     * This function converts an HCatRecord to an Avro GenericRecord.
     *
     * @param hcatRecord The HCatRecord
     * @param avroSchema The Avro Schema
     * @return Returns an Avro GenericRecord
     * @throws IOException Is thrown if an error occurs
     */
    public GenericRecord convert(HCatRecord hcatRecord, Schema avroSchema) throws IOException {
        JsonNode json = serializer.getRecordAsJson(hcatRecord);
        return convertRecord(json, avroSchema);
    }

    /**
     * Builds a record for the given record schema from a JSON object.
     * String fields are passed through
     * {@code HCatUtils.getHashValueIfInList} together with the configured
     * anonymization field set and salt.
     *
     * @param json   The JSON node holding the record's fields; a JSON null
     *               yields a record built without setting any field
     * @param schema The Avro record schema
     * @return The built record
     * @throws IOException Is thrown if an error occurs
     */
    private GenericRecord convertRecord(JsonNode json, Schema schema) throws IOException {
        GenericRecordBuilder builder = new GenericRecordBuilder(schema);
        if (json.isNull()) {
            return builder.build();
        }
        for (Field field : schema.getFields()) {
            String name = field.name();
            JsonNode node = json.get(name);
            if (node == null) {
                // Field absent from the JSON: leave it unset so that build()
                // applies the schema default (or reports the unset field)
                // instead of failing here with a NullPointerException.
                continue;
            }
            Object value = convertValue(node, field.schema(), name);
            if (value != NOT_CONVERTED) {
                builder.set(name, value);
            }
        }
        return builder.build();
    }

    /**
     * Converts a JSON object into a map for the given Avro map schema.
     * Entries whose value is JSON null, or whose value schema has no
     * supported branch, are left out. Map values are never anonymized.
     *
     * @param json   The JSON node holding the map entries
     * @param schema The Avro map schema
     * @return The converted map
     * @throws IOException Is thrown if an error occurs
     */
    private Map<String, Object> convertMap(JsonNode json, Schema schema) throws IOException {
        Map<String, Object> result = new HashMap<>();
        Schema valueSchema = schema.getValueType();
        Iterator<Map.Entry<String, JsonNode>> entries = json.fields();
        while (entries.hasNext()) {
            Map.Entry<String, JsonNode> entry = entries.next();
            JsonNode node = entry.getValue();
            if (node.isNull()) {
                continue;
            }
            Object value = convertValue(node, valueSchema, null);
            if (value != NOT_CONVERTED) {
                result.put(entry.getKey(), value);
            }
        }
        return result;
    }

    /**
     * Converts a JSON array into a list for the given Avro array schema.
     * Elements that are JSON null, or whose element schema has no supported
     * branch, are left out. Each remaining element is appended exactly once,
     * and a nested array is appended as a nested list (not flattened).
     *
     * @param json   The JSON node holding the array elements
     * @param schema The Avro array schema
     * @return The converted list
     * @throws IOException Is thrown if an error occurs
     */
    private List<Object> convertArray(JsonNode json, Schema schema) throws IOException {
        List<Object> result = new ArrayList<>();
        Schema elementSchema = schema.getElementType();
        Iterator<JsonNode> elements = json.elements();
        while (elements.hasNext()) {
            JsonNode node = elements.next();
            if (node.isNull()) {
                continue;
            }
            Object value = convertValue(node, elementSchema, null);
            if (value != NOT_CONVERTED) {
                result.add(value);
            }
        }
        return result;
    }

    /**
     * Converts a single JSON node according to a schema, trying every branch
     * if the schema is a union. When several branches are convertible, the
     * result of the last one is returned.
     *
     * @param node          The JSON node to convert (must not be a Java null)
     * @param schema        The Avro schema of the value, union or not
     * @param anonFieldName Name of the record field the value belongs to, used
     *                      to anonymize string values; {@code null} for map
     *                      values and array elements, which are not anonymized
     * @return The converted value, or {@link #NOT_CONVERTED} if no branch has
     * a supported type
     * @throws IOException Is thrown if an error occurs
     */
    private Object convertValue(JsonNode node, Schema schema, String anonFieldName) throws IOException {
        Object result = NOT_CONVERTED;
        for (Schema candidate : candidateTypes(schema)) {
            switch (candidate.getType()) {
                case STRING:
                    if (anonFieldName == null) {
                        result = node.asText();
                    } else {
                        result = HCatUtils.getHashValueIfInList(anonFieldName, node.asText(), anonFields, salt);
                    }
                    break;
                case INT:
                    result = node.asInt();
                    break;
                case LONG:
                    result = node.asLong();
                    break;
                case BOOLEAN:
                    result = node.asBoolean();
                    break;
                case DOUBLE:
                    result = node.asDouble();
                    break;
                case FLOAT:
                    // Avro's generic data model represents FLOAT as
                    // java.lang.Float; storing a Double here would not match
                    // a float schema when the record is written.
                    result = (float) node.asDouble();
                    break;
                case RECORD:
                    result = convertRecord(node, candidate);
                    break;
                case ARRAY:
                    result = convertArray(node, candidate);
                    break;
                case MAP:
                    result = convertMap(node, candidate);
                    break;
                default:
                    // NULL and any other type: nothing to convert.
                    break;
            }
        }
        return result;
    }

    /**
     * Returns the branches of a union schema, or the schema itself as the
     * only candidate when it is not a union ({@code Schema.getTypes()} is
     * only defined for unions).
     *
     * @param schema The schema to inspect
     * @return The list of candidate schemas
     */
    private static List<Schema> candidateTypes(Schema schema) {
        if (schema.getType() == Schema.Type.UNION) {
            return schema.getTypes();
        }
        return Collections.singletonList(schema);
    }
}