org.apache.druid.indexer.InputRowSerde.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.druid.indexer.InputRowSerde.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.indexer;

import com.google.common.base.Supplier;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.ByteArrayDataInput;
import com.google.common.io.ByteArrayDataOutput;
import com.google.common.io.ByteStreams;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.data.input.Rows;
import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.java.util.common.parsers.ParseException;
import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.segment.DimensionHandlerUtils;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.serde.ComplexMetricSerde;
import org.apache.druid.segment.serde.ComplexMetrics;
import org.apache.hadoop.io.WritableUtils;

import javax.annotation.Nullable;
import java.io.DataInput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

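/**
 * Converts an {@link InputRow} to a byte array and back. The encoding holds the row timestamp, then each
 * dimension name with its value(s), then one entry per aggregator with a null marker and the aggregated value.
 */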
public class InputRowSerde {
    private static final Logger log = new Logger(InputRowSerde.class);

    private static final IndexSerdeTypeHelper STRING_HELPER = new StringIndexSerdeTypeHelper();
    private static final IndexSerdeTypeHelper LONG_HELPER = new LongIndexSerdeTypeHelper();
    private static final IndexSerdeTypeHelper FLOAT_HELPER = new FloatIndexSerdeTypeHelper();
    private static final IndexSerdeTypeHelper DOUBLE_HELPER = new DoubleIndexSerdeTypeHelper();

    /**
     * Strategy for writing one dimension value into the serialized row and reading it back.
     *
     * @param <T> the type produced by {@link #deserialize(ByteArrayDataInput)}
     */
    public interface IndexSerdeTypeHelper<T> {
        /**
         * @return the {@link ValueType} this helper handles
         */
        ValueType getType();

        /**
         * Writes {@code value} to {@code out}. The numeric implementations in this file still write a
         * default value and then throw a {@link ParseException} when the raw value cannot be converted.
         *
         * @param out   destination of the encoded value
         * @param value raw dimension value taken from the input row
         */
        void serialize(ByteArrayDataOutput out, Object value);

        /**
         * Reads back one value previously written by {@link #serialize(ByteArrayDataOutput, Object)}.
         *
         * @param in source positioned at the start of the encoded value
         * @return the decoded value
         */
        T deserialize(ByteArrayDataInput in);
    }

    /**
     * Builds a lookup from dimension name to the serde helper matching that dimension's declared value type.
     *
     * @param dimensionsSpec spec whose dimension schemas are inspected
     * @return a new map keyed by dimension name
     * @throws IAE if a dimension declares a value type other than STRING, LONG, FLOAT or DOUBLE
     */
    public static Map<String, IndexSerdeTypeHelper> getTypeHelperMap(DimensionsSpec dimensionsSpec) {
        final Map<String, IndexSerdeTypeHelper> helpersByDimension = Maps.newHashMap();
        for (DimensionSchema schema : dimensionsSpec.getDimensions()) {
            // Resolve the helper first so an unsupported type fails before the map is touched.
            final IndexSerdeTypeHelper helper = helperForValueType(schema.getValueType());
            helpersByDimension.put(schema.getName(), helper);
        }
        return helpersByDimension;
    }

    /** Picks the shared helper instance for a dimension value type, rejecting unsupported types. */
    private static IndexSerdeTypeHelper helperForValueType(ValueType valueType) {
        switch (valueType) {
        case STRING:
            return STRING_HELPER;
        case LONG:
            return LONG_HELPER;
        case FLOAT:
            return FLOAT_HELPER;
        case DOUBLE:
            return DOUBLE_HELPER;
        default:
            throw new IAE("Invalid type: [%s]", valueType);
        }
    }

    /**
     * Outcome of serializing one row: the encoded bytes plus the messages of any parse exceptions
     * that were collected while encoding.
     */
    public static class SerializeResult {
        private final byte[] serializedRow;
        private final List<String> parseExceptionMessages;

        /**
         * @param rowBytes      the encoded row
         * @param parseMessages messages of parse exceptions gathered during serialization
         */
        public SerializeResult(final byte[] rowBytes, final List<String> parseMessages) {
            this.parseExceptionMessages = parseMessages;
            this.serializedRow = rowBytes;
        }

        /**
         * @return the encoded row, exactly the array passed to the constructor
         */
        public byte[] getSerializedRow() {
            return this.serializedRow;
        }

        /**
         * @return the parse exception messages, exactly the list passed to the constructor
         */
        public List<String> getParseExceptionMessages() {
            return this.parseExceptionMessages;
        }
    }

    /**
     * Helper for string dimensions. Values are converted to a list of strings and written as a string array.
     */
    public static class StringIndexSerdeTypeHelper implements IndexSerdeTypeHelper<List<String>> {
        @Override
        public ValueType getType() {
            return ValueType.STRING;
        }

        /**
         * Converts {@code value} to strings and writes them as a string array.
         * An {@link IOException} from the write is rethrown wrapped in a {@link RuntimeException}.
         */
        @Override
        public void serialize(ByteArrayDataOutput out, Object value) {
            final List<String> strings = Rows.objectToStrings(value);
            try {
                writeStringArray(strings, out);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }

        /**
         * Reads a string array back. An {@link IOException} from the read is rethrown wrapped in a
         * {@link RuntimeException}.
         */
        @Override
        public List<String> deserialize(ByteArrayDataInput in) {
            final List<String> strings;
            try {
                strings = readStringArray(in);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            return strings;
        }
    }

    /**
     * Helper for long dimensions. Each value is written as a fixed-width long.
     */
    public static class LongIndexSerdeTypeHelper implements IndexSerdeTypeHelper<Long> {
        @Override
        public ValueType getType() {
            return ValueType.LONG;
        }

        /**
         * Writes {@code value} as a long. When conversion yields null or throws, zero is written instead;
         * a conversion {@link ParseException} is rethrown only after the write so the output stays well-formed.
         */
        @Override
        public void serialize(ByteArrayDataOutput out, Object value) {
            Long converted;
            ParseException deferred;
            try {
                converted = DimensionHandlerUtils.convertObjectToLong(value, true);
                deferred = null;
            } catch (ParseException e) {
                converted = null;
                deferred = e;
            }

            // TODO: remove the null -> zero conversion when the
            // https://github.com/apache/incubator-druid/pull/5278 series of patches is merged; the
            // serialized encoding will also need to change so that it can represent numeric nulls.
            out.writeLong(converted != null ? converted : DimensionHandlerUtils.ZERO_LONG);

            if (deferred != null) {
                throw deferred;
            }
        }

        @Override
        public Long deserialize(ByteArrayDataInput in) {
            return in.readLong();
        }
    }

    /**
     * Helper for float dimensions. Each value is written as a fixed-width float.
     */
    public static class FloatIndexSerdeTypeHelper implements IndexSerdeTypeHelper<Float> {
        @Override
        public ValueType getType() {
            return ValueType.FLOAT;
        }

        /**
         * Writes {@code value} as a float. When conversion yields null or throws, zero is written instead;
         * a conversion {@link ParseException} is rethrown only after the write so the output stays well-formed.
         */
        @Override
        public void serialize(ByteArrayDataOutput out, Object value) {
            Float converted;
            ParseException deferred;
            try {
                converted = DimensionHandlerUtils.convertObjectToFloat(value, true);
                deferred = null;
            } catch (ParseException e) {
                converted = null;
                deferred = e;
            }

            // TODO: remove the null -> zero conversion when the
            // https://github.com/apache/incubator-druid/pull/5278 series of patches is merged; the
            // serialized encoding will also need to change so that it can represent numeric nulls.
            out.writeFloat(converted != null ? converted : DimensionHandlerUtils.ZERO_FLOAT);

            if (deferred != null) {
                throw deferred;
            }
        }

        @Override
        public Float deserialize(ByteArrayDataInput in) {
            return in.readFloat();
        }
    }

    /**
     * Helper for double dimensions. Each value is written as a fixed-width double.
     */
    public static class DoubleIndexSerdeTypeHelper implements IndexSerdeTypeHelper<Double> {
        @Override
        public ValueType getType() {
            return ValueType.DOUBLE;
        }

        /**
         * Writes {@code value} as a double. When conversion yields null or throws, zero is written instead;
         * a conversion {@link ParseException} is rethrown only after the write so the output stays well-formed.
         */
        @Override
        public void serialize(ByteArrayDataOutput out, Object value) {
            Double converted;
            ParseException deferred;
            try {
                converted = DimensionHandlerUtils.convertObjectToDouble(value, true);
                deferred = null;
            } catch (ParseException e) {
                converted = null;
                deferred = e;
            }

            // TODO: remove the null -> zero conversion when the
            // https://github.com/apache/incubator-druid/pull/5278 series of patches is merged; the
            // serialized encoding will also need to change so that it can represent numeric nulls.
            out.writeDouble(converted != null ? converted : DimensionHandlerUtils.ZERO_DOUBLE);

            if (deferred != null) {
                throw deferred;
            }
        }

        @Override
        public Double deserialize(ByteArrayDataInput in) {
            return in.readDouble();
        }
    }

    /**
     * Serializes a row: the timestamp, then every dimension (name followed by its value), then every
     * aggregator (name, null marker, and the aggregated value when not null).
     *
     * @param typeHelperMap helpers keyed by dimension name; dimensions without an entry are written as strings
     * @param row           the row to serialize
     * @param aggs          aggregator factories, each applied to {@code row} to produce its metric value
     * @return the encoded bytes together with the messages of any {@link ParseException}s encountered
     * @throws RuntimeException wrapping any {@link IOException} raised while writing
     */
    public static final SerializeResult toBytes(final Map<String, IndexSerdeTypeHelper> typeHelperMap,
            final InputRow row, AggregatorFactory[] aggs) {
        final List<String> parseErrors = new ArrayList<>();
        final ByteArrayDataOutput out = ByteStreams.newDataOutput();
        try {
            // Timestamp comes first.
            out.writeLong(row.getTimestampFromEpoch());

            // Dimensions: count, then (name, value) pairs.
            final List<String> dimensionNames = row.getDimensions();
            WritableUtils.writeVInt(out, dimensionNames.size());
            for (String dimensionName : dimensionNames) {
                final IndexSerdeTypeHelper mapped = typeHelperMap.get(dimensionName);
                final IndexSerdeTypeHelper helper = mapped == null ? STRING_HELPER : mapped;
                writeString(dimensionName, out);

                try {
                    helper.serialize(out, row.getRaw(dimensionName));
                } catch (ParseException pe) {
                    parseErrors.add(pe.getMessage());
                }
            }

            // Metrics: count, then (name, null marker, value) entries.
            final Supplier<InputRow> rowSupplier = () -> row;
            WritableUtils.writeVInt(out, aggs.length);
            for (AggregatorFactory factory : aggs) {
                final String metricName = factory.getName();
                writeString(metricName, out);

                try (Aggregator aggregator = factory.factorize(IncrementalIndex
                        .makeColumnSelectorFactory(VirtualColumns.EMPTY, factory, rowSupplier, true))) {
                    try {
                        aggregator.aggregate();
                    } catch (ParseException pe) {
                        // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
                        log.debug(pe, "Encountered parse error, skipping aggregator[%s].", metricName);
                        parseErrors.add(pe.getMessage());
                    }

                    writeAggregatorValue(out, aggregator, factory.getTypeName());
                }
            }
        } catch (IOException ex) {
            throw new RuntimeException(ex);
        }
        return new SerializeResult(out.toByteArray(), parseErrors);
    }

    /** Writes the null marker for one aggregator and, when it is not null, its value encoded per type name. */
    private static void writeAggregatorValue(ByteArrayDataOutput out, Aggregator aggregator, String typeName)
            throws IOException {
        if (aggregator.isNull()) {
            out.writeByte(NullHandling.IS_NULL_BYTE);
            return;
        }
        out.writeByte(NullHandling.IS_NOT_NULL_BYTE);

        if ("float".equals(typeName)) {
            out.writeFloat(aggregator.getFloat());
            return;
        }
        if ("long".equals(typeName)) {
            WritableUtils.writeVLong(out, aggregator.getLong());
            return;
        }
        if ("double".equals(typeName)) {
            out.writeDouble(aggregator.getDouble());
            return;
        }

        // Anything else is a complex metric, encoded through its registered serde.
        final Object complexValue = aggregator.get();
        final ComplexMetricSerde serde = getComplexMetricSerde(typeName);
        writeBytes(serde.toBytes(complexValue), out);
    }

    /**
     * Writes a length-prefixed byte array. The length is a variable-length int; a null array is
     * encoded as length -1 with no payload.
     *
     * @param value bytes to write, or null
     * @param out   destination
     * @throws IOException if writing the length prefix fails
     */
    private static void writeBytes(@Nullable byte[] value, ByteArrayDataOutput out) throws IOException {
        if (value == null) {
            WritableUtils.writeVInt(out, -1);
            return;
        }
        WritableUtils.writeVInt(out, value.length);
        out.write(value, 0, value.length);
    }

    /**
     * Writes a string as its length-prefixed UTF-8 bytes.
     *
     * @param value string to encode
     * @param out   destination
     * @throws IOException if the underlying write fails
     */
    private static void writeString(String value, ByteArrayDataOutput out) throws IOException {
        final byte[] utf8 = StringUtils.toUtf8(value);
        writeBytes(utf8, out);
    }

    /**
     * Writes a list of strings as an element count followed by each string. A null or empty list is
     * written as a count of zero.
     *
     * @param values strings to write, may be null
     * @param out    destination
     * @throws IOException if the underlying write fails
     */
    private static void writeStringArray(List<String> values, ByteArrayDataOutput out) throws IOException {
        final int count = (values == null) ? 0 : values.size();
        WritableUtils.writeVInt(out, count);
        if (count == 0) {
            return;
        }
        for (String element : values) {
            writeString(element, out);
        }
    }

    /**
     * Reads a string written as length-prefixed UTF-8 bytes.
     *
     * @param in source positioned at the length prefix
     * @return the decoded string, or null when the null marker (length -1) was stored
     * @throws IOException if reading fails or the stored length is invalid
     */
    @Nullable
    private static String readString(DataInput in) throws IOException {
        final byte[] utf8 = readBytes(in);
        return utf8 == null ? null : StringUtils.fromUtf8(utf8);
    }

    /**
     * Reads a length-prefixed byte array. A length of -1 is the marker used on the write side for a null
     * array and is decoded as null instead of attempting to allocate a negatively sized array.
     *
     * @param in source positioned at the length prefix
     * @return the bytes, or null when the null marker was stored
     * @throws IOException if reading fails or the stored length is below -1
     */
    @Nullable
    private static byte[] readBytes(DataInput in) throws IOException {
        final int size = WritableUtils.readVInt(in);
        if (size == -1) {
            // Mirror of the write side: -1 means "no array", not "empty array".
            return null;
        }
        if (size < 0) {
            // No writer in this class produces such a length; report it rather than fail on allocation.
            throw new IOException("Invalid serialized byte array length: " + size);
        }
        final byte[] result = new byte[size];
        in.readFully(result, 0, size);
        return result;
    }

    /**
     * Reads a string array written as an element count followed by each string.
     *
     * @param in source positioned at the element count
     * @return the strings in stored order, or null when the stored count is zero
     * @throws IOException if reading fails
     */
    private static List<String> readStringArray(DataInput in) throws IOException {
        final int count = WritableUtils.readVInt(in);
        if (count == 0) {
            // Zero elements is reported as null, not as an empty list.
            return null;
        }
        final List<String> strings = Lists.newArrayListWithCapacity(count);
        int remaining = count;
        while (remaining > 0) {
            strings.add(readString(in));
            remaining--;
        }
        return strings;
    }

    /**
     * Rebuilds a row from its serialized form: the timestamp, then the dimensions, then the metrics.
     *
     * @param typeHelperMap helpers keyed by dimension name; dimensions without an entry are read as strings
     * @param data          the serialized row
     * @param aggs          aggregator factories used to resolve each metric's type name
     * @return a {@link MapBasedInputRow} holding the decoded timestamp, dimension names and event values
     * @throws RuntimeException wrapping any {@link IOException} raised while reading
     */
    public static final InputRow fromBytes(final Map<String, IndexSerdeTypeHelper> typeHelperMap, byte[] data,
            AggregatorFactory[] aggs) {
        try {
            final ByteArrayDataInput in = ByteStreams.newDataInput(data);

            final long timestamp = in.readLong();
            final Map<String, Object> event = Maps.newHashMap();

            // Dimensions: every name is recorded, but a value is stored only when one was decoded.
            final List<String> dimensionNames = Lists.newArrayList();
            final int dimensionCount = WritableUtils.readVInt(in);
            for (int d = 0; d < dimensionCount; d++) {
                final String dimensionName = readString(in);
                dimensionNames.add(dimensionName);

                final IndexSerdeTypeHelper mapped = typeHelperMap.get(dimensionName);
                final IndexSerdeTypeHelper helper = mapped == null ? STRING_HELPER : mapped;
                final Object decoded = helper.deserialize(in);
                if (decoded == null) {
                    continue;
                }

                if (helper.getType() != ValueType.STRING) {
                    event.put(dimensionName, decoded);
                } else {
                    final List<String> strings = (List<String>) decoded;
                    // A single-element list is unwrapped to the bare string.
                    if (strings.size() != 1) {
                        event.put(dimensionName, strings);
                    } else {
                        event.put(dimensionName, strings.get(0));
                    }
                }
            }

            // Metrics: name, null marker, then the value when the marker says one is present.
            final int metricCount = WritableUtils.readVInt(in);
            for (int m = 0; m < metricCount; m++) {
                final String metricName = readString(in);
                final String typeName = getType(metricName, aggs, m);
                final byte nullMarker = in.readByte();
                if (nullMarker != NullHandling.IS_NULL_BYTE) {
                    event.put(metricName, readMetricValue(in, typeName));
                }
            }

            return new MapBasedInputRow(timestamp, dimensionNames, event);
        } catch (IOException ex) {
            throw new RuntimeException(ex);
        }
    }

    /** Decodes one non-null metric value according to its aggregator type name. */
    private static Object readMetricValue(ByteArrayDataInput in, String typeName) throws IOException {
        if ("float".equals(typeName)) {
            return in.readFloat();
        }
        if ("long".equals(typeName)) {
            return WritableUtils.readVLong(in);
        }
        if ("double".equals(typeName)) {
            return in.readDouble();
        }
        // Anything else is a complex metric, decoded through its registered serde.
        final ComplexMetricSerde serde = getComplexMetricSerde(typeName);
        final byte[] encoded = readBytes(in);
        return serde.fromBytes(encoded, 0, encoded.length);
    }

    /**
     * Resolves the type name of the aggregator that produced {@code metric}.
     * <p>
     * The aggregator at position {@code i} is tried first. If {@code i} is outside {@code aggs} or the
     * aggregator there has a different name, all aggregators are searched by name.
     *
     * @param metric metric name read from the serialized row
     * @param aggs   aggregator factories to search
     * @param i      position of the metric in the serialized row, used as a hint
     * @return the matching aggregator's type name, or null when no aggregator has that name
     */
    @Nullable
    private static String getType(String metric, AggregatorFactory[] aggs, int i) {
        // Guard the index: the serialized metric count is not guaranteed to match aggs.length.
        if (i >= 0 && i < aggs.length && aggs[i].getName().equals(metric)) {
            return aggs[i].getTypeName();
        }
        log.warn("Aggs disordered, fall backs to loop.");
        for (AggregatorFactory agg : aggs) {
            if (agg.getName().equals(metric)) {
                return agg.getTypeName();
            }
        }
        return null;
    }

    /**
     * Looks up the serde registered for a complex metric type.
     *
     * @param type complex metric type name
     * @return the registered serde, never null
     * @throws IAE if no serde is registered for {@code type}
     */
    private static ComplexMetricSerde getComplexMetricSerde(String type) {
        final ComplexMetricSerde registered = ComplexMetrics.getSerdeForType(type);
        if (registered != null) {
            return registered;
        }
        throw new IAE("Unknown type[%s]", type);
    }
}