/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.indexer;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.JSONParseSpec;
import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.indexer.path.StaticPathSpec;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.jackson.JacksonUtils;
import org.apache.druid.java.util.common.parsers.JSONPathSpec;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.filter.SelectorDimFilter;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.indexing.DataSchema;
import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec;
import org.apache.druid.segment.transform.ExpressionTransform;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Mapper;
import org.easymock.EasyMock;
import org.junit.Assert;
import org.junit.Test;

import java.io.DataInput;
import java.io.DataOutput;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class HadoopDruidIndexerMapperTest
{
  private static final ObjectMapper JSON_MAPPER = TestHelper.makeJsonMapper();

  private static final DataSchema DATA_SCHEMA = new DataSchema(
      "test_ds",
      JSON_MAPPER.convertValue(
          new HadoopyStringInputRowParser(
              new JSONParseSpec(
                  new TimestampSpec("t", "auto", null),
                  new DimensionsSpec(
                      DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim1t", "dim2")),
                      null,
                      null
                  ),
                  new JSONPathSpec(true, ImmutableList.of()),
                  ImmutableMap.of()
              )
          ),
          JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT
      ),
      new AggregatorFactory[]{new CountAggregatorFactory("rows")},
      new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null),
      null,
      JSON_MAPPER
  );

  private static final HadoopIOConfig IO_CONFIG = new HadoopIOConfig(
      JSON_MAPPER.convertValue(
          new StaticPathSpec("dummyPath", null),
          JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT
      ),
      null,
      "dummyOutputPath"
  );

  private static final HadoopTuningConfig TUNING_CONFIG = HadoopTuningConfig
      .makeDefaultTuningConfig()
      .withWorkingPath("dummyWorkingPath");
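  /**
   * Feeds JSON rows through the mapper with no transform spec and verifies that the
   * parsed InputRows round-trip back to the original maps.
   */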
  @Test
  public void testHadoopyStringParser() throws Exception
  {
    final HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(
        new HadoopIngestionSpec(DATA_SCHEMA, IO_CONFIG, TUNING_CONFIG)
    );
    final MyMapper mapper = new MyMapper();
    final Configuration hadoopConfig = new Configuration();
    hadoopConfig.set(
        HadoopDruidIndexerConfig.CONFIG_PROPERTY,
        HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(config)
    );
    final Mapper.Context mapContext = EasyMock.mock(Mapper.Context.class);
    EasyMock.expect(mapContext.getConfiguration()).andReturn(hadoopConfig).once();
    EasyMock.replay(mapContext);
    mapper.setup(mapContext);

    final List<Map<String, Object>> rows = ImmutableList.of(
        ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim1", "x", "m1", 1.0),
        ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim2", "y", "m1", 1.0)
    );

    for (Map<String, Object> row : rows) {
      mapper.map(NullWritable.get(), new Text(JSON_MAPPER.writeValueAsString(row)), mapContext);
    }

    assertRowListEquals(rows, mapper.getRows());
  }

  /**
   * Applies a TransformSpec that keeps only rows with dim1 == "foo" and adds a derived
   * dim1t dimension; rows that fail the filter must be thrown away by the mapper.
   */
  @Test
  public void testHadoopyStringParserWithTransformSpec() throws Exception
  {
    final HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(
        new HadoopIngestionSpec(
            DATA_SCHEMA.withTransformSpec(
                new TransformSpec(
                    new SelectorDimFilter("dim1", "foo", null),
                    ImmutableList.of(
                        new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil())
                    )
                )
            ),
            IO_CONFIG,
            TUNING_CONFIG
        )
    );
    final MyMapper mapper = new MyMapper();
    final Configuration hadoopConfig = new Configuration();
    hadoopConfig.set(
        HadoopDruidIndexerConfig.CONFIG_PROPERTY,
        HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(config)
    );
    final Mapper.Context mapContext = EasyMock.mock(Mapper.Context.class);
    EasyMock.expect(mapContext.getConfiguration()).andReturn(hadoopConfig).once();
    EasyMock.expect(mapContext.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER))
            .andReturn(getTestCounter());
    EasyMock.replay(mapContext);
    mapper.setup(mapContext);

    final List<Map<String, Object>> rows = ImmutableList.of(
        ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim1", "foo", "dim2", "x", "m1", 1.0),
        ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim1", "bar", "dim2", "y", "m1", 1.0),
        ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim1", "foo", "dim2", "z", "m1", 1.0)
    );

    for (Map<String, Object> row : rows) {
      mapper.map(NullWritable.get(), new Text(JSON_MAPPER.writeValueAsString(row)), mapContext);
    }

    assertRowListEquals(
        ImmutableList.of(
            ImmutableMap.of(
                "t", "2000-01-01T00:00:00.000Z",
                "dim1", "foo",
                "dim1t", "foofoo",
                "dim2", "x",
                "m1", 1.0
            ),
            ImmutableMap.of(
                "t", "2000-01-01T00:00:00.000Z",
                "dim1", "foo",
                "dim1t", "foofoo",
                "dim2", "z",
                "m1", 1.0
            )
        ),
        mapper.getRows()
    );
  }

  private static void assertRowListEquals(final List<Map<String, Object>> expected, final List<InputRow> actual)
  {
    Assert.assertEquals(
        expected,
        actual.stream().map(HadoopDruidIndexerMapperTest::rowToMap).collect(Collectors.toList())
    );
  }
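  /**
   * Flattens an InputRow into a plain map of timestamp, dimensions, and the m1 metric
   * so it can be compared directly against the expected row maps.
   */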
  private static Map<String, Object> rowToMap(final InputRow row)
  {
    // Normalize the input row for the purposes of testing.
    final ImmutableMap.Builder<String, Object> builder = ImmutableMap.<String, Object>builder()
        .put("t", row.getTimestamp().toString());

    for (String dim : row.getDimensions()) {
      final Object val = row.getRaw(dim);
      if (val != null) {
        builder.put(dim, val);
      }
    }

    // Other, non-dimension fields are not self-describing, so they must be specified individually.
    builder.put("m1", row.getRaw("m1"));
    return builder.build();
  }

  private static Counter getTestCounter()
  {
    // Minimal no-op Counter so the mocked Context has something to return for ROWS_THROWN_AWAY_COUNTER.
    return new Counter()
    {
      @Override
      public void setDisplayName(String displayName)
      {
      }

      @Override
      public String getName()
      {
        return null;
      }

      @Override
      public String getDisplayName()
      {
        return null;
      }

      @Override
      public long getValue()
      {
        return 0;
      }

      @Override
      public void setValue(long value)
      {
      }

      @Override
      public void increment(long incr)
      {
      }

      @Override
      public Counter getUnderlyingCounter()
      {
        return null;
      }

      @Override
      public void write(DataOutput out)
      {
      }

      @Override
      public void readFields(DataInput in)
      {
      }
    };
  }

  public static class MyMapper extends HadoopDruidIndexerMapper
  {
    private final List<InputRow> rows = new ArrayList<>();

    @Override
    protected void innerMap(final InputRow inputRow, final Context context)
    {
      rows.add(inputRow);
    }

    public List<InputRow> getRows()
    {
      return rows;
    }
  }
}