org.apache.druid.query.aggregation.AggregationTestHelper.java Source code

Introduction

Here is the source code for org.apache.druid.query.aggregation.AggregationTestHelper.java, a utility from the Apache Druid test suite that builds segments from raw input data and runs queries against them the way a Druid cluster would, so that aggregator implementations can be verified end to end.

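Before the source itself, here is a minimal sketch of how this helper is typically driven from a JUnit 4 test. Everything below is illustrative: the parser spec, aggregator spec, query JSON, column names and sample_data.tsv are assumptions made for the example, not part of the class.

import com.google.common.collect.ImmutableList;
import org.apache.druid.data.input.Row;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.query.aggregation.AggregationTestHelper;
import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import java.io.File;
import java.util.List;

public class ExampleDoubleSumAggregationTest {
    @Rule
    public final TemporaryFolder tempFolder = new TemporaryFolder();

    // Hypothetical parser spec for a TSV file with columns: timestamp, product, value.
    private static final String PARSER_JSON = "{"
            + "\"type\": \"string\","
            + "\"parseSpec\": {"
            + "\"format\": \"tsv\","
            + "\"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"auto\"},"
            + "\"dimensionsSpec\": {\"dimensions\": [\"product\"]},"
            + "\"columns\": [\"timestamp\", \"product\", \"value\"]"
            + "}}";

    // A single doubleSum aggregator over the "value" column.
    private static final String AGGREGATORS_JSON =
            "[{\"type\": \"doubleSum\", \"name\": \"value_sum\", \"fieldName\": \"value\"}]";

    // The group-by query that will be run against the freshly built segment.
    private static final String GROUP_BY_QUERY_JSON = "{"
            + "\"queryType\": \"groupBy\","
            + "\"dataSource\": \"test_datasource\","
            + "\"granularity\": \"ALL\","
            + "\"dimensions\": [\"product\"],"
            + "\"aggregations\": " + AGGREGATORS_JSON + ","
            + "\"intervals\": [\"1970/2050\"]"
            + "}";

    @Test
    public void testDoubleSumOnSegment() throws Exception {
        try (AggregationTestHelper helper = AggregationTestHelper.createGroupByQueryAggregationTestHelper(
                ImmutableList.of(), new GroupByQueryConfig(), tempFolder)) {
            Sequence<Row> results = helper.createIndexAndRunQueryOnSegment(
                    new File("sample_data.tsv"), // hypothetical raw input file
                    PARSER_JSON,
                    AGGREGATORS_JSON,
                    0L,                  // minTimestamp of the input data
                    Granularities.NONE,  // index (rollup) granularity
                    5000,                // max rows per in-memory index before spilling to disk
                    GROUP_BY_QUERY_JSON);
            // Materialize and verify; assumes Sequence#toList is available in this Druid version.
            List<Row> rows = results.toList();
            // Assert on rows here, e.g. one row per product with the expected value_sum.
        }
    }
}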
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.aggregation;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.ObjectCodec;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.InjectableValues;
import com.fasterxml.jackson.databind.Module;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Function;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.Closeables;
import com.google.common.util.concurrent.MoreExecutors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.apache.druid.collections.CloseableStupidPool;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.Row;
import org.apache.druid.data.input.impl.InputRowParser;
import org.apache.druid.data.input.impl.StringInputRowParser;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.java.util.common.guava.CloseQuietly;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;
import org.apache.druid.java.util.common.guava.Yielder;
import org.apache.druid.java.util.common.guava.YieldingAccumulator;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.query.FinalizeResultsQueryRunner;
import org.apache.druid.query.Query;
import org.apache.druid.query.QueryPlus;
import org.apache.druid.query.QueryRunner;
import org.apache.druid.query.QueryRunnerFactory;
import org.apache.druid.query.QueryRunnerTestHelper;
import org.apache.druid.query.QueryToolChest;
import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.query.groupby.GroupByQueryRunnerFactory;
import org.apache.druid.query.groupby.GroupByQueryRunnerTest;
import org.apache.druid.query.select.SelectQueryConfig;
import org.apache.druid.query.select.SelectQueryEngine;
import org.apache.druid.query.select.SelectQueryQueryToolChest;
import org.apache.druid.query.select.SelectQueryRunnerFactory;
import org.apache.druid.query.timeseries.TimeseriesQueryEngine;
import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest;
import org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory;
import org.apache.druid.query.topn.TopNQueryConfig;
import org.apache.druid.query.topn.TopNQueryQueryToolChest;
import org.apache.druid.query.topn.TopNQueryRunnerFactory;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.IndexIO;
import org.apache.druid.segment.IndexMerger;
import org.apache.druid.segment.IndexMergerV9;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.QueryableIndexSegment;
import org.apache.druid.segment.Segment;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.column.ColumnConfig;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory;
import org.junit.rules.TemporaryFolder;

import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Array;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/**
 * This class provides general utilities for testing any Druid aggregation implementation, given raw data,
 * a parser spec, aggregator specs and a query.
 * It lets you build an index from raw data and then run a query on it in a way that simulates query
 * processing inside a Druid cluster, exercising most of the aggregation machinery, and returns results
 * that you can verify.
 */
public class AggregationTestHelper implements Closeable {
    private final ObjectMapper mapper;
    private final IndexMerger indexMerger;
    private final IndexIO indexIO;
    private final QueryToolChest toolChest;
    private final QueryRunnerFactory factory;

    private final TemporaryFolder tempFolder;
    private final Closer resourceCloser;

    private AggregationTestHelper(ObjectMapper mapper, IndexMerger indexMerger, IndexIO indexIO,
            QueryToolChest toolchest, QueryRunnerFactory factory, TemporaryFolder tempFolder,
            List<? extends Module> jsonModulesToRegister, Closer resourceCloser) {
        this.mapper = mapper;
        this.indexMerger = indexMerger;
        this.indexIO = indexIO;
        this.toolChest = toolchest;
        this.factory = factory;
        this.tempFolder = tempFolder;
        this.resourceCloser = resourceCloser;

        for (Module mod : jsonModulesToRegister) {
            mapper.registerModule(mod);
        }
    }

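    //Each factory method below wires up the toolchest, runner factory and index tooling needed to test
    //aggregators under a particular query type (group-by, select, timeseries or topN).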
    public static AggregationTestHelper createGroupByQueryAggregationTestHelper(
            List<? extends Module> jsonModulesToRegister, GroupByQueryConfig config, TemporaryFolder tempFolder) {
        final ObjectMapper mapper = TestHelper.makeJsonMapper();
        final Pair<GroupByQueryRunnerFactory, Closer> factoryAndCloser = GroupByQueryRunnerTest
                .makeQueryRunnerFactory(mapper, config);
        final GroupByQueryRunnerFactory factory = factoryAndCloser.lhs;
        final Closer closer = factoryAndCloser.rhs;

        IndexIO indexIO = new IndexIO(mapper, new ColumnConfig() {
            @Override
            public int columnCacheSizeBytes() {
                return 0;
            }
        });

        return new AggregationTestHelper(mapper,
                new IndexMergerV9(mapper, indexIO, OffHeapMemorySegmentWriteOutMediumFactory.instance()), indexIO,
                factory.getToolchest(), factory, tempFolder, jsonModulesToRegister, closer);
    }

    public static AggregationTestHelper createSelectQueryAggregationTestHelper(
            List<? extends Module> jsonModulesToRegister, TemporaryFolder tempFolder) {
        ObjectMapper mapper = TestHelper.makeJsonMapper();
        mapper.setInjectableValues(
                new InjectableValues.Std().addValue(SelectQueryConfig.class, new SelectQueryConfig(true)));

        Supplier<SelectQueryConfig> configSupplier = Suppliers.ofInstance(new SelectQueryConfig(true));

        SelectQueryQueryToolChest toolchest = new SelectQueryQueryToolChest(TestHelper.makeJsonMapper(),
                QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator(), configSupplier);

        SelectQueryRunnerFactory factory = new SelectQueryRunnerFactory(
                new SelectQueryQueryToolChest(TestHelper.makeJsonMapper(),
                        QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator(), configSupplier),
                new SelectQueryEngine(), QueryRunnerTestHelper.NOOP_QUERYWATCHER);

        IndexIO indexIO = new IndexIO(mapper, new ColumnConfig() {
            @Override
            public int columnCacheSizeBytes() {
                return 0;
            }
        });

        return new AggregationTestHelper(mapper,
                new IndexMergerV9(mapper, indexIO, OffHeapMemorySegmentWriteOutMediumFactory.instance()), indexIO,
                toolchest, factory, tempFolder, jsonModulesToRegister, Closer.create());
    }

    public static AggregationTestHelper createTimeseriesQueryAggregationTestHelper(
            List<? extends Module> jsonModulesToRegister, TemporaryFolder tempFolder) {
        ObjectMapper mapper = TestHelper.makeJsonMapper();

        TimeseriesQueryQueryToolChest toolchest = new TimeseriesQueryQueryToolChest(
                QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator());

        TimeseriesQueryRunnerFactory factory = new TimeseriesQueryRunnerFactory(toolchest,
                new TimeseriesQueryEngine(), QueryRunnerTestHelper.NOOP_QUERYWATCHER);

        IndexIO indexIO = new IndexIO(mapper, new ColumnConfig() {
            @Override
            public int columnCacheSizeBytes() {
                return 0;
            }
        });

        return new AggregationTestHelper(mapper,
                new IndexMergerV9(mapper, indexIO, OffHeapMemorySegmentWriteOutMediumFactory.instance()), indexIO,
                toolchest, factory, tempFolder, jsonModulesToRegister, Closer.create());
    }

    public static AggregationTestHelper createTopNQueryAggregationTestHelper(
            List<? extends Module> jsonModulesToRegister, TemporaryFolder tempFolder) {
        ObjectMapper mapper = TestHelper.makeJsonMapper();

        TopNQueryQueryToolChest toolchest = new TopNQueryQueryToolChest(new TopNQueryConfig(),
                QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator());

        final CloseableStupidPool<ByteBuffer> pool = new CloseableStupidPool<>("TopNQueryRunnerFactory-bufferPool",
                new Supplier<ByteBuffer>() {
                    @Override
                    public ByteBuffer get() {
                        return ByteBuffer.allocate(10 * 1024 * 1024);
                    }
                });
        final Closer resourceCloser = Closer.create();
        TopNQueryRunnerFactory factory = new TopNQueryRunnerFactory(pool, toolchest,
                QueryRunnerTestHelper.NOOP_QUERYWATCHER);

        IndexIO indexIO = new IndexIO(mapper, new ColumnConfig() {
            @Override
            public int columnCacheSizeBytes() {
                return 0;
            }
        });

        return new AggregationTestHelper(mapper,
                new IndexMergerV9(mapper, indexIO, OffHeapMemorySegmentWriteOutMediumFactory.instance()), indexIO,
                toolchest, factory, tempFolder, jsonModulesToRegister, resourceCloser);
    }

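    //Convenience overloads: build a segment from the given raw input in a temp folder and immediately
    //run the given query against it.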
    public Sequence<Row> createIndexAndRunQueryOnSegment(File inputDataFile, String parserJson, String aggregators,
            long minTimestamp, Granularity gran, int maxRowCount, String groupByQueryJson) throws Exception {
        File segmentDir = tempFolder.newFolder();
        createIndex(inputDataFile, parserJson, aggregators, segmentDir, minTimestamp, gran, maxRowCount, true);
        return runQueryOnSegments(Collections.singletonList(segmentDir), groupByQueryJson);
    }

    public Sequence<Row> createIndexAndRunQueryOnSegment(File inputDataFile, String parserJson, String aggregators,
            long minTimestamp, Granularity gran, int maxRowCount, boolean rollup, String groupByQueryJson)
            throws Exception {
        File segmentDir = tempFolder.newFolder();
        createIndex(inputDataFile, parserJson, aggregators, segmentDir, minTimestamp, gran, maxRowCount, rollup);
        return runQueryOnSegments(Collections.singletonList(segmentDir), groupByQueryJson);
    }

    public Sequence<Row> createIndexAndRunQueryOnSegment(InputStream inputDataStream, String parserJson,
            String aggregators, long minTimestamp, Granularity gran, int maxRowCount, String groupByQueryJson)
            throws Exception {
        return createIndexAndRunQueryOnSegment(inputDataStream, parserJson, aggregators, minTimestamp, gran,
                maxRowCount, true, groupByQueryJson);
    }

    public Sequence<Row> createIndexAndRunQueryOnSegment(InputStream inputDataStream, String parserJson,
            String aggregators, long minTimestamp, Granularity gran, int maxRowCount, boolean rollup,
            String groupByQueryJson) throws Exception {
        File segmentDir = tempFolder.newFolder();
        createIndex(inputDataStream, parserJson, aggregators, segmentDir, minTimestamp, gran, maxRowCount, rollup);
        return runQueryOnSegments(Collections.singletonList(segmentDir), groupByQueryJson);
    }

    public void createIndex(File inputDataFile, String parserJson, String aggregators, File outDir,
            long minTimestamp, Granularity gran, int maxRowCount) throws Exception {
        createIndex(new FileInputStream(inputDataFile), parserJson, aggregators, outDir, minTimestamp, gran,
                maxRowCount, true);
    }

    public void createIndex(File inputDataFile, String parserJson, String aggregators, File outDir,
            long minTimestamp, Granularity gran, int maxRowCount, boolean rollup) throws Exception {
        createIndex(new FileInputStream(inputDataFile), parserJson, aggregators, outDir, minTimestamp, gran,
                maxRowCount, rollup);
    }

    public void createIndex(InputStream inputDataStream, String parserJson, String aggregators, File outDir,
            long minTimestamp, Granularity gran, int maxRowCount, boolean rollup) throws Exception {
        try {
            StringInputRowParser parser = mapper.readValue(parserJson, StringInputRowParser.class);

            LineIterator iter = IOUtils.lineIterator(inputDataStream, "UTF-8");
            List<AggregatorFactory> aggregatorSpecs = mapper.readValue(aggregators,
                    new TypeReference<List<AggregatorFactory>>() {
                    });

            createIndex(iter, parser, aggregatorSpecs.toArray(new AggregatorFactory[0]), outDir, minTimestamp, gran,
                    true, maxRowCount, rollup);
        } finally {
            Closeables.close(inputDataStream, true);
        }
    }

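    //Builds one or more segments from the given rows: whenever the in-memory index reaches maxRowCount
    //it is persisted to a temp folder and a fresh index is started; any persisted parts are then merged
    //into outDir.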
    public void createIndex(Iterator rows, InputRowParser parser, final AggregatorFactory[] metrics, File outDir,
            long minTimestamp, Granularity gran, boolean deserializeComplexMetrics, int maxRowCount, boolean rollup)
            throws Exception {
        IncrementalIndex index = null;
        List<File> toMerge = new ArrayList<>();

        try {
            index = new IncrementalIndex.Builder()
                    .setIndexSchema(new IncrementalIndexSchema.Builder().withMinTimestamp(minTimestamp)
                            .withQueryGranularity(gran).withMetrics(metrics).withRollup(rollup).build())
                    .setDeserializeComplexMetrics(deserializeComplexMetrics).setMaxRowCount(maxRowCount)
                    .buildOnheap();

            while (rows.hasNext()) {
                Object row = rows.next();
                if (!index.canAppendRow()) {
                    File tmp = tempFolder.newFolder();
                    toMerge.add(tmp);
                    indexMerger.persist(index, tmp, new IndexSpec(), null);
                    index.close();
                    index = new IncrementalIndex.Builder()
                            .setIndexSchema(new IncrementalIndexSchema.Builder().withMinTimestamp(minTimestamp)
                                    .withQueryGranularity(gran).withMetrics(metrics).withRollup(rollup).build())
                            .setDeserializeComplexMetrics(deserializeComplexMetrics).setMaxRowCount(maxRowCount)
                            .buildOnheap();
                }
                if (row instanceof String && parser instanceof StringInputRowParser) {
                    //Note: this is required because StringInputRowParser is InputRowParser<ByteBuffer> as opposed to
                    //InputRowParser<String>
                    index.add(((StringInputRowParser) parser).parse((String) row));
                } else {
                    index.add(((List<InputRow>) parser.parseBatch(row)).get(0));
                }
            }

            if (toMerge.size() > 0) {
                File tmp = tempFolder.newFolder();
                toMerge.add(tmp);
                indexMerger.persist(index, tmp, new IndexSpec(), null);

                List<QueryableIndex> indexes = new ArrayList<>(toMerge.size());
                for (File file : toMerge) {
                    indexes.add(indexIO.loadIndex(file));
                }
                indexMerger.mergeQueryableIndex(indexes, rollup, metrics, outDir, new IndexSpec(), null);

                for (QueryableIndex qi : indexes) {
                    qi.close();
                }
            } else {
                indexMerger.persist(index, outDir, new IndexSpec(), null);
            }
        } finally {
            if (index != null) {
                index.close();
            }
        }
    }

    //Simulates running the query on individual segments as historicals would, JSON-serializes the results
    //from each segment, then deserializes and merges them, and finally returns the merged results.
    public Sequence<Row> runQueryOnSegments(final List<File> segmentDirs, final String queryJson) throws Exception {
        return runQueryOnSegments(segmentDirs, mapper.readValue(queryJson, Query.class));
    }

    public Sequence<Row> runQueryOnSegments(final List<File> segmentDirs, final Query query) {
        final List<Segment> segments = Lists.transform(segmentDirs, new Function<File, Segment>() {
            @Override
            public Segment apply(File segmentDir) {
                try {
                    return new QueryableIndexSegment("", indexIO.loadIndex(segmentDir));
                } catch (IOException ex) {
                    throw Throwables.propagate(ex);
                }
            }
        });

        try {
            return runQueryOnSegmentsObjs(segments, query);
        } finally {
            for (Segment segment : segments) {
                CloseQuietly.close(segment);
            }
        }
    }

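    //Mimics broker-side processing: wraps each per-segment runner with the toolchest's pre/post merge
    //decorations, then merges and finalizes the per-segment results.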
    public Sequence<Row> runQueryOnSegmentsObjs(final List<Segment> segments, final Query query) {
        final FinalizeResultsQueryRunner baseRunner = new FinalizeResultsQueryRunner(
                toolChest.postMergeQueryDecoration(toolChest.mergeResults(
                        toolChest.preMergeQueryDecoration(factory.mergeRunners(MoreExecutors.sameThreadExecutor(),
                                Lists.transform(segments, new Function<Segment, QueryRunner>() {
                                    @Override
                                    public QueryRunner apply(final Segment segment) {
                                        try {
                                            return makeStringSerdeQueryRunner(mapper, toolChest,
                                                    factory.createRunner(segment));
                                        } catch (Exception ex) {
                                            throw Throwables.propagate(ex);
                                        }
                                    }
                                }))))),
                toolChest);

        return baseRunner.run(QueryPlus.wrap(query), Maps.newHashMap());
    }

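    //Wraps a runner so that its results are serialized to a JSON string and deserialized back before
    //being returned, exercising the result serde path between historicals and brokers.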
    public QueryRunner<Row> makeStringSerdeQueryRunner(final ObjectMapper mapper, final QueryToolChest toolChest,
            final QueryRunner<Row> baseRunner) {
        return new QueryRunner<Row>() {
            @Override
            public Sequence<Row> run(QueryPlus<Row> queryPlus, Map<String, Object> map) {
                try {
                    Sequence<Row> resultSeq = baseRunner.run(queryPlus, Maps.newHashMap());
                    final Yielder yielder = resultSeq.toYielder(null, new YieldingAccumulator() {
                        @Override
                        public Object accumulate(Object accumulated, Object in) {
                            yield();
                            return in;
                        }
                    });
                    String resultStr = mapper.writer().writeValueAsString(yielder);

                    List resultRows = Lists.transform(readQueryResultArrayFromString(resultStr),
                            toolChest.makePreComputeManipulatorFn(queryPlus.getQuery(),
                                    MetricManipulatorFns.deserializing()));
                    return Sequences.simple(resultRows);
                } catch (Exception ex) {
                    throw Throwables.propagate(ex);
                }
            }
        };
    }

    private List readQueryResultArrayFromString(String str) throws Exception {
        List result = new ArrayList();

        JsonParser jp = mapper.getFactory().createParser(str);

        if (jp.nextToken() != JsonToken.START_ARRAY) {
            throw new IAE("not an array [%s]", str);
        }

        ObjectCodec objectCodec = jp.getCodec();

        while (jp.nextToken() != JsonToken.END_ARRAY) {
            result.add(objectCodec.readValue(jp, toolChest.getResultTypeReference()));
        }
        return result;
    }

    public ObjectMapper getObjectMapper() {
        return mapper;
    }

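    //Checks that a BufferAggregator yields the same value after its state is relocated to a different
    //offset in a new buffer; returns the results before and after relocation for comparison.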
    public <T> T[] runRelocateVerificationTest(AggregatorFactory factory, ColumnSelectorFactory selector,
            Class<T> clazz) {
        T[] results = (T[]) Array.newInstance(clazz, 2);
        BufferAggregator agg = factory.factorizeBuffered(selector);
        ByteBuffer myBuf = ByteBuffer.allocate(10040902);
        agg.init(myBuf, 0);
        agg.aggregate(myBuf, 0);
        results[0] = (T) agg.get(myBuf, 0);

        byte[] theBytes = new byte[factory.getMaxIntermediateSizeWithNulls()];
        myBuf.get(theBytes);

        ByteBuffer newBuf = ByteBuffer.allocate(941209);
        newBuf.position(7574);
        newBuf.put(theBytes);
        newBuf.position(0);
        agg.relocate(0, 7574, myBuf, newBuf);
        results[1] = (T) agg.get(newBuf, 7574);
        return results;
    }

    @Override
    public void close() throws IOException {
        resourceCloser.close();
    }
}