co.cask.cdap.app.mapreduce.LocalMRJobInfoFetcher.java Source code

Java tutorial

Introduction

Here is the source code for co.cask.cdap.app.mapreduce.LocalMRJobInfoFetcher.java

Source

/*
 * Copyright © 2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.app.mapreduce;

import co.cask.cdap.api.dataset.lib.cube.AggregationFunction;
import co.cask.cdap.api.dataset.lib.cube.TimeValue;
import co.cask.cdap.api.metrics.MetricDataQuery;
import co.cask.cdap.api.metrics.MetricStore;
import co.cask.cdap.api.metrics.MetricTimeSeries;
import co.cask.cdap.app.metrics.MapReduceMetrics;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.proto.Id;
import co.cask.cdap.proto.MRJobInfo;
import co.cask.cdap.proto.MRTaskInfo;
import co.cask.cdap.proto.ProgramType;
import com.google.common.base.Preconditions;
import com.google.common.collect.HashBasedTable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Table;
import com.google.inject.Inject;
import org.apache.hadoop.mapreduce.TaskCounter;

import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Retrieves information/reports for a MapReduce run via the Metrics system.
 *
 * <p>Everything returned by this class is derived from queries against the injected
 * {@link MetricStore}: per-task counters and completion (grouped by the instance-id tag) and
 * job-level counters and completion for the mapper and reducer task types.
 */
public class LocalMRJobInfoFetcher implements MRJobInfoFetcher {

    private final MetricStore metricStore;

    @Inject
    public LocalMRJobInfoFetcher(MetricStore metricStore) {
        this.metricStore = metricStore;
    }

    /**
     * Builds the job report for one MapReduce run from the metrics recorded for it.
     *
     * @param runId for which information will be returned.
     * @return a {@link MRJobInfo} containing information about a particular MapReduce program run.
     * @throws IllegalArgumentException if the program referenced by {@code runId} is not of type
     *         {@link ProgramType#MAPREDUCE}.
     */
    public MRJobInfo getMRJobInfo(Id.Run runId) {
        Preconditions.checkArgument(ProgramType.MAPREDUCE.equals(runId.getProgram().getType()));

        // baseTags has tag keys: ns.app.mr.runid
        Map<String, String> baseTags = Maps.newHashMap();
        baseTags.put(Constants.Metrics.Tag.NAMESPACE, runId.getNamespace().getId());
        baseTags.put(Constants.Metrics.Tag.APP, runId.getProgram().getApplicationId());
        baseTags.put(Constants.Metrics.Tag.MAPREDUCE, runId.getProgram().getId());
        baseTags.put(Constants.Metrics.Tag.RUN_ID, runId.getId());

        Map<String, String> mapTags = Maps.newHashMap(baseTags);
        mapTags.put(Constants.Metrics.Tag.MR_TASK_TYPE, MapReduceMetrics.TaskType.Mapper.getId());

        Map<String, String> reduceTags = Maps.newHashMap(baseTags);
        reduceTags.put(Constants.Metrics.Tag.MR_TASK_TYPE, MapReduceMetrics.TaskType.Reducer.getId());

        // Tables map from task instance id -> (CounterName -> CounterValue)
        Table<String, String, Long> mapTaskMetrics = HashBasedTable.create();
        Table<String, String, Long> reduceTaskMetrics = HashBasedTable.create();

        // Populate mapTaskMetrics and reduceTaskMetrics via MetricStore. Used to construct MRTaskInfo below.
        Map<String, String> mapMetricsToCounters = Maps.newHashMap();
        mapMetricsToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_INPUT_RECORDS),
                TaskCounter.MAP_INPUT_RECORDS.name());
        mapMetricsToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_OUTPUT_RECORDS),
                TaskCounter.MAP_OUTPUT_RECORDS.name());
        mapMetricsToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_BYTES),
                TaskCounter.MAP_OUTPUT_BYTES.name());
        mapMetricsToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_COMPLETION),
                MapReduceMetrics.METRIC_TASK_COMPLETION);

        // get metrics grouped by instance-id for the map tasks
        queryGroupedAggregates(mapTags, mapTaskMetrics, mapMetricsToCounters);
        Map<String, Long> mapTaskCompletion = removeTaskCompletion(mapTaskMetrics);

        Map<String, String> reduceMetricsToCounters = Maps.newHashMap();
        reduceMetricsToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_INPUT_RECORDS),
                TaskCounter.REDUCE_INPUT_RECORDS.name());
        reduceMetricsToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_OUTPUT_RECORDS),
                TaskCounter.REDUCE_OUTPUT_RECORDS.name());
        reduceMetricsToCounters.put(prependSystem(MapReduceMetrics.METRIC_TASK_COMPLETION),
                MapReduceMetrics.METRIC_TASK_COMPLETION);

        // get metrics grouped by instance-id for the reduce tasks
        queryGroupedAggregates(reduceTags, reduceTaskMetrics, reduceMetricsToCounters);
        Map<String, Long> reduceTaskCompletion = removeTaskCompletion(reduceTaskMetrics);

        // Construct MRTaskInfos from the information we can get from Metric system.
        List<MRTaskInfo> mapTaskInfos = buildTaskInfos(mapTaskMetrics, mapTaskCompletion);
        List<MRTaskInfo> reduceTaskInfos = buildTaskInfos(reduceTaskMetrics, reduceTaskCompletion);

        return getJobCounters(mapTags, reduceTags, mapTaskInfos, reduceTaskInfos);
    }

    /**
     * Removes the task-completion column from the given table and returns a copy of it.
     *
     * @param taskMetrics table of task id -> (counter name -> value); the completion column is removed
     *                    from it so that only real counters remain.
     * @return task id -> completion value; empty if the table had no completion column.
     */
    private Map<String, Long> removeTaskCompletion(Table<String, String, Long> taskMetrics) {
        Map<String, Map<String, Long>> columns = taskMetrics.columnMap();
        if (!columns.containsKey(MapReduceMetrics.METRIC_TASK_COMPLETION)) {
            return Maps.newHashMap();
        }
        return Maps.newHashMap(columns.remove(MapReduceMetrics.METRIC_TASK_COMPLETION));
    }

    /**
     * Creates one {@link MRTaskInfo} per row (task) of the given counter table.
     *
     * @param taskMetrics table of task id -> (counter name -> value).
     * @param taskCompletion task id -> completion value, as returned by
     *                       {@link #removeTaskCompletion(Table)}.
     * @return the task infos, in the iteration order of the table's rows.
     */
    private List<MRTaskInfo> buildTaskInfos(Table<String, String, Long> taskMetrics,
            Map<String, Long> taskCompletion) {
        List<MRTaskInfo> taskInfos = Lists.newArrayList();
        for (Map.Entry<String, Map<String, Long>> taskEntry : taskMetrics.rowMap().entrySet()) {
            String taskId = taskEntry.getKey();
            // A task may have reported counters but no completion metric yet; treat that as zero
            // progress instead of failing on unboxing a null Long.
            Long completion = taskCompletion.get(taskId);
            float progress = completion == null ? 0.0F : completion / 100.0F;
            taskInfos.add(new MRTaskInfo(taskId, null, null, null, progress, taskEntry.getValue()));
        }
        return taskInfos;
    }

    /**
     * Queries the job-level counters and completion for the mapper and reducer task types and
     * assembles the final {@link MRJobInfo}.
     */
    private MRJobInfo getJobCounters(Map<String, String> mapTags, Map<String, String> reduceTags,
            List<MRTaskInfo> mapTaskInfos, List<MRTaskInfo> reduceTaskInfos) {
        Map<String, Long> metrics = Maps.newHashMap();

        Map<String, String> mapMetricsToCounters = ImmutableMap.of(
                prependSystem(MapReduceMetrics.METRIC_INPUT_RECORDS), TaskCounter.MAP_INPUT_RECORDS.name(),
                prependSystem(MapReduceMetrics.METRIC_OUTPUT_RECORDS), TaskCounter.MAP_OUTPUT_RECORDS.name(),
                prependSystem(MapReduceMetrics.METRIC_BYTES), TaskCounter.MAP_OUTPUT_BYTES.name(),
                prependSystem(MapReduceMetrics.METRIC_COMPLETION), MapReduceMetrics.METRIC_COMPLETION);

        getAggregates(mapTags, mapMetricsToCounters, metrics);
        // getAggregates always seeds every counter name with 0, so the removed value is never null.
        float mapProgress = metrics.remove(MapReduceMetrics.METRIC_COMPLETION) / 100.0F;

        Map<String, String> reduceMetricsToCounters = ImmutableMap.of(
                prependSystem(MapReduceMetrics.METRIC_INPUT_RECORDS), TaskCounter.REDUCE_INPUT_RECORDS.name(),
                prependSystem(MapReduceMetrics.METRIC_OUTPUT_RECORDS), TaskCounter.REDUCE_OUTPUT_RECORDS.name(),
                prependSystem(MapReduceMetrics.METRIC_COMPLETION), MapReduceMetrics.METRIC_COMPLETION);

        getAggregates(reduceTags, reduceMetricsToCounters, metrics);
        float reduceProgress = metrics.remove(MapReduceMetrics.METRIC_COMPLETION) / 100.0F;
        return new MRJobInfo(mapProgress, reduceProgress, metrics, mapTaskInfos, reduceTaskInfos, false);
    }

    /** Returns the given metric name with the {@code "system."} prefix prepended. */
    private String prependSystem(String metric) {
        return "system." + metric;
    }

    /**
     * Maps every given metric name to {@link AggregationFunction#LATEST}.
     */
    private Map<String, AggregationFunction> latestAggregationFor(Iterable<String> metricNames) {
        Map<String, AggregationFunction> metrics = Maps.newHashMap();
        // all map-reduce metrics are gauges
        for (String metric : metricNames) {
            metrics.put(metric, AggregationFunction.LATEST);
        }
        return metrics;
    }

    /**
     * Queries the latest value of each metric for the given tags and stores it in {@code result}
     * under the corresponding counter name.
     *
     * @param tags tags that select the metrics context to query.
     * @param metricsToCounters metric name -> counter name under which the value is stored.
     * @param result receives one entry per counter name; counters without data are set to 0.
     */
    private void getAggregates(Map<String, String> tags, Map<String, String> metricsToCounters,
            Map<String, Long> result) {
        Map<String, AggregationFunction> metrics = latestAggregationFor(metricsToCounters.keySet());
        MetricDataQuery metricDataQuery = new MetricDataQuery(0, Integer.MAX_VALUE, Integer.MAX_VALUE, metrics,
                tags, ImmutableList.<String>of());
        Collection<MetricTimeSeries> query = metricStore.query(metricDataQuery);
        // initialize elements to zero
        for (String counterName : metricsToCounters.values()) {
            result.put(counterName, 0L);
        }
        for (MetricTimeSeries metricTimeSeries : query) {
            List<TimeValue> timeValues = metricTimeSeries.getTimeValues();
            if (timeValues.isEmpty()) {
                // no data point for this metric; keep the zero default
                continue;
            }
            TimeValue timeValue = Iterables.getOnlyElement(timeValues);
            result.put(metricsToCounters.get(metricTimeSeries.getMetricName()), timeValue.getValue());
        }
    }

    /**
     * Queries MetricStore for the given metrics across all tasks matching {@code tags}, grouped by
     * instance id, and records the latest value of each metric per task.
     *
     * @param tags tags that select the metrics context (including the task type) to query.
     * @param allTaskMetrics receives task id -> (counter name -> value).
     * @param metricsToCounters metric name -> counter name used as the table column.
     */
    private void queryGroupedAggregates(Map<String, String> tags, Table<String, String, Long> allTaskMetrics,
            Map<String, String> metricsToCounters) {
        Map<String, AggregationFunction> metrics = latestAggregationFor(metricsToCounters.keySet());

        MetricDataQuery metricDataQuery = new MetricDataQuery(0, Integer.MAX_VALUE, Integer.MAX_VALUE, metrics,
                tags, ImmutableList.of(Constants.Metrics.Tag.INSTANCE_ID));
        Collection<MetricTimeSeries> query = metricStore.query(metricDataQuery);

        for (MetricTimeSeries metricTimeSeries : query) {
            List<TimeValue> timeValues = metricTimeSeries.getTimeValues();
            if (timeValues.isEmpty()) {
                // nothing recorded for this task/metric pair; leave the cell absent
                continue;
            }
            TimeValue timeValue = Iterables.getOnlyElement(timeValues);
            String taskId = metricTimeSeries.getTagValues().get(Constants.Metrics.Tag.INSTANCE_ID);
            allTaskMetrics.put(taskId, metricsToCounters.get(metricTimeSeries.getMetricName()),
                    timeValue.getValue());
        }
    }
}