cascading.stats.hadoop.HadoopStepStats.java Source code

Java tutorial

Introduction

Here is the source code for cascading.stats.hadoop.HadoopStepStats.java

Source

/*
 * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.stats.hadoop;

import java.io.IOException;
import java.util.Iterator;

import cascading.flow.FlowException;
import cascading.flow.FlowNode;
import cascading.flow.FlowStep;
import cascading.flow.planner.BaseFlowStep;
import cascading.management.state.ClientState;
import cascading.util.Util;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCompletionEvent;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Class HadoopStepStats provides Hadoop specific statistics and methods to underlying Hadoop facilities.
 * <p/>
 * An instance holds one {@link HadoopNodeStats} for the first node of the step's flow node graph
 * (treated as the mapper) and, when the graph has a second node, one for that node (treated as the
 * reducer).
 */
public abstract class HadoopStepStats extends BaseHadoopStepStats<RunningJob, Counters> {
    private static final Logger LOG = LoggerFactory.getLogger(HadoopStepStats.class);

    /** Stats for the first node in topological order; always set by the constructor. */
    private final HadoopNodeStats mapperNodeStats;
    /** Stats for the second node in topological order, or {@code null} when the step has only one node. */
    private final HadoopNodeStats reducerNodeStats;

    /**
     * Extracts the {@link Job} held in the {@code job} instance field of the given {@link RunningJob}.
     *
     * @param runningJob the job status client, may be {@code null} when no job client is available yet
     * @return the underlying Job, or {@code null} if {@code runningJob} is {@code null} or the field
     * could not be read
     */
    protected static Job getJob(RunningJob runningJob) {
        // No client yet: nothing to reflect on, and the warning below would be misleading.
        if (runningJob == null)
            return null;

        Job job = Util.returnInstanceFieldIfExistsSafe(runningJob, "job");

        if (job == null)
            LOG.warn(
                    "unable to get underlying org.apache.hadoop.mapreduce.Job from org.apache.hadoop.mapred.RunningJob, task level task counters will be unavailable");

        return job;
    }

    /**
     * Creates the step stats, registering a mapper node stats instance for the first flow node and,
     * if a second flow node exists, a reducer node stats instance for it, then installs the counter cache.
     *
     * @param flowStep    the flow step these stats describe; must be a {@link BaseFlowStep}
     * @param clientState the client state handed to the parent class and to each node stats instance
     */
    protected HadoopStepStats(FlowStep<JobConf> flowStep, ClientState clientState) {
        super(flowStep, clientState);

        BaseFlowStep<JobConf> step = (BaseFlowStep<JobConf>) flowStep;

        Iterator<FlowNode> iterator = step.getFlowNodeGraph().getTopologicalIterator();

        // The first node in topological order is always treated as the mapper.
        mapperNodeStats = new HadoopNodeStats(this, getConfig(), HadoopSliceStats.Kind.MAPPER, iterator.next(),
                clientState);

        addNodeStats(mapperNodeStats);

        // A second node, when present, is treated as the reducer; otherwise the field stays null.
        if (iterator.hasNext()) {
            reducerNodeStats = new HadoopNodeStats(this, getConfig(), HadoopSliceStats.Kind.REDUCER,
                    iterator.next(), clientState);
            addNodeStats(reducerNodeStats);
        } else {
            reducerNodeStats = null;
        }

        // The cache resolves its job status client lazily through this enclosing instance.
        counterCache = new HadoopStepCounterCache(this, getConfig()) {
            @Override
            protected RunningJob getJobStatusClient() {
                return HadoopStepStats.this.getJobStatusClient();
            }
        };
    }

    /** Returns the flow step's configuration cast to a Hadoop {@link Configuration}. */
    private Configuration getConfig() {
        return (Configuration) this.getFlowStep().getConfig();
    }

    /**
     * Method getNumMapTasks returns the number of children currently held by the mapper node stats.
     *
     * @return the numMapTasks (type int) of this HadoopStepStats object.
     */
    public int getNumMapTasks() {
        return mapperNodeStats.getChildren().size();
    }

    /**
     * Method getNumReduceTasks returns the number of children currently held by the reducer node stats,
     * or zero when this step has no reducer node.
     *
     * @return the numReducerTasks (type int) of this HadoopStepStats object.
     */
    public int getNumReduceTasks() {
        return reducerNodeStats == null ? 0 : reducerNodeStats.getChildren().size();
    }

    /**
     * Returns the Hadoop job id as a String.
     *
     * @return the job id, or {@code null} when no job status client is available
     */
    @Override
    public String getProcessStepID() {
        // Read the client once so the null check and the use see the same reference.
        RunningJob statusClient = getJobStatusClient();

        if (statusClient == null)
            return null;

        return statusClient.getJobID().toString();
    }

    /**
     * Method getJobClient returns the Hadoop {@link JobClient} managing this Hadoop job.
     *
     * @return the jobClient (type JobClient) of this HadoopStepStats object.
     */
    public abstract JobClient getJobClient();

    /**
     * Returns the underlying Map tasks progress percentage.
     * <p/>
     * This method is experimental.
     *
     * @return float, zero when the underlying Job is unavailable
     * @throws FlowException if the progress could not be retrieved from the job
     */
    public float getMapProgress() {
        Job runningJob = getJob(getJobStatusClient());

        if (runningJob == null)
            return 0;

        try {
            return runningJob.mapProgress();
        } catch (IOException exception) {
            // keep the root cause so callers can see why the lookup failed
            throw new FlowException("unable to get progress", exception);
        }
    }

    /**
     * Returns the underlying Reduce tasks progress percentage.
     * <p/>
     * This method is experimental.
     *
     * @return float, zero when the underlying Job is unavailable
     * @throws FlowException if the progress could not be retrieved from the job
     */
    public float getReduceProgress() {
        Job runningJob = getJob(getJobStatusClient());

        if (runningJob == null)
            return 0;

        try {
            return runningJob.reduceProgress();
        } catch (IOException exception) {
            // keep the root cause so callers can see why the lookup failed
            throw new FlowException("unable to get progress", exception);
        }
    }

    /**
     * Returns the tracking URL reported by the underlying Job.
     *
     * @return the tracking URL, or {@code null} when the underlying Job is unavailable
     */
    public String getStatusURL() {
        Job runningJob = getJob(getJobStatusClient());

        if (runningJob == null)
            return null;

        return runningJob.getTrackingURL();
    }

    /** Returns true when the flow step reports more than one flow node. */
    private boolean stepHasReducers() {
        return getFlowStep().getNumFlowNodes() > 1;
    }

    /**
     * Method captureDetail captures statistics task details and completion events.
     * <p/>
     * Does nothing when the requested depth is not a child of this stats type, or when the underlying
     * Job is unavailable. An {@link IOException} raised while capturing is logged and not rethrown.
     *
     * @param depth the level of detail to capture; completion events are only fetched for
     *              {@code Type.ATTEMPT}
     */
    @Override
    public synchronized void captureDetail(Type depth) {
        if (!getType().isChild(depth))
            return;

        Job runningJob = getJob(getJobStatusClient());

        if (runningJob == null)
            return;

        try {
            mapperNodeStats.captureDetail(depth);

            if (reducerNodeStats != null)
                reducerNodeStats.captureDetail(depth);

            int count = 0;

            // Page through the completion events, starting each request after those already seen,
            // until an empty batch signals there are no more.
            while (depth == Type.ATTEMPT) {
                // todo: we may be able to continue where we left off if we retain the count
                TaskCompletionEvent[] events = runningJob.getTaskCompletionEvents(count);

                if (events.length == 0)
                    break;

                addAttemptsToTaskStats(events);
                count += events.length;
            }
        } catch (IOException exception) {
            LOG.warn("unable to get task stats", exception);
        }
    }

    /**
     * Routes each completion event to the mapper or reducer node stats based on
     * {@link TaskCompletionEvent#isMapTask()}. Null events are logged and skipped.
     */
    private void addAttemptsToTaskStats(TaskCompletionEvent[] events) {
        for (TaskCompletionEvent event : events) {
            if (event == null) {
                LOG.warn("found empty completion event");
                continue;
            }

            if (event.isMapTask())
                mapperNodeStats.addAttempt(event);
            else if (reducerNodeStats != null)
                reducerNodeStats.addAttempt(event);
            else
                // A single-node step has no reducer node stats; skip rather than fail on null.
                LOG.debug("ignoring non-map completion event, step has no reducer node stats");
        }
    }
}