com.linkedin.cubert.plan.physical.ExecutorService.java Source code

Java tutorial

Introduction

Here is the source code for com.linkedin.cubert.plan.physical.ExecutorService.java

Source

/* (c) 2014 LinkedIn Corp. All rights reserved.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
 * this file except in compliance with the License. You may obtain a copy of the
 * License at  http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied.
 */

package com.linkedin.cubert.plan.physical;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.node.ObjectNode;

import com.linkedin.cubert.utils.ExecutionConfig;
import com.linkedin.cubert.utils.JsonUtils;
import com.linkedin.cubert.utils.ScriptStats;
import com.linkedin.cubert.utils.print;

/**
 * Parses and executes a physical plan specified as a json script.
 *
 * Jobs listed in the script's "jobs" array are run either serially or in
 * parallel (bounded by {@link #MAX_PARALLEL_JOBS}), depending on
 * {@code ExecutionConfig.isParallelExec()}. A background {@link LoggerThread}
 * reports combined progress while jobs run.
 *
 * @author Maneesh Varshney
 *
 */
public class ExecutorService {
    /* State of executing the script */
    public enum ExecutionState {
        RUNNING, FINISHED
    }

    /* Upper bound on concurrently running jobs in parallel-execution mode */
    static final int MAX_PARALLEL_JOBS = 5;
    /* Fractional granularity at which progress updates are reported */
    static final double PROGRESS_UPDATE = 0.04;

    /* The program json */
    private final JsonNode json;

    /* the total number of jobs */
    private int nJobs;

    /* Whether to run profiling on the job */
    private final boolean profileMode;

    /* Objects to gather execution statistics; null when initialization failed
     * (statistics collection is best-effort, see constructor and the null
     * guards in executeJob()/finish()). */
    private ScriptStats scriptStats;

    /* Scheduled Jobs maintain the list of jobs to be executed */
    final List<JobExecutor> scheduledJobs;

    /* The logger thread is a separate thread that monitors the progress of the jobs and reports the status to the user
     * The thread mechanism is required to report combined status of jobs running in parallel. */
    final LoggerThread loggerThread;

    /* jobsToLog retains the members which are currently running */
    final List<JobExecutor> jobsToLog;

    ExecutionState execState = ExecutionState.RUNNING;

    /**
     * Creates an executor for the given program json.
     *
     * @param json the parsed cubert program; must contain a "jobs" array and
     *             may contain "profileMode", "hadoopConf", "libjars" and
     *             "onCompletion" entries
     */
    public ExecutorService(JsonNode json) {
        this.json = json;
        profileMode = json.has("profileMode") && json.get("profileMode").getBooleanValue();

        setupConf(this.json);
        try {
            scriptStats = new ScriptStats();
            scriptStats.init(json);
        } catch (Exception e) {
            // Statistics are best-effort: report the failure (previously it
            // was swallowed silently) and continue without stats.
            System.err.println("WARNING: could not initialize script statistics: " + e);
            scriptStats = null;
        }

        nJobs = json.get("jobs").size();

        scheduledJobs = new ArrayList<JobExecutor>(nJobs);
        jobsToLog = new ArrayList<JobExecutor>(nJobs);
        loggerThread = new LoggerThread(this);
    }

    /**
     * Copies the program-level "hadoopConf" and "libjars" nodes down onto
     * every job node, so each job carries its full configuration.
     */
    private static void setupConf(JsonNode programNode) {
        // copy the hadoopConf and libjars from global level to each job
        JsonNode hadoopConf = programNode.get("hadoopConf");
        JsonNode libjars = programNode.get("libjars");

        for (JsonNode job : programNode.path("jobs")) {
            ObjectNode onode = (ObjectNode) job;
            if (hadoopConf != null)
                onode.put("hadoopConf", hadoopConf);
            if (libjars != null)
                onode.put("libjars", libjars);
        }
    }

    /**
     * Public API to execute all the jobs in the program.
     * The jobs are executed serially or in parallel decided on the ExecutionConfig method isParallelExec
     *
     * @throws IOException
     * @throws InterruptedException
     * @throws ClassNotFoundException
     * @throws InstantiationException
     * @throws IllegalAccessException
     */
    public void execute() throws IOException, InterruptedException, ClassNotFoundException, InstantiationException,
            IllegalAccessException {
        try {
            loggerThread.setnJobsToExecute(nJobs);
            loggerThread.start();

            if (ExecutionConfig.getInstance().isParallelExec()) {
                System.out.println("Executing jobs in parallel");
                new ThreadPoolManager(this, MAX_PARALLEL_JOBS, this.json).execute();
            } else {
                System.out.println("Executing jobs serially");
                for (int jobId = 0; jobId < nJobs; jobId++) {
                    executeJobId(jobId);
                }
            }
            finish();
            onCompletion();
        } finally {
            // Always flip the state so the logger thread can terminate,
            // even if a job failed with an exception.
            execState = ExecutionState.FINISHED;
        }
    }

    /**
     * Public API to execute a single job in the program.
     *
     * @param jobId index of the job in the program's "jobs" array
     * @throws IOException
     * @throws InterruptedException
     * @throws ClassNotFoundException
     * @throws InstantiationException
     * @throws IllegalAccessException
     */
    public void execute(int jobId) throws IOException, InterruptedException, ClassNotFoundException,
            InstantiationException, IllegalAccessException {
        loggerThread.setnJobsToExecute(1);
        try {
            loggerThread.start();
            executeJobId(jobId);
            finish();
        } finally {
            execState = ExecutionState.FINISHED;
        }
    }

    /**
     * Creates an executor for the jobId-th job, registers it for scheduling
     * and progress logging, then runs it.
     */
    private void executeJobId(int jobId) throws IOException, InterruptedException, ClassNotFoundException,
            InstantiationException, IllegalAccessException {
        JsonNode job = json.get("jobs").get(jobId);
        JobExecutor jobExecutor = createJobExecutor(job);
        // Both lists are read concurrently (logger thread / thread pool), so
        // additions are synchronized on the list itself.
        synchronized (scheduledJobs) {
            scheduledJobs.add(jobExecutor);
        }
        synchronized (jobsToLog) {
            jobsToLog.add(jobExecutor);
        }
        executeJob(jobExecutor);
    }

    /**
     * Instantiates the appropriate executor for a job node: a
     * {@link DictionaryExecutor} for "GENERATE_DICTIONARY" jobs, a plain
     * {@link JobExecutor} when no "jobType" is given.
     *
     * @throws IllegalArgumentException if "jobType" is present but unrecognized
     */
    JobExecutor createJobExecutor(JsonNode job)
            throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
        print.f("Executing job [%s]....", JsonUtils.getText(job, "name"));

        if (job.has("jobType")) {
            String jobType = job.get("jobType").getTextValue();
            if (jobType.equals("GENERATE_DICTIONARY")) {
                return new DictionaryExecutor(job.toString(), profileMode);
            } else {
                throw new IllegalArgumentException("Job type " + jobType + " is not recognized");
            }
        } else {
            return new JobExecutor(job.toString(), profileMode);
        }
    }

    /**
     * Runs a single job and records its timing/statistics.
     */
    void executeJob(JobExecutor jobExecutor) throws IOException, InterruptedException, ClassNotFoundException {
        // scriptStats is null when its initialization failed in the
        // constructor; guard here (as finish() already does) instead of
        // crashing every job with an NPE.
        if (scriptStats != null)
            scriptStats.setStartTime(jobExecutor.job);
        jobExecutor.run(false);
        print.f("Finished job [%s]....", JsonUtils.getText(jobExecutor.root, "name"));
        if (scriptStats != null)
            scriptStats.addJob(jobExecutor.job);
    }

    /**
     * Reports completion to the user and prints aggregate statistics when
     * statistics collection is available.
     */
    private void finish() throws IOException {
        /* Update user that the program execution is complete */
        System.out.println("100% complete");

        if (scriptStats != null) {
            scriptStats.computeAggregate();
            scriptStats.printAggregate();
        }
    }

    /**
     * Executes the optional "onCompletion" cleanup tasks: "rm" deletes each
     * listed path recursively; "mv" moves paths[0] to paths[1] (deleting the
     * destination first so the rename cannot fail on an existing target).
     * Failures of individual tasks are reported and do not abort the rest.
     */
    private void onCompletion() throws IOException {
        if (json.has("onCompletion") && !json.get("onCompletion").isNull()) {
            JsonNode tasks = json.get("onCompletion");
            FileSystem fs = FileSystem.get(new JobConf());

            for (int i = 0; i < tasks.size(); i++) {
                try {
                    final JsonNode task = tasks.get(i);
                    final String taskType = JsonUtils.getText(task, "type");
                    final String[] paths = JsonUtils.asArray(task, "paths");

                    if (taskType.equals("rm")) {
                        for (String path : paths) {
                            System.out.println("Deleting path " + path + "...");
                            fs.delete(new Path(path), true);
                        }
                    } else if (taskType.equals("mv")) {
                        System.out.println("Moving " + paths[0] + " to " + paths[1] + "...");

                        final Path from = new Path(paths[0]);
                        final Path to = new Path(paths[1]);
                        // Remove any existing destination; HDFS rename fails
                        // when the target already exists.
                        fs.delete(to, true);
                        fs.rename(from, to);
                    }
                } catch (IOException e) {
                    // Print the exception itself, not just getMessage(),
                    // which may be null and would hide the failure type.
                    System.err.println("ERROR: " + e);
                }
            }
        }
    }
}