Java tutorial
/* (c) 2014 LinkedIn Corp. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use * this file except in compliance with the License. You may obtain a copy of the * License at http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. */ package com.linkedin.cubert.plan.physical; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; import org.codehaus.jackson.JsonNode; import org.codehaus.jackson.node.ObjectNode; import com.linkedin.cubert.utils.ExecutionConfig; import com.linkedin.cubert.utils.JsonUtils; import com.linkedin.cubert.utils.ScriptStats; import com.linkedin.cubert.utils.print; /** * Parses and executes a phyiscal plan specified as a json script. * * @author Maneesh Varshney * */ public class ExecutorService { /* State of executing the script */ public static enum ExecutionState { RUNNING, FINISHED } static final int MAX_PARALLEL_JOBS = 5; static final double PROGRESS_UPDATE = 0.04; /* The program json */ private final JsonNode json; /* the total number of jobs */ private int nJobs; /* Whether to run profiling on the job */ private final boolean profileMode; /* Objects to gather execution statistics */ private ScriptStats scriptStats; /* Scheduled Jobs maintain the list of jobs to be executed */ final List<JobExecutor> scheduledJobs; /* The logger thread is a separate thread that monitors the progress of the jobs and reports the status to the user * The thread mechanism is required to report combined status of jobs running in parallel. */ final LoggerThread loggerThread; /* jobsToLog retains the members which are currently running */ final List<JobExecutor> jobsToLog; ExecutionState execState = ExecutionState.RUNNING; public ExecutorService(JsonNode json) { this.json = json; profileMode = json.has("profileMode") && json.get("profileMode").getBooleanValue(); setupConf(this.json); try { scriptStats = new ScriptStats(); scriptStats.init(json); } catch (Exception e) { scriptStats = null; } nJobs = json.get("jobs").size(); scheduledJobs = new ArrayList<JobExecutor>(nJobs); jobsToLog = new ArrayList<JobExecutor>(nJobs); loggerThread = new LoggerThread(this); } private static void setupConf(JsonNode programNode) { // copy the hadoopConf and libjars from global level to each job JsonNode hadoopConf = programNode.get("hadoopConf"); JsonNode libjars = programNode.get("libjars"); for (JsonNode job : programNode.path("jobs")) { ObjectNode onode = (ObjectNode) job; if (hadoopConf != null) onode.put("hadoopConf", hadoopConf); if (libjars != null) onode.put("libjars", libjars); } } /** * Public API to execute all the jobs in the program. * The jobs are executed serially or in parallel decided on the ExecutionConfig method isParallelExec * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException * @throws InstantiationException * @throws IllegalAccessException */ public void execute() throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException { try { loggerThread.setnJobsToExecute(nJobs); loggerThread.start(); if (ExecutionConfig.getInstance().isParallelExec()) { System.out.println("Executing jobs in parallel"); new ThreadPoolManager(this, MAX_PARALLEL_JOBS, this.json).execute(); } else { System.out.println("Executing jobs serially"); for (int jobId = 0; jobId < nJobs; jobId++) { executeJobId(jobId); } } finish(); onCompletion(); } finally { execState = ExecutionState.FINISHED; } } /** * Public API to execute a single job in the program. * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException * @throws InstantiationException * @throws IllegalAccessException */ public void execute(int jobId) throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException { loggerThread.setnJobsToExecute(1); try { loggerThread.start(); executeJobId(jobId); finish(); } finally { execState = ExecutionState.FINISHED; } } private void executeJobId(int jobId) throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException { JsonNode job = json.get("jobs").get(jobId); JobExecutor jobExecutor = createJobExecutor(job); synchronized (scheduledJobs) { scheduledJobs.add(jobExecutor); } synchronized (jobsToLog) { jobsToLog.add(jobExecutor); } executeJob(jobExecutor); } JobExecutor createJobExecutor(JsonNode job) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException { print.f("Executing job [%s]....", JsonUtils.getText(job, "name")); if (job.has("jobType")) { String jobType = job.get("jobType").getTextValue(); if (jobType.equals("GENERATE_DICTIONARY")) { return new DictionaryExecutor(job.toString(), profileMode); } else { throw new IllegalArgumentException("Job type " + jobType + " is not recognized"); } } else { return new JobExecutor(job.toString(), profileMode); } } void executeJob(JobExecutor jobExecutor) throws IOException, InterruptedException, ClassNotFoundException { scriptStats.setStartTime(jobExecutor.job); jobExecutor.run(false); print.f("Finished job [%s]....", JsonUtils.getText(jobExecutor.root, "name")); scriptStats.addJob(jobExecutor.job); } private void finish() throws IOException { /* Update user that the program execution is complete */ System.out.println("100% complete"); if (scriptStats != null) { scriptStats.computeAggregate(); scriptStats.printAggregate(); } } private void onCompletion() throws IOException { if (json.has("onCompletion") && !json.get("onCompletion").isNull()) { JsonNode tasks = json.get("onCompletion"); FileSystem fs = FileSystem.get(new JobConf()); for (int i = 0; i < tasks.size(); i++) { try { final JsonNode task = tasks.get(i); final String taskType = JsonUtils.getText(task, "type"); final String[] paths = JsonUtils.asArray(task, "paths"); if (taskType.equals("rm")) { for (String path : paths) { System.out.println("Deleting path " + path + "..."); fs.delete(new Path(path), true); } } else if (taskType.equals("mv")) { System.out.println("Moving " + paths[0] + " to " + paths[1] + "..."); final Path from = new Path(paths[0]); final Path to = new Path(paths[1]); fs.delete(to, true); fs.rename(from, to); } } catch (IOException e) { System.err.println("ERROR: " + e.getMessage()); } } } } }