List of usage examples for org.apache.hadoop.mapred.RunningJob#isComplete()
public boolean isComplete() throws IOException;
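Before the real-world examples below, here is a minimal sketch of the typical calling pattern: submit a JobConf through a JobClient, then poll isComplete() until the job reaches a terminal state and check isSuccessful(). The class name, job name, and one-second poll interval are placeholders for illustration, not taken from any of the projects listed below, and the mapper/reducer/input/output setup is omitted.

import java.io.IOException;

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class IsCompletePollingSketch {
    public static void main(String[] args) throws IOException, InterruptedException {
        JobConf conf = new JobConf();
        conf.setJobName("is-complete-sketch"); // placeholder name; mapper/reducer/IO setup omitted

        JobClient jobClient = new JobClient(conf);
        RunningJob runningJob = jobClient.submitJob(conf);

        // isComplete() becomes true once the job reaches any terminal state
        // (succeeded, failed, or killed), so poll it and then check isSuccessful().
        while (!runningJob.isComplete()) {
            Thread.sleep(1000);
        }
        System.out.println("Job " + runningJob.getID() + " successful: " + runningJob.isSuccessful());
    }
}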
From source file:org.apache.oozie.command.wf.TestWorkflowActionKillXCommand.java
License:Apache License
public void testWfActionKillChildJob() throws Exception {
    String externalJobID = launchSleepJob(1000);
    String childId = launchSleepJob(1000000);

    WorkflowJobBean job = this.addRecordToWfJobTable(WorkflowJob.Status.KILLED, WorkflowInstance.Status.KILLED);
    WorkflowActionBean action = this.addRecordToWfActionTable(job.getId(), externalJobID, "1",
            WorkflowAction.Status.KILLED, childId);

    new ActionKillXCommand(action.getId()).call();

    JobClient jobClient = createJobClient();
    final RunningJob mrJob = jobClient.getJob(JobID.forName(childId));
    waitFor(60 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return mrJob.isComplete();
        }
    });
    assertEquals(mrJob.getJobState(), JobStatus.KILLED);
}
From source file:org.apache.oozie.service.TestRecoveryService.java
License:Apache License
/**
 * Tests functionality of the Recovery Service Runnable command.
 * <p>
 * Starts an action with USER_RETRY status, runs the recovery runnable, and ensures the state
 * changes to OK and the job completes successfully.
 *
 * @throws Exception
 */
public void testWorkflowActionRecoveryUserRetry() throws Exception {
    final JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job1 = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING);
    WorkflowActionBean action1 = this.addRecordToWfActionTable(job1.getId(), "1", WorkflowAction.Status.USER_RETRY);

    WorkflowJobBean job2 = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING);
    WorkflowActionBean action2 = createWorkflowActionSetPending(job2.getId(), WorkflowAction.Status.USER_RETRY);
    // Default recovery created time is 7 days.
    action2.setCreatedTime(new Date(new Date().getTime() - 8 * RecoveryService.ONE_DAY_MILLISCONDS));
    WorkflowActionInsertJPAExecutor actionInsertCmd = new WorkflowActionInsertJPAExecutor(action2);
    jpaService.execute(actionInsertCmd);

    Runnable recoveryRunnable = new RecoveryRunnable(0, 60, 60);
    recoveryRunnable.run();
    sleep(3000);

    final WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(action1.getId());
    waitFor(5000, new Predicate() {
        public boolean evaluate() throws Exception {
            WorkflowActionBean a = jpaService.execute(wfActionGetCmd);
            return a.getExternalId() != null;
        }
    });
    action1 = jpaService.execute(wfActionGetCmd);
    assertNotNull(action1.getExternalId());
    assertEquals(WorkflowAction.Status.RUNNING, action1.getStatus());

    // Action 2 should not get recovered because its created time is older than 7 days.
    action2 = WorkflowActionQueryExecutor.getInstance().get(WorkflowActionQuery.GET_ACTION, action2.getId());
    assertNull(action2.getExternalId());
    assertEquals(WorkflowAction.Status.USER_RETRY, action2.getStatus());

    ActionExecutorContext context = new ActionXCommand.ActionExecutorContext(job1, action1, false, false);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    JobConf conf = actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action1.getConf()));
    String user = conf.get("user.name");
    String group = conf.get("group.name");
    JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);

    String launcherId = action1.getExternalId();
    final RunningJob launcherJob = jobClient.getJob(JobID.forName(launcherId));
    waitFor(240 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());

    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(),
            conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));
}
From source file:org.apache.pig.backend.hadoop.executionengine.mapreduceExec.MapReduceLauncher.java
License:Apache License
/**
 * Submit a Pig job to hadoop.
 *
 * @param pom
 *            the POMapreduce plan describing the map functions, grouping functions, reduce
 *            function, inputs, and output path for this job.
 * @return an indicator of success or failure.
 * @throws IOException
 */
public boolean launchPig(POMapreduce pom) throws IOException {
    JobConf conf = new JobConf(config);
    setJobProperties(conf, pom);
    Properties properties = pom.pigContext.getProperties();
    ConfigurationValidator.validatePigProperties(properties);
    String jobName = properties.getProperty(PigContext.JOB_NAME);
    conf.setJobName(jobName);
    boolean success = false;

    List<String> funcs = new ArrayList<String>();
    if (pom.toMap != null) {
        for (EvalSpec es : pom.toMap)
            funcs.addAll(es.getFuncs());
    }
    if (pom.groupFuncs != null) {
        for (EvalSpec es : pom.groupFuncs)
            funcs.addAll(es.getFuncs());
    }
    if (pom.toReduce != null) {
        funcs.addAll(pom.toReduce.getFuncs());
    }

    // create jobs.jar locally and pass it to hadoop
    File submitJarFile = File.createTempFile("Job", ".jar");
    try {
        FileOutputStream fos = new FileOutputStream(submitJarFile);
        JarManager.createJar(fos, funcs, null, pom.pigContext);
        log.debug("Job jar size = " + submitJarFile.length());
        conf.setJar(submitJarFile.getPath());
        String user = System.getProperty("user.name");
        conf.setUser(user != null ? user : "Pigster");
        conf.set("pig.spill.size.threshold", properties.getProperty("pig.spill.size.threshold"));
        conf.set("pig.spill.gc.activation.size", properties.getProperty("pig.spill.gc.activation.size"));
        if (pom.reduceParallelism != -1) {
            conf.setNumReduceTasks(pom.reduceParallelism);
        }
        if (pom.toMap != null) {
            conf.set("pig.mapFuncs", ObjectSerializer.serialize(pom.toMap));
        }
        if (pom.toCombine != null) {
            conf.set("pig.combineFunc", ObjectSerializer.serialize(pom.toCombine));
            // this is to make sure that the combiner is only called once,
            // since we can't handle no combine or multiple combines
            conf.setCombineOnceOnly(true);
        }
        if (pom.groupFuncs != null) {
            conf.set("pig.groupFuncs", ObjectSerializer.serialize(pom.groupFuncs));
        }
        if (pom.toReduce != null) {
            conf.set("pig.reduceFunc", ObjectSerializer.serialize(pom.toReduce));
        }
        if (pom.toSplit != null) {
            conf.set("pig.splitSpec", ObjectSerializer.serialize(pom.toSplit));
        }
        if (pom.pigContext != null) {
            conf.set("pig.pigContext", ObjectSerializer.serialize(pom.pigContext));
        }
        conf.setMapRunnerClass(PigMapReduce.class);
        if (pom.toCombine != null) {
            conf.setCombinerClass(PigCombine.class);
            //conf.setCombinerClass(PigMapReduce.class);
        }
        if (pom.quantilesFile != null) {
            conf.set("pig.quantilesFile", pom.quantilesFile);
        } else {
            // this is not a sort job - can use byte comparison to speed up processing
            conf.setOutputKeyComparatorClass(PigWritableComparator.class);
        }
        if (pom.partitionFunction != null) {
            conf.setPartitionerClass(SortPartitioner.class);
        }
        conf.setReducerClass(PigMapReduce.class);
        conf.setInputFormat(PigInputFormat.class);
        conf.setOutputFormat(PigOutputFormat.class);
        // not used starting with 0.15
        conf.setInputKeyClass(Text.class);
        // not used starting with 0.15
        conf.setInputValueClass(Tuple.class);
        conf.setOutputKeyClass(Tuple.class);
        if (pom.userComparator != null) {
            conf.setOutputKeyComparatorClass(pom.userComparator);
        }
        conf.setOutputValueClass(IndexedTuple.class);
        conf.set("pig.inputs", ObjectSerializer.serialize(pom.inputFileSpecs));
        conf.setOutputPath(new Path(pom.outputFileSpec.getFileName()));
        conf.set("pig.storeFunc", ObjectSerializer.serialize(pom.outputFileSpec.getFuncSpec()));

        // Setup the DistributedCache for this job
        setupDistributedCache(pom.pigContext, conf, pom.properties, "pig.streaming.ship.files", true);
        setupDistributedCache(pom.pigContext, conf, pom.properties, "pig.streaming.cache.files", false);

        // Setup the logs directory for this job
        String jobOutputFileName = pom.pigContext.getJobOutputFile();
        if (jobOutputFileName != null && jobOutputFileName.length() > 0) {
            Path jobOutputFile = new Path(pom.pigContext.getJobOutputFile());
            conf.set("pig.output.dir", jobOutputFile.getParent().toString());
            conf.set("pig.streaming.log.dir", new Path(jobOutputFile, LOG_DIR).toString());
        }

        //
        // Now, actually submit the job (using the submit name)
        //
        JobClient jobClient = execEngine.getJobClient();
        RunningJob status = jobClient.submitJob(conf);
        log.debug("submitted job: " + status.getJobID());

        long sleepTime = 1000;
        double lastQueryProgress = -1.0;
        int lastJobsQueued = -1;
        double lastMapProgress = -1.0;
        double lastReduceProgress = -1.0;
        while (true) {
            try {
                Thread.sleep(sleepTime);
            } catch (Exception e) {
            }

            if (status.isComplete()) {
                success = status.isSuccessful();
                if (log.isDebugEnabled()) {
                    StringBuilder sb = new StringBuilder();
                    sb.append("Job finished ");
                    sb.append((success ? "" : "un"));
                    sb.append("successfully");
                    log.debug(sb.toString());
                }
                if (success) {
                    mrJobNumber++;
                }
                double queryProgress = ((double) mrJobNumber) / ((double) numMRJobs);
                if (queryProgress > lastQueryProgress) {
                    if (log.isInfoEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Pig progress = ");
                        sbProgress.append(((int) (queryProgress * 100)));
                        sbProgress.append("%");
                        log.info(sbProgress.toString());
                    }
                    lastQueryProgress = queryProgress;
                }
                break;
            } else {
                // still running
                double mapProgress = status.mapProgress();
                double reduceProgress = status.reduceProgress();
                if (lastMapProgress != mapProgress || lastReduceProgress != reduceProgress) {
                    if (log.isDebugEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Hadoop job progress: Map=");
                        sbProgress.append((int) (mapProgress * 100));
                        sbProgress.append("% Reduce=");
                        sbProgress.append((int) (reduceProgress * 100));
                        sbProgress.append("%");
                        log.debug(sbProgress.toString());
                    }
                    lastMapProgress = mapProgress;
                    lastReduceProgress = reduceProgress;
                }
                double numJobsCompleted = mrJobNumber;
                double thisJobProgress = (mapProgress + reduceProgress) / 2.0;
                double queryProgress = (numJobsCompleted + thisJobProgress) / ((double) numMRJobs);
                if (queryProgress > lastQueryProgress) {
                    if (log.isInfoEnabled()) {
                        StringBuilder sbProgress = new StringBuilder();
                        sbProgress.append("Pig progress = ");
                        sbProgress.append(((int) (queryProgress * 100)));
                        sbProgress.append("%");
                        log.info(sbProgress.toString());
                    }
                    lastQueryProgress = queryProgress;
                }
            }
        }

        // bug 1030028: if the input file is empty, hadoop doesn't create the output file!
        Path outputFile = conf.getOutputPath();
        String outputName = outputFile.getName();
        int colon = outputName.indexOf(':');
        if (colon != -1) {
            outputFile = new Path(outputFile.getParent(), outputName.substring(0, colon));
        }

        try {
            ElementDescriptor descriptor = ((HDataStorage) (pom.pigContext.getDfs()))
                    .asElement(outputFile.toString());
            if (success && !descriptor.exists()) {
                // create an empty output file
                PigFile f = new PigFile(outputFile.toString(), false);
                f.store(BagFactory.getInstance().newDefaultBag(), new PigStorage(), pom.pigContext);
            }
        } catch (DataStorageException e) {
            throw WrappedIOException.wrap("Failed to obtain descriptor for " + outputFile.toString(), e);
        }

        if (!success) {
            // go find the error messages
            getErrorMessages(jobClient.getMapTaskReports(status.getJobID()), "map");
            getErrorMessages(jobClient.getReduceTaskReports(status.getJobID()), "reduce");
        } else {
            long timeSpent = 0;

            // NOTE: this call is crashing due to a bug in Hadoop; the bug is known and the patch has not been applied yet.
            TaskReport[] mapReports = jobClient.getMapTaskReports(status.getJobID());
            TaskReport[] reduceReports = jobClient.getReduceTaskReports(status.getJobID());
            for (TaskReport r : mapReports) {
                timeSpent += (r.getFinishTime() - r.getStartTime());
            }
            for (TaskReport r : reduceReports) {
                timeSpent += (r.getFinishTime() - r.getStartTime());
            }
            totalHadoopTimeSpent += timeSpent;
        }
    } catch (Exception e) {
        // Do we need different handling for different exceptions
        e.printStackTrace();
        throw WrappedIOException.wrap(e);
    } finally {
        submitJarFile.delete();
    }
    return success;
}
From source file:org.pentaho.di.job.entries.hadoopjobexecutor.JobEntryHadoopJobExecutor.java
License:Apache License
public Result execute(Result result, int arg1) throws KettleException {
    result.setNrErrors(0);

    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$

    String hadoopDistro = System.getProperty("hadoop.distribution.name", hadoopDistribution);
    hadoopDistro = environmentSubstitute(hadoopDistro);
    if (Const.isEmpty(hadoopDistro)) {
        hadoopDistro = "generic";
    }

    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.FailedToOpenLogFile", logFileName, //$NON-NLS-1$
                e.toString()));
        logError(Const.getStackTracker(e));
    }

    try {
        URL resolvedJarUrl = null;
        String jarUrlS = environmentSubstitute(jarUrl);
        if (jarUrlS.indexOf("://") == -1) {
            // default to file://
            File jarFile = new File(jarUrlS);
            resolvedJarUrl = jarFile.toURI().toURL();
        } else {
            resolvedJarUrl = new URL(jarUrlS);
        }

        final String cmdLineArgsS = environmentSubstitute(cmdLineArgs);

        if (log.isDetailed())
            logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.ResolvedJar",
                    resolvedJarUrl.toExternalForm()));

        if (isSimple) {
            /*
            final AtomicInteger taskCount = new AtomicInteger(0);
            final AtomicInteger successCount = new AtomicInteger(0);
            final AtomicInteger failedCount = new AtomicInteger(0);
            */

            if (log.isDetailed())
                logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.SimpleMode"));

            List<Class<?>> classesWithMains = JarUtility
                    .getClassesInJarWithMain(resolvedJarUrl.toExternalForm(), getClass().getClassLoader());
            for (final Class<?> clazz : classesWithMains) {
                Runnable r = new Runnable() {
                    public void run() {
                        try {
                            final ClassLoader cl = Thread.currentThread().getContextClassLoader();
                            try {
                                // taskCount.incrementAndGet();
                                Thread.currentThread().setContextClassLoader(clazz.getClassLoader());
                                Method mainMethod = clazz.getMethod("main", new Class[] { String[].class });
                                Object[] args = (cmdLineArgsS != null)
                                        ? new Object[] { cmdLineArgsS.split(" ") } : new Object[0];
                                mainMethod.invoke(null, args);
                            } finally {
                                Thread.currentThread().setContextClassLoader(cl);
                                // successCount.incrementAndGet();
                                // taskCount.decrementAndGet();
                            }
                        } catch (Throwable ignored) {
                            // skip, try the next one
                            // logError(ignored.getMessage());
                            // failedCount.incrementAndGet();
                            ignored.printStackTrace();
                        }
                    }
                };
                Thread t = new Thread(r);
                t.start();
            }

            // uncomment to implement blocking
            /*
            if (blocking) {
                while (taskCount.get() > 0 && !parentJob.isStopped()) {
                    Thread.sleep(1000);
                }

                if (!parentJob.isStopped()) {
                    result.setResult(successCount.get() > 0);
                    result.setNrErrors((successCount.get() > 0) ? 0 : 1);
                } else {
                    // we can't really know at this stage if
                    // the hadoop job will finish successfully
                    // because we have to stop now
                    result.setResult(true); // look on the bright side of life :-)...
                    result.setNrErrors(0);
                }
            } else {
            */

            // non-blocking - just set success equal to no failures arising
            // from invocation
            // result.setResult(failedCount.get() == 0);
            // result.setNrErrors(failedCount.get());
            result.setResult(true);
            result.setNrErrors(0);
            /* } */
        } else {
            if (log.isDetailed())
                logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.AdvancedMode"));

            URL[] urls = new URL[] { resolvedJarUrl };
            URLClassLoader loader = new URLClassLoader(urls, getClass().getClassLoader());

            JobConf conf = new JobConf();
            String hadoopJobNameS = environmentSubstitute(hadoopJobName);
            conf.setJobName(hadoopJobNameS);

            String outputKeyClassS = environmentSubstitute(outputKeyClass);
            conf.setOutputKeyClass(loader.loadClass(outputKeyClassS));
            String outputValueClassS = environmentSubstitute(outputValueClass);
            conf.setOutputValueClass(loader.loadClass(outputValueClassS));

            if (mapperClass != null) {
                String mapperClassS = environmentSubstitute(mapperClass);
                Class<? extends Mapper> mapper = (Class<? extends Mapper>) loader.loadClass(mapperClassS);
                conf.setMapperClass(mapper);
            }
            if (combinerClass != null) {
                String combinerClassS = environmentSubstitute(combinerClass);
                Class<? extends Reducer> combiner = (Class<? extends Reducer>) loader.loadClass(combinerClassS);
                conf.setCombinerClass(combiner);
            }
            if (reducerClass != null) {
                String reducerClassS = environmentSubstitute(reducerClass);
                Class<? extends Reducer> reducer = (Class<? extends Reducer>) loader.loadClass(reducerClassS);
                conf.setReducerClass(reducer);
            }
            if (inputFormatClass != null) {
                String inputFormatClassS = environmentSubstitute(inputFormatClass);
                Class<? extends InputFormat> inputFormat = (Class<? extends InputFormat>) loader
                        .loadClass(inputFormatClassS);
                conf.setInputFormat(inputFormat);
            }
            if (outputFormatClass != null) {
                String outputFormatClassS = environmentSubstitute(outputFormatClass);
                Class<? extends OutputFormat> outputFormat = (Class<? extends OutputFormat>) loader
                        .loadClass(outputFormatClassS);
                conf.setOutputFormat(outputFormat);
            }

            String hdfsHostnameS = environmentSubstitute(hdfsHostname);
            String hdfsPortS = environmentSubstitute(hdfsPort);
            String jobTrackerHostnameS = environmentSubstitute(jobTrackerHostname);
            String jobTrackerPortS = environmentSubstitute(jobTrackerPort);

            // See if we can auto detect the distribution first
            HadoopConfigurer configurer = HadoopConfigurerFactory.locateConfigurer();

            if (configurer == null) {
                // go with what has been selected by the user
                configurer = HadoopConfigurerFactory.getConfigurer(hadoopDistro);

                // if the user-specified distribution is detectable, make sure it is still
                // the current distribution!
                if (configurer != null && configurer.isDetectable()) {
                    if (!configurer.isAvailable()) {
                        throw new KettleException(BaseMessages.getString(PKG,
                                "JobEntryHadoopJobExecutor.Error.DistroNoLongerPresent",
                                configurer.distributionName()));
                    }
                }
            }
            if (configurer == null) {
                throw new KettleException(BaseMessages.getString(PKG,
                        "JobEntryHadoopJobExecutor.Error.UnknownHadoopDistribution", hadoopDistro));
            }
            logBasic(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.Message.DistroConfigMessage",
                    configurer.distributionName()));

            List<String> configMessages = new ArrayList<String>();
            configurer.configure(hdfsHostnameS, hdfsPortS, jobTrackerHostnameS, jobTrackerPortS, conf,
                    configMessages);
            for (String m : configMessages) {
                logBasic(m);
            }

            String inputPathS = environmentSubstitute(inputPath);
            String[] inputPathParts = inputPathS.split(",");
            List<Path> paths = new ArrayList<Path>();
            for (String path : inputPathParts) {
                paths.add(new Path(configurer.getFilesystemURL() + path));
            }
            Path[] finalPaths = paths.toArray(new Path[paths.size()]);

            //FileInputFormat.setInputPaths(conf, new Path(configurer.getFilesystemURL() + inputPathS));
            FileInputFormat.setInputPaths(conf, finalPaths);
            String outputPathS = environmentSubstitute(outputPath);
            FileOutputFormat.setOutputPath(conf, new Path(configurer.getFilesystemURL() + outputPathS));

            // process user defined values
            for (UserDefinedItem item : userDefined) {
                if (item.getName() != null && !"".equals(item.getName()) && item.getValue() != null
                        && !"".equals(item.getValue())) {
                    String nameS = environmentSubstitute(item.getName());
                    String valueS = environmentSubstitute(item.getValue());
                    conf.set(nameS, valueS);
                }
            }

            String workingDirectoryS = environmentSubstitute(workingDirectory);
            conf.setWorkingDirectory(new Path(configurer.getFilesystemURL() + workingDirectoryS));
            conf.setJar(jarUrl);

            String numMapTasksS = environmentSubstitute(numMapTasks);
            String numReduceTasksS = environmentSubstitute(numReduceTasks);
            int numM = 1;
            try {
                numM = Integer.parseInt(numMapTasksS);
            } catch (NumberFormatException e) {
                logError("Can't parse number of map tasks '" + numMapTasksS
                        + "'. Setting number of map tasks to 1");
            }
            int numR = 1;
            try {
                numR = Integer.parseInt(numReduceTasksS);
            } catch (NumberFormatException e) {
                logError("Can't parse number of reduce tasks '" + numReduceTasksS
                        + "'. Setting number of reduce tasks to 1");
            }

            conf.setNumMapTasks(numM);
            conf.setNumReduceTasks(numR);

            JobClient jobClient = new JobClient(conf);
            RunningJob runningJob = jobClient.submitJob(conf);

            String loggingIntervalS = environmentSubstitute(loggingInterval);
            int logIntv = 60;
            try {
                logIntv = Integer.parseInt(loggingIntervalS);
            } catch (NumberFormatException e) {
                logError("Can't parse logging interval '" + loggingIntervalS
                        + "'. Setting logging interval to 60");
            }

            if (blocking) {
                try {
                    int taskCompletionEventIndex = 0;
                    while (!parentJob.isStopped() && !runningJob.isComplete()) {
                        if (logIntv >= 1) {
                            printJobStatus(runningJob);
                            taskCompletionEventIndex = logTaskMessages(runningJob, taskCompletionEventIndex);
                            Thread.sleep(logIntv * 1000);
                        } else {
                            Thread.sleep(60000);
                        }
                    }

                    if (parentJob.isStopped() && !runningJob.isComplete()) {
                        // We must stop the job running on Hadoop
                        runningJob.killJob();
                        // Indicate this job entry did not complete
                        result.setResult(false);
                    }

                    printJobStatus(runningJob);
                    // Log any messages we may have missed while polling
                    logTaskMessages(runningJob, taskCompletionEventIndex);
                } catch (InterruptedException ie) {
                    logError(ie.getMessage(), ie);
                }

                // Entry is successful if the MR job is successful overall
                result.setResult(runningJob.isSuccessful());
            }
        }
    } catch (Throwable t) {
        t.printStackTrace();
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }

    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();

        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}
From source file:org.pentaho.hadoop.mapreduce.test.TestSubmitMapReduceJob.java
License:Open Source License
@Test
public void submitJob() throws Exception {
    String[] args = { "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/input",
            "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/output" };

    JobConf conf = new JobConf();
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    File jar = new File("./test-res/pentaho-mapreduce-sample.jar");
    URLClassLoader loader = new URLClassLoader(new URL[] { jar.toURI().toURL() });

    conf.setMapperClass(
            (Class<? extends Mapper>) loader.loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount$Map"));
    conf.setCombinerClass((Class<? extends Reducer>) loader
            .loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount$Reduce"));
    conf.setReducerClass((Class<? extends Reducer>) loader
            .loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount$Reduce"));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.set("fs.default.name", "hdfs://" + hostname + ":" + hdfsPort);
    conf.set("mapred.job.tracker", hostname + ":" + trackerPort);

    conf.setJarByClass(loader.loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount"));
    conf.setWorkingDirectory(new Path("/tmp/wordcount"));

    JobClient jobClient = new JobClient(conf);
    ClusterStatus status = jobClient.getClusterStatus();
    assertEquals(State.RUNNING, status.getJobTrackerState());

    RunningJob runningJob = jobClient.submitJob(conf);
    System.out.print("Running " + runningJob.getJobName());
    while (!runningJob.isComplete()) {
        System.out.print(".");
        Thread.sleep(500);
    }
    System.out.println();
    System.out.println("Finished " + runningJob.getJobName() + ".");

    FileObject file = fsManager.resolveFile(buildHDFSURL("/junit/wordcount/output/part-00000"));
    String output = IOUtils.toString(file.getContent().getInputStream());
    assertEquals("Bye\t1\nGoodbye\t1\nHadoop\t2\nHello\t2\nWorld\t2\n", output);
}
From source file:org.pentaho.hadoop.mapreduce.test.TransMapReduceJobTestFIXME.java
License:Open Source License
@Test
public void submitJob() throws Exception {
    String[] args = { "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/input",
            "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/output" };

    JobConf conf = new JobConf();
    conf.setJobName("wordcount");

    KettleEnvironment.init();

    TransExecutionConfiguration transExecConfig = new TransExecutionConfiguration();
    TransMeta transMeta = new TransMeta("./test-res/wordcount-mapper.ktr");
    TransConfiguration transConfig = new TransConfiguration(transMeta, transExecConfig);
    conf.set("transformation-map-xml", transConfig.getXML());

    transMeta = new TransMeta("./test-res/wordcount-reducer.ktr");
    transConfig = new TransConfiguration(transMeta, transExecConfig);
    conf.set("transformation-reduce-xml", transConfig.getXML());

    conf.set("transformation-map-input-stepname", "Injector");
    conf.set("transformation-map-output-stepname", "Output");

    conf.set("transformation-reduce-input-stepname", "Injector");
    conf.set("transformation-reduce-output-stepname", "Output");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    File jar = new File("./dist/pentaho-big-data-plugin-TRUNK-SNAPSHOT.jar");
    URLClassLoader loader = new URLClassLoader(new URL[] { jar.toURI().toURL() });

    conf.setMapperClass(
            (Class<? extends Mapper>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransMap"));
    conf.setCombinerClass(
            (Class<? extends Reducer>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransReduce"));
    conf.setReducerClass(
            (Class<? extends Reducer>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransReduce"));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.set("fs.default.name", "hdfs://" + hostname + ":" + hdfsPort);
    conf.set("mapred.job.tracker", hostname + ":" + trackerPort);

    conf.setJar(jar.toURI().toURL().toExternalForm());
    conf.setWorkingDirectory(new Path("/tmp/wordcount"));

    JobClient jobClient = new JobClient(conf);
    ClusterStatus status = jobClient.getClusterStatus();
    assertEquals(State.RUNNING, status.getJobTrackerState());

    RunningJob runningJob = jobClient.submitJob(conf);
    System.out.print("Running " + runningJob.getJobName());
    while (!runningJob.isComplete()) {
        System.out.print(".");
        Thread.sleep(500);
    }
    System.out.println();
    System.out.println("Finished " + runningJob.getJobName() + ".");

    FileObject file = fsManager.resolveFile(buildHDFSURL("/junit/wordcount/output/part-00000"));
    String output = IOUtils.toString(file.getContent().getInputStream());
    assertEquals(
            "Bye\t4\nGood\t2\nGoodbye\t1\nHadoop\t2\nHello\t5\nThis\t1\nWorld\t5\nand\t1\ncounting\t1\nextra\t1\nfor\t1\nis\t1\nsome\t1\ntext\t1\nwords\t1\n",
            output);
}
From source file:org.pig.oink.operation.impl.PigJobServerImpl.java
License:Apache License
@Override
public boolean cancelRequest(String requestId) throws IOException, Exception {
    PigRequestStats stats = null;
    try {
        stats = getRequestStats(requestId);
    } catch (IllegalArgumentException e) {
        logger.error("Invalid request ID", e);
        throw new IllegalArgumentException("Invalid request ID");
    } catch (Exception e) {
        logger.error("Unable to get list of jobs", e);
        throw new IOException("Unable to get list of jobs");
    }

    if (stats.getStatus().equals(Status.SUBMITTED.toString())) {
        List<String> jobs = stats.getJobs();
        for (String job : jobs) {
            job = job.substring(JT_UI.length());
            JobConf jobConf = new JobConf();
            jobConf.set("fs.default.name", PropertyLoader.getInstance().getProperty("fs.default.name"));
            jobConf.set("mapred.job.tracker", PropertyLoader.getInstance().getProperty("jobtracker"));
            try {
                JobClient jobClient = new JobClient(jobConf);
                RunningJob rJob = jobClient.getJob(JobID.forName(job));

                if (!rJob.isComplete()) {
                    rJob.killJob();
                }
            } catch (Exception e) {
                throw new Exception("Unable to kill job " + job);
            }
        }
        PigRequestStats requestStats = new PigRequestStats(0, 0, null, jobs.size());
        requestStats.setJobs(jobs);
        requestStats.setStatus(Status.KILLED.toString());
        Path statsPath = new Path(
                PropertyLoader.getInstance().getProperty(Constants.REQUEST_PATH) + requestId + "/stats");
        PigUtils.writeStatsFile(statsPath, requestStats);
        return true;
    } else {
        return false;
    }
}
From source file:org.pooledtimeseries.healthcheck.CheckCartesianProductSeqFile.java
License:Apache License
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    long start = System.currentTimeMillis();

    JobConf conf = new JobConf("Cartesian Product");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: CheckCartesianProductSeqFile <input sequence file> <out>");
        System.exit(1);
    }

    // Configure the join type
    conf.setJarByClass(CheckCartesianProductSeqFile.class);

    conf.setMapperClass(CartesianMapper.class);
    conf.setReducerClass(CartesianReducer.class);

    conf.setInputFormat(CartesianInputFormat.class);
    CartesianInputFormat.setLeftInputInfo(conf, SequenceFileInputFormat.class, otherArgs[0]);
    CartesianInputFormat.setRightInputInfo(conf, SequenceFileInputFormat.class, otherArgs[0]);

    TextOutputFormat.setOutputPath(conf, new Path(otherArgs[1]));

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    RunningJob job = JobClient.runJob(conf);
    while (!job.isComplete()) {
        Thread.sleep(1000);
    }

    long finish = System.currentTimeMillis();
    System.out.println("Time in ms: " + (finish - start));

    System.exit(job.isSuccessful() ? 0 : 2);
}
From source file:setest.FormatStorageMR.java
License:Open Source License
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.out.println("FormatStorageMR <input> <output>");
        System.exit(-1);
    }

    JobConf conf = new JobConf(FormatStorageMR.class);
    conf.setJobName("FormatStorageMR");

    conf.setNumMapTasks(1);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Unit.Record.class);

    conf.setMapperClass(FormatStorageTestMapper.class);
    conf.setReducerClass(FormatStorageTestReducer.class);

    conf.setInputFormat(FormatStorageInputFormat.class);
    conf.setOutputFormat(FormatStorageOutputFormat.class);
    conf.set("mapred.output.compress", "false");

    Head head = new Head();
    initHead(head);
    head.toJobConf(conf);

    FileInputFormat.setInputPaths(conf, args[0]);
    Path outputPath = new Path(args[1]);
    FileOutputFormat.setOutputPath(conf, outputPath);

    FileSystem fs = outputPath.getFileSystem(conf);
    fs.delete(outputPath, true);

    JobClient jc = new JobClient(conf);
    RunningJob rj = null;
    rj = jc.submitJob(conf);

    String lastReport = "";
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
    long reportTime = System.currentTimeMillis();
    long maxReportInterval = 3 * 1000;
    while (!rj.isComplete()) {
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
        }

        int mapProgress = Math.round(rj.mapProgress() * 100);
        int reduceProgress = Math.round(rj.reduceProgress() * 100);

        String report = " map = " + mapProgress + "%, reduce = " + reduceProgress + "%";

        if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {
            String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
            System.out.println(output);
            lastReport = report;
            reportTime = System.currentTimeMillis();
        }
    }

    System.exit(0);
}