Example usage for org.apache.hadoop.mapred RunningJob getJobName

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred RunningJob getJobName.

Prototype

public String getJobName();

Source Link

Document

Get the name of the job.

Usage

From source file:edu.stolaf.cs.wmrserver.HadoopEngine.java

License:Apache License

public JobInfo getInfo(Submission submission, RunningJob job, JobConf conf)
        throws NotFoundException, InternalException {

    JobInfo info = new JobInfo();

    info.setNativeID(submission.getHadoopID());
    info.setName(job.getJobName());
    info.setTest(false);//w w  w . j  a  v a  2  s. com

    if (conf == null)
        // Can't proceed any further if configuration is unavailable
        return info;

    info.setRequestedMapTasks(conf.getNumMapTasks());
    info.setRequestedReduceTasks(conf.getNumReduceTasks());
    info.setMapper(conf.get(CONF_MAPPER));
    info.setReducer(conf.get(CONF_REDUCER));
    info.setNumericSort(conf.getBoolean(CONF_NUMERIC, false));
    info.setInputPath(
            JobServiceHandler.relativizePath(_homeDir, FileInputFormat.getInputPaths(conf)[0]).toString());
    info.setOutputPath(
            JobServiceHandler.relativizePath(_homeDir, FileOutputFormat.getOutputPath(conf)).toString());

    return info;
}

From source file:eu.scape_project.tb.hadoopjobtracker.HadoobJobTrackerClient.java

License:Apache License

private static String getJobName(JobClient jobClt, JobID singleJobID) throws IOException {
    RunningJob runJob = jobClt.getJob(singleJobID);
    String runJobName = runJob.getJobName();
    return runJobName;
}

From source file:org.apache.accumulo.server.master.CoordinateRecoveryTask.java

License:Apache License

void cleanupOldJobs() {
    try {//w w w .j a  va  2 s  .co m
        Configuration conf = CachedConfiguration.getInstance();
        @SuppressWarnings("deprecation")
        JobClient jc = new JobClient(new org.apache.hadoop.mapred.JobConf(conf));
        for (JobStatus status : jc.getAllJobs()) {
            if (!status.isJobComplete()) {
                RunningJob job = jc.getJob(status.getJobID());
                if (job.getJobName().equals(LogSort.getJobName())) {
                    log.info("found a running " + job.getJobName());
                    Configuration jobConfig = new Configuration(false);
                    log.info("fetching configuration from " + job.getJobFile());
                    jobConfig.addResource(TraceFileSystem
                            .wrap(FileUtil.getFileSystem(conf, ServerConfiguration.getSiteConfiguration()))
                            .open(new Path(job.getJobFile())));
                    if (HdfsZooInstance.getInstance().getInstanceID()
                            .equals(jobConfig.get(LogSort.INSTANCE_ID_PROPERTY))) {
                        log.info("Killing job " + job.getID().toString());
                    }
                }
            }
        }
        FileStatus[] children = fs.listStatus(new Path(ServerConstants.getRecoveryDir()));
        if (children != null) {
            for (FileStatus child : children) {
                log.info("Deleting recovery directory " + child);
                fs.delete(child.getPath(), true);
            }
        }
    } catch (IOException e) {
        log.error("Error cleaning up old Log Sort jobs" + e);
    } catch (Exception e) {
        log.error("Unknown error cleaning up old jobs", e);
    }
}

From source file:org.apache.hcatalog.hcatmix.load.HadoopLoadGenerator.java

License:Apache License

/**
 * Prepare input directory/jobConf and launch the hadoop job, for load testing
 *
 * @param confFileName The properties file for the task, should be available in the classpath
 * @param conf/*w ww.ja v  a 2s  .c  o  m*/
 * @return
 * @throws IOException
 * @throws MetaException
 * @throws TException
 */
public SortedMap<Long, ReduceResult> runLoadTest(String confFileName, Configuration conf)
        throws Exception, MetaException, TException {
    JobConf jobConf;
    if (conf != null) {
        jobConf = new JobConf(conf);
    } else {
        jobConf = new JobConf(new Configuration());
    }
    InputStream confFileIS;
    try {
        confFileIS = HCatMixUtils.getInputStream(confFileName);
    } catch (Exception e) {
        LOG.error("Couldn't load configuration file " + confFileName);
        throw e;
    }
    Properties props = new Properties();
    try {
        props.load(confFileIS);
    } catch (IOException e) {
        LOG.error("Couldn't load properties file: " + confFileName, e);
        throw e;
    }

    LOG.info("Loading configuration file: " + confFileName);
    addToJobConf(jobConf, props, Conf.MAP_RUN_TIME_MINUTES);
    addToJobConf(jobConf, props, Conf.STAT_COLLECTION_INTERVAL_MINUTE);
    addToJobConf(jobConf, props, Conf.THREAD_INCREMENT_COUNT);
    addToJobConf(jobConf, props, Conf.THREAD_INCREMENT_INTERVAL_MINUTES);
    addToJobConf(jobConf, props, Conf.THREAD_COMPLETION_BUFFER_MINUTES);

    int numMappers = Integer
            .parseInt(props.getProperty(Conf.NUM_MAPPERS.propName, "" + Conf.NUM_MAPPERS.defaultValue));
    Path inputDir = new Path(props.getProperty(Conf.INPUT_DIR.propName, Conf.INPUT_DIR.defaultValueStr));
    Path outputDir = new Path(props.getProperty(Conf.OUTPUT_DIR.propName, Conf.OUTPUT_DIR.defaultValueStr));

    jobConf.setJobName(JOB_NAME);
    jobConf.setNumMapTasks(numMappers);
    jobConf.setMapperClass(HCatMapper.class);
    jobConf.setJarByClass(HCatMapper.class);
    jobConf.setReducerClass(HCatReducer.class);
    jobConf.setMapOutputKeyClass(LongWritable.class);
    jobConf.setMapOutputValueClass(IntervalResult.class);
    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(ReduceResult.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.set(Conf.TASK_CLASS_NAMES.getJobConfKey(),
            props.getProperty(Conf.TASK_CLASS_NAMES.propName, Conf.TASK_CLASS_NAMES.defaultValueStr));

    fs = FileSystem.get(jobConf);
    Path jarRoot = new Path("/tmp/hcatmix_jar_" + new Random().nextInt());
    HadoopUtils.uploadClasspathAndAddToJobConf(jobConf, jarRoot);
    fs.deleteOnExit(jarRoot);

    FileInputFormat.setInputPaths(jobConf, createInputFiles(inputDir, numMappers));
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }
    FileOutputFormat.setOutputPath(jobConf, outputDir);

    // Set up delegation token required for hiveMetaStoreClient in map task
    HiveConf hiveConf = new HiveConf(HadoopLoadGenerator.class);
    HiveMetaStoreClient hiveClient = new HiveMetaStoreClient(hiveConf);
    String tokenStr = hiveClient.getDelegationToken(UserGroupInformation.getCurrentUser().getUserName(),
            "mapred");
    Token<? extends AbstractDelegationTokenIdentifier> token = new Token<DelegationTokenIdentifier>();
    token.decodeFromUrlString(tokenStr);
    token.setService(new Text(METASTORE_TOKEN_SIGNATURE));
    jobConf.getCredentials().addToken(new Text(METASTORE_TOKEN_KEY), token);

    // Submit the job, once the job is complete see output
    LOG.info("Submitted hadoop job");
    RunningJob j = JobClient.runJob(jobConf);
    LOG.info("JobID is: " + j.getJobName());
    if (!j.isSuccessful()) {
        throw new IOException("Job failed");
    }
    return readResult(outputDir, jobConf);
}

From source file:org.apache.hive.hcatalog.templeton.tool.LogRetriever.java

License:Apache License

private void logJob(String logDir, String jobID, PrintWriter listWriter) throws IOException {
    RunningJob rj = jobClient.getJob(JobID.forName(jobID));
    String jobURLString = rj.getTrackingURL();

    Path jobDir = new Path(logDir, jobID);
    fs.mkdirs(jobDir);/*  w  ww.j a va 2s .  co  m*/

    // Logger jobconf
    try {
        logJobConf(jobID, jobURLString, jobDir.toString());
    } catch (IOException e) {
        System.err.println("Cannot retrieve job.xml.html for " + jobID);
        e.printStackTrace();
    }

    listWriter.println("job: " + jobID + "(" + "name=" + rj.getJobName() + "," + "status="
            + JobStatus.getJobRunState(rj.getJobState()) + ")");

    // Get completed attempts
    List<AttemptInfo> attempts = new ArrayList<AttemptInfo>();
    for (String type : new String[] { "map", "reduce", "setup", "cleanup" }) {
        try {
            List<AttemptInfo> successAttempts = getCompletedAttempts(jobID, jobURLString, type);
            attempts.addAll(successAttempts);
        } catch (IOException e) {
            System.err.println("Cannot retrieve " + type + " tasks for " + jobID);
            e.printStackTrace();
        }
    }

    // Get failed attempts
    try {
        List<AttemptInfo> failedAttempts = getFailedAttempts(jobID, jobURLString);
        attempts.addAll(failedAttempts);
    } catch (IOException e) {
        System.err.println("Cannot retrieve failed attempts for " + jobID);
        e.printStackTrace();
    }

    // Logger attempts
    for (AttemptInfo attempt : attempts) {
        try {
            logAttempt(jobID, attempt, jobDir.toString());
            listWriter.println("  attempt:" + attempt.id + "(" + "type=" + attempt.type + "," + "status="
                    + attempt.status + "," + "starttime=" + attempt.startTime + "," + "endtime="
                    + attempt.endTime + ")");
        } catch (IOException e) {
            System.err.println("Cannot log attempt " + attempt.id);
            e.printStackTrace();
        }
    }

    listWriter.println();
}

From source file:org.apache.oozie.action.hadoop.TestMapReduceActionExecutor.java

License:Apache License

/**
 * Test "oozie.launcher.mapred.job.name" and "mapred.job.name" can be set in
 * the action configuration and not overridden by the action executor
 *
 * @throws Exception//from  w w  w  .  j  a  v  a 2 s. c  o  m
 */
public void testSetMapredJobName() throws Exception {
    final String launcherJobName = "MapReduceLauncherTest";
    final String mapredJobName = "MapReduceTest";

    FileSystem fs = getFileSystem();

    Path inputDir = new Path(getFsTestCaseDir(), "input");
    Path outputDir = new Path(getFsTestCaseDir(), "output");

    Writer w = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
    w.write("dummy\n");
    w.write("dummy\n");
    w.close();

    XConfiguration mrConfig = getMapReduceConfig(inputDir.toString(), outputDir.toString());
    mrConfig.set("oozie.launcher.mapred.job.name", launcherJobName);
    mrConfig.set("mapred.job.name", mapredJobName);

    StringBuilder sb = new StringBuilder("<map-reduce>").append("<job-tracker>").append(getJobTrackerUri())
            .append("</job-tracker>").append("<name-node>").append(getNameNodeUri()).append("</name-node>")
            .append(mrConfig.toXmlString(false)).append("</map-reduce>");
    String actionXml = sb.toString();

    Context context = createContext("map-reduce", actionXml);
    final RunningJob launcherJob = submitAction(context);
    String launcherId = context.getAction().getExternalId();
    waitFor(120 * 2000, new Predicate() {
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });

    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(),
            context.getProtoActionConf());
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));
    // Assert launcher job name has been set
    System.out.println("Launcher job name: " + launcherJob.getJobName());
    assertTrue(launcherJob.getJobName().equals(launcherJobName));

    MapReduceActionExecutor ae = new MapReduceActionExecutor();
    ae.check(context, context.getAction());
    assertTrue(launcherId.equals(context.getAction().getExternalId()));

    JobConf conf = ae.createBaseHadoopConf(context, XmlUtils.parseXml(actionXml));
    String user = conf.get("user.name");

    JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);
    final RunningJob mrJob = jobClient.getJob(JobID.forName(context.getAction().getExternalChildIDs()));

    waitFor(120 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return mrJob.isComplete();
        }
    });
    assertTrue(mrJob.isSuccessful());
    ae.check(context, context.getAction());

    assertEquals("SUCCEEDED", context.getAction().getExternalStatus());
    assertNull(context.getAction().getData());

    ae.end(context, context.getAction());
    assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus());

    // Assert Mapred job name has been set
    System.out.println("Mapred job name: " + mrJob.getJobName());
    assertTrue(mrJob.getJobName().equals(mapredJobName));

    // Assert for stats info stored in the context.
    assertNull(context.getExecutionStats());

    // External Child IDs used to be null, but after 4.0, become Non-Null in case of MR action.
    assertNotNull(context.getExternalChildIDs());

    // hadoop.counters will always be set in case of MR action.
    assertNotNull(context.getVar("hadoop.counters"));
    String counters = context.getVar("hadoop.counters");
    assertTrue(counters.contains("Counter"));
}

From source file:org.godhuli.rhipe.FileUtils.java

License:Apache License

public REXP getstatus(String jd, boolean geterrors) throws Exception {
    org.apache.hadoop.mapred.JobID jj = org.apache.hadoop.mapred.JobID.forName(jd);
    if (jj == null)
        throw new IOException("Jobtracker could not find jobID: " + jd);
    org.apache.hadoop.mapred.RunningJob rj = jclient.getJob(jj);
    if (rj == null)
        throw new IOException(
                "No such job: " + jd + " available, wrong job? or try the History Viewer (see the Web UI) ");
    String jobfile = rj.getJobFile();
    String jobname = rj.getJobName();
    // cfg.addResource(new Path(jobfile));
    org.apache.hadoop.mapred.Counters cc = rj.getCounters();
    long startsec = getStart(jclient, jj);
    double dura = ((double) System.currentTimeMillis() - startsec) / 1000;
    REXP ro = FileUtils.buildlistFromOldCounter(cc, dura);
    int jobs = rj.getJobState();
    String jobss = null;//from w ww  . j  a  v  a  2 s  .c om
    if (jobs == JobStatus.FAILED)
        jobss = "FAILED";
    else if (jobs == JobStatus.KILLED)
        jobss = "KILLED";
    else if (jobs == JobStatus.PREP)
        jobss = "PREP";
    else if (jobs == JobStatus.RUNNING)
        jobss = "RUNNING";
    else if (jobs == JobStatus.SUCCEEDED)
        jobss = "SUCCEEDED";
    float mapprog = rj.mapProgress(), reduprog = rj.reduceProgress();

    org.apache.hadoop.mapred.TaskReport[] maptr = jclient.getMapTaskReports(jj);
    org.apache.hadoop.mapred.TaskReport[] redtr = jclient.getReduceTaskReports(jj);

    int totalmaps = maptr.length, totalreds = redtr.length;
    int mappending = 0, redpending = 0, maprunning = 0, redrunning = 0, redfailed = 0, redkilled = 0,
            mapkilled = 0, mapfailed = 0, mapcomp = 0, redcomp = 0;
    for (int i = 0; i < maptr.length; i++) {
        TIPStatus t = maptr[i].getCurrentStatus();
        switch (t) {
        case COMPLETE:
            mapcomp++;
            break;
        case FAILED:
            mapfailed++;
            break;
        case PENDING:
            mappending++;
            break;
        case RUNNING:
            maprunning++;
            break;
        case KILLED:
            mapkilled++;
            break;
        }
    }
    for (int i = 0; i < redtr.length; i++) {
        TIPStatus t = redtr[i].getCurrentStatus();
        switch (t) {
        case COMPLETE:
            redcomp++;
            break;
        case FAILED:
            redfailed++;
            break;
        case PENDING:
            redpending++;
            break;
        case RUNNING:
            redrunning++;
            break;
        case KILLED:
            redkilled++;
            break;
        }
    }
    int reduceafails = 0, reduceakilled = 0, mapafails = 0, mapakilled = 0;
    int startfrom = 0;

    REXP.Builder errcontainer = REXP.newBuilder();
    errcontainer.setRclass(REXP.RClass.STRING);
    while (true) {
        org.apache.hadoop.mapred.TaskCompletionEvent[] events = rj.getTaskCompletionEvents(startfrom);
        for (int i = 0; i < events.length; i++) {
            org.apache.hadoop.mapred.TaskCompletionEvent e = events[i];
            int f = 0, k = 0;
            switch (e.getTaskStatus()) {
            case KILLED:
                if (e.isMapTask()) {
                    mapakilled++;
                } else {
                    reduceakilled++;
                }
                break;
            case TIPFAILED:
            case FAILED:
                if (e.isMapTask()) {
                    mapafails++;
                } else {
                    reduceafails++;
                }
                if (geterrors) {
                    REXPProtos.STRING.Builder content = REXPProtos.STRING.newBuilder();
                    String[] s = rj.getTaskDiagnostics(e.getTaskAttemptId());
                    if (s != null && s.length > 0) {
                        content.setStrval(s[0]);
                        errcontainer.addStringValue(content.build());
                    }
                }
                break;
            }
        }
        startfrom += events.length;
        if (events.length == 0)
            break;
    }

    REXP.Builder thevals = REXP.newBuilder();
    thevals.setRclass(REXP.RClass.LIST);
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobss }));
    thevals.addRexpValue(RObjects.buildDoubleVector(new double[] { dura }));
    thevals.addRexpValue(RObjects.buildDoubleVector(new double[] { (double) mapprog, (double) reduprog }));
    thevals.addRexpValue(RObjects.buildIntVector(
            new int[] { totalmaps, mappending, maprunning, mapcomp, mapkilled, mapafails, mapakilled }));
    thevals.addRexpValue(RObjects.buildIntVector(
            new int[] { totalreds, redpending, redrunning, redcomp, redkilled, reduceafails, reduceakilled }));
    thevals.addRexpValue(ro);
    thevals.addRexpValue(errcontainer);
    thevals.addRexpValue(RObjects.makeStringVector(rj.getTrackingURL()));
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobname }));
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobfile }));
    return (thevals.build());
}

From source file:org.godhuli.rhipe.FileUtils.java

License:Apache License

public byte[] getDetailedInfoForJob(String jd) throws Exception {
    org.apache.hadoop.mapred.JobID jj = org.apache.hadoop.mapred.JobID.forName(jd);
    if (jj == null)
        throw new IOException("Jobtracker could not find jobID: " + jd);
    org.apache.hadoop.mapred.RunningJob rj = jclient.getJob(jj);
    if (rj == null)
        throw new IOException(
                "No such job: " + jd + " available, wrong job? or try the History Viewer (see the Web UI) ");
    String jobfile = rj.getJobFile();
    String jobname = rj.getJobName();
    org.apache.hadoop.mapred.Counters cc = rj.getCounters();
    long startsec = getStart(jclient, jj);
    REXP allCounters = FileUtils.buildlistFromOldCounter(cc, 0);
    int jobs = rj.getJobState();
    String jobss = null;/*from  w  ww  . j a  va 2 s.  c om*/
    if (jobs == JobStatus.FAILED)
        jobss = "FAILED";
    else if (jobs == JobStatus.KILLED)
        jobss = "KILLED";
    else if (jobs == JobStatus.PREP)
        jobss = "PREP";
    else if (jobs == JobStatus.RUNNING)
        jobss = "RUNNING";
    else if (jobs == JobStatus.SUCCEEDED)
        jobss = "SUCCEEDED";
    float mapprog = rj.mapProgress(), reduprog = rj.reduceProgress();

    REXP.Builder thevals = REXP.newBuilder();
    thevals.setRclass(REXP.RClass.LIST);
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobss }));
    thevals.addRexpValue(RObjects.buildDoubleVector(new double[] { startsec }));
    thevals.addRexpValue(RObjects.buildDoubleVector(new double[] { (double) mapprog, (double) reduprog }));
    thevals.addRexpValue(allCounters);
    thevals.addRexpValue(RObjects.makeStringVector(rj.getTrackingURL()));
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobname }));
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobfile }));

    org.apache.hadoop.mapred.TaskReport[] maptr = jclient.getMapTaskReports(jj);
    REXP.Builder thevalsA = REXP.newBuilder();
    thevalsA.setRclass(REXP.RClass.LIST);
    for (TaskReport t : maptr) {
        thevalsA.addRexpValue(TaskReportToRexp(t));
    }
    thevals.addRexpValue(thevalsA.build());

    org.apache.hadoop.mapred.TaskReport[] redtr = jclient.getReduceTaskReports(jj);
    REXP.Builder thevalsB = REXP.newBuilder();
    thevalsB.setRclass(REXP.RClass.LIST);
    for (TaskReport t : redtr) {
        thevalsB.addRexpValue(TaskReportToRexp(t));
    }
    thevals.addRexpValue(thevalsB.build());

    return thevals.build().toByteArray();
}

From source file:org.pentaho.hadoop.mapreduce.test.TestSubmitMapReduceJob.java

License:Open Source License

@Test
public void submitJob() throws Exception {

    String[] args = { "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/input",
            "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/output" };

    JobConf conf = new JobConf();
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    File jar = new File("./test-res/pentaho-mapreduce-sample.jar");

    URLClassLoader loader = new URLClassLoader(new URL[] { jar.toURI().toURL() });

    conf.setMapperClass(//from  www . j  av a  2s  .com
            (Class<? extends Mapper>) loader.loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount$Map"));
    conf.setCombinerClass((Class<? extends Reducer>) loader
            .loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount$Reduce"));
    conf.setReducerClass((Class<? extends Reducer>) loader
            .loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount$Reduce"));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.set("fs.default.name", "hdfs://" + hostname + ":" + hdfsPort);
    conf.set("mapred.job.tracker", hostname + ":" + trackerPort);

    conf.setJarByClass(loader.loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount"));
    conf.setWorkingDirectory(new Path("/tmp/wordcount"));

    JobClient jobClient = new JobClient(conf);
    ClusterStatus status = jobClient.getClusterStatus();
    assertEquals(State.RUNNING, status.getJobTrackerState());

    RunningJob runningJob = jobClient.submitJob(conf);
    System.out.print("Running " + runningJob.getJobName() + "");
    while (!runningJob.isComplete()) {
        System.out.print(".");
        Thread.sleep(500);
    }
    System.out.println();
    System.out.println("Finished " + runningJob.getJobName() + ".");

    FileObject file = fsManager.resolveFile(buildHDFSURL("/junit/wordcount/output/part-00000"));
    String output = IOUtils.toString(file.getContent().getInputStream());
    assertEquals("Bye\t1\nGoodbye\t1\nHadoop\t2\nHello\t2\nWorld\t2\n", output);
}

From source file:org.pentaho.hadoop.mapreduce.test.TransMapReduceJobTestFIXME.java

License:Open Source License

@Test
public void submitJob() throws Exception {

    String[] args = { "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/input",
            "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/output" };

    JobConf conf = new JobConf();
    conf.setJobName("wordcount");

    KettleEnvironment.init();//  www  .ja  v a  2s . co m
    TransExecutionConfiguration transExecConfig = new TransExecutionConfiguration();
    TransMeta transMeta = new TransMeta("./test-res/wordcount-mapper.ktr");
    TransConfiguration transConfig = new TransConfiguration(transMeta, transExecConfig);
    conf.set("transformation-map-xml", transConfig.getXML());

    transMeta = new TransMeta("./test-res/wordcount-reducer.ktr");
    transConfig = new TransConfiguration(transMeta, transExecConfig);
    conf.set("transformation-reduce-xml", transConfig.getXML());

    conf.set("transformation-map-input-stepname", "Injector");
    conf.set("transformation-map-output-stepname", "Output");

    conf.set("transformation-reduce-input-stepname", "Injector");
    conf.set("transformation-reduce-output-stepname", "Output");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    File jar = new File("./dist/pentaho-big-data-plugin-TRUNK-SNAPSHOT.jar");

    URLClassLoader loader = new URLClassLoader(new URL[] { jar.toURI().toURL() });

    conf.setMapperClass(
            (Class<? extends Mapper>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransMap"));
    conf.setCombinerClass(
            (Class<? extends Reducer>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransReduce"));
    conf.setReducerClass(
            (Class<? extends Reducer>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransReduce"));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.set("fs.default.name", "hdfs://" + hostname + ":" + hdfsPort);
    conf.set("mapred.job.tracker", hostname + ":" + trackerPort);

    conf.setJar(jar.toURI().toURL().toExternalForm());
    conf.setWorkingDirectory(new Path("/tmp/wordcount"));

    JobClient jobClient = new JobClient(conf);
    ClusterStatus status = jobClient.getClusterStatus();
    assertEquals(State.RUNNING, status.getJobTrackerState());

    RunningJob runningJob = jobClient.submitJob(conf);
    System.out.print("Running " + runningJob.getJobName() + "");
    while (!runningJob.isComplete()) {
        System.out.print(".");
        Thread.sleep(500);
    }
    System.out.println();
    System.out.println("Finished " + runningJob.getJobName() + ".");

    FileObject file = fsManager.resolveFile(buildHDFSURL("/junit/wordcount/output/part-00000"));
    String output = IOUtils.toString(file.getContent().getInputStream());
    assertEquals(
            "Bye\t4\nGood\t2\nGoodbye\t1\nHadoop\t2\nHello\t5\nThis\t1\nWorld\t5\nand\t1\ncounting\t1\nextra\t1\nfor\t1\nis\t1\nsome\t1\ntext\t1\nwords\t1\n",
            output);
}