Example usage for org.apache.hadoop.mapred RunningJob getJobName

List of usage examples for org.apache.hadoop.mapred RunningJob getJobName

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred RunningJob getJobName.

Prototype

public String getJobName();

Source Link

Document

Get the name of the job.

Usage

From source file:edu.stolaf.cs.wmrserver.HadoopEngine.java

License:Apache License

public JobInfo getInfo(Submission submission, RunningJob job, JobConf conf)
        throws NotFoundException, InternalException {

    JobInfo info = new JobInfo();

    info.setNativeID(submission.getHadoopID());
    info.setName(job.getJobName());
    info.setTest(false);//w w  w . j  a  v a  2  s. com

    if (conf == null)
        // Can't proceed any further if configuration is unavailable
        return info;

    info.setRequestedMapTasks(conf.getNumMapTasks());
    info.setRequestedReduceTasks(conf.getNumReduceTasks());
    info.setMapper(conf.get(CONF_MAPPER));
    info.setReducer(conf.get(CONF_REDUCER));
    info.setNumericSort(conf.getBoolean(CONF_NUMERIC, false));
    info.setInputPath(
            JobServiceHandler.relativizePath(_homeDir, FileInputFormat.getInputPaths(conf)[0]).toString());
    info.setOutputPath(
            JobServiceHandler.relativizePath(_homeDir, FileOutputFormat.getOutputPath(conf)).toString());

    return info;
}

From source file:eu.scape_project.tb.hadoopjobtracker.HadoobJobTrackerClient.java

License:Apache License

private static String getJobName(JobClient jobClt, JobID singleJobID) throws IOException {
    RunningJob runJob = jobClt.getJob(singleJobID);
    String runJobName = runJob.getJobName();
    return runJobName;
}

From source file:org.apache.accumulo.server.master.CoordinateRecoveryTask.java

License:Apache License

void cleanupOldJobs() {
    try {//w w w .j a  va  2 s  .co m
        Configuration conf = CachedConfiguration.getInstance();
        @SuppressWarnings("deprecation")
        JobClient jc = new JobClient(new org.apache.hadoop.mapred.JobConf(conf));
        for (JobStatus status : jc.getAllJobs()) {
            if (!status.isJobComplete()) {
                RunningJob job = jc.getJob(status.getJobID());
                if (job.getJobName().equals(LogSort.getJobName())) {
                    log.info("found a running " + job.getJobName());
                    Configuration jobConfig = new Configuration(false);
                    log.info("fetching configuration from " + job.getJobFile());
                    jobConfig.addResource(TraceFileSystem
                            .wrap(FileUtil.getFileSystem(conf, ServerConfiguration.getSiteConfiguration()))
                            .open(new Path(job.getJobFile())));
                    if (HdfsZooInstance.getInstance().getInstanceID()
                            .equals(jobConfig.get(LogSort.INSTANCE_ID_PROPERTY))) {
                        log.info("Killing job " + job.getID().toString());
                    }
                }
            }
        }
        FileStatus[] children = fs.listStatus(new Path(ServerConstants.getRecoveryDir()));
        if (children != null) {
            for (FileStatus child : children) {
                log.info("Deleting recovery directory " + child);
                fs.delete(child.getPath(), true);
            }
        }
    } catch (IOException e) {
        log.error("Error cleaning up old Log Sort jobs" + e);
    } catch (Exception e) {
        log.error("Unknown error cleaning up old jobs", e);
    }
}

From source file:org.apache.hcatalog.hcatmix.load.HadoopLoadGenerator.java

License:Apache License

/**
 * Prepare input directory/jobConf and launch the hadoop job, for load testing
 *
 * @param confFileName The properties file for the task, should be available in the classpath
 * @param conf/*w ww.ja v  a 2s  .c  o  m*/
 * @return
 * @throws IOException
 * @throws MetaException
 * @throws TException
 */
public SortedMap<Long, ReduceResult> runLoadTest(String confFileName, Configuration conf)
        throws Exception, MetaException, TException {
    JobConf jobConf;
    if (conf != null) {
        jobConf = new JobConf(conf);
    } else {
        jobConf = new JobConf(new Configuration());
    }
    InputStream confFileIS;
    try {
        confFileIS = HCatMixUtils.getInputStream(confFileName);
    } catch (Exception e) {
        LOG.error("Couldn't load configuration file " + confFileName);
        throw e;
    }
    Properties props = new Properties();
    try {
        props.load(confFileIS);
    } catch (IOException e) {
        LOG.error("Couldn't load properties file: " + confFileName, e);
        throw e;
    }

    LOG.info("Loading configuration file: " + confFileName);
    addToJobConf(jobConf, props, Conf.MAP_RUN_TIME_MINUTES);
    addToJobConf(jobConf, props, Conf.STAT_COLLECTION_INTERVAL_MINUTE);
    addToJobConf(jobConf, props, Conf.THREAD_INCREMENT_COUNT);
    addToJobConf(jobConf, props, Conf.THREAD_INCREMENT_INTERVAL_MINUTES);
    addToJobConf(jobConf, props, Conf.THREAD_COMPLETION_BUFFER_MINUTES);

    int numMappers = Integer
            .parseInt(props.getProperty(Conf.NUM_MAPPERS.propName, "" + Conf.NUM_MAPPERS.defaultValue));
    Path inputDir = new Path(props.getProperty(Conf.INPUT_DIR.propName, Conf.INPUT_DIR.defaultValueStr));
    Path outputDir = new Path(props.getProperty(Conf.OUTPUT_DIR.propName, Conf.OUTPUT_DIR.defaultValueStr));

    jobConf.setJobName(JOB_NAME);
    jobConf.setNumMapTasks(numMappers);
    jobConf.setMapperClass(HCatMapper.class);
    jobConf.setJarByClass(HCatMapper.class);
    jobConf.setReducerClass(HCatReducer.class);
    jobConf.setMapOutputKeyClass(LongWritable.class);
    jobConf.setMapOutputValueClass(IntervalResult.class);
    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(ReduceResult.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.set(Conf.TASK_CLASS_NAMES.getJobConfKey(),
            props.getProperty(Conf.TASK_CLASS_NAMES.propName, Conf.TASK_CLASS_NAMES.defaultValueStr));

    fs = FileSystem.get(jobConf);
    Path jarRoot = new Path("/tmp/hcatmix_jar_" + new Random().nextInt());
    HadoopUtils.uploadClasspathAndAddToJobConf(jobConf, jarRoot);
    fs.deleteOnExit(jarRoot);

    FileInputFormat.setInputPaths(jobConf, createInputFiles(inputDir, numMappers));
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }
    FileOutputFormat.setOutputPath(jobConf, outputDir);

    // Set up delegation token required for hiveMetaStoreClient in map task
    HiveConf hiveConf = new HiveConf(HadoopLoadGenerator.class);
    HiveMetaStoreClient hiveClient = new HiveMetaStoreClient(hiveConf);
    String tokenStr = hiveClient.getDelegationToken(UserGroupInformation.getCurrentUser().getUserName(),
            "mapred");
    Token<? extends AbstractDelegationTokenIdentifier> token = new Token<DelegationTokenIdentifier>();
    token.decodeFromUrlString(tokenStr);
    token.setService(new Text(METASTORE_TOKEN_SIGNATURE));
    jobConf.getCredentials().addToken(new Text(METASTORE_TOKEN_KEY), token);

    // Submit the job, once the job is complete see output
    LOG.info("Submitted hadoop job");
    RunningJob j = JobClient.runJob(jobConf);
    LOG.info("JobID is: " + j.getJobName());
    if (!j.isSuccessful()) {
        throw new IOException("Job failed");
    }
    return readResult(outputDir, jobConf);
}

From source file:org.apache.hive.hcatalog.templeton.tool.LogRetriever.java

License:Apache License

private void logJob(String logDir, String jobID, PrintWriter listWriter) throws IOException {
    RunningJob rj = jobClient.getJob(JobID.forName(jobID));
    String jobURLString = rj.getTrackingURL();

    Path jobDir = new Path(logDir, jobID);
    fs.mkdirs(jobDir);/*  w  ww.j a va 2s .  co  m*/

    // Logger jobconf
    try {
        logJobConf(jobID, jobURLString, jobDir.toString());
    } catch (IOException e) {
        System.err.println("Cannot retrieve job.xml.html for " + jobID);
        e.printStackTrace();
    }

    listWriter.println("job: " + jobID + "(" + "name=" + rj.getJobName() + "," + "status="
            + JobStatus.getJobRunState(rj.getJobState()) + ")");

    // Get completed attempts
    List<AttemptInfo> attempts = new ArrayList<AttemptInfo>();
    for (String type : new String[] { "map", "reduce", "setup", "cleanup" }) {
        try {
            List<AttemptInfo> successAttempts = getCompletedAttempts(jobID, jobURLString, type);
            attempts.addAll(successAttempts);
        } catch (IOException e) {
            System.err.println("Cannot retrieve " + type + " tasks for " + jobID);
            e.printStackTrace();
        }
    }

    // Get failed attempts
    try {
        List<AttemptInfo> failedAttempts = getFailedAttempts(jobID, jobURLString);
        attempts.addAll(failedAttempts);
    } catch (IOException e) {
        System.err.println("Cannot retrieve failed attempts for " + jobID);
        e.printStackTrace();
    }

    // Logger attempts
    for (AttemptInfo attempt : attempts) {
        try {
            logAttempt(jobID, attempt, jobDir.toString());
            listWriter.println("  attempt:" + attempt.id + "(" + "type=" + attempt.type + "," + "status="
                    + attempt.status + "," + "starttime=" + attempt.startTime + "," + "endtime="
                    + attempt.endTime + ")");
        } catch (IOException e) {
            System.err.println("Cannot log attempt " + attempt.id);
            e.printStackTrace();
        }
    }

    listWriter.println();
}

From source file:org.apache.oozie.action.hadoop.TestMapReduceActionExecutor.java

License:Apache License

/**
 * Test "oozie.launcher.mapred.job.name" and "mapred.job.name" can be set in
 * the action configuration and not overridden by the action executor
 *
 * @throws Exception//from  w w  w  .  j  a  v  a 2 s. c  o  m
 */
public void testSetMapredJobName() throws Exception {
    final String launcherJobName = "MapReduceLauncherTest";
    final String mapredJobName = "MapReduceTest";

    FileSystem fs = getFileSystem();

    Path inputDir = new Path(getFsTestCaseDir(), "input");
    Path outputDir = new Path(getFsTestCaseDir(), "output");

    Writer w = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
    w.write("dummy\n");
    w.write("dummy\n");
    w.close();

    XConfiguration mrConfig = getMapReduceConfig(inputDir.toString(), outputDir.toString());
    mrConfig.set("oozie.launcher.mapred.job.name", launcherJobName);
    mrConfig.set("mapred.job.name", mapredJobName);

    StringBuilder sb = new StringBuilder("<map-reduce>").append("<job-tracker>").append(getJobTrackerUri())
            .append("</job-tracker>").append("<name-node>").append(getNameNodeUri()).append("</name-node>")
            .append(mrConfig.toXmlString(false)).append("</map-reduce>");
    String actionXml = sb.toString();

    Context context = createContext("map-reduce", actionXml);
    final RunningJob launcherJob = submitAction(context);
    String launcherId = context.getAction().getExternalId();
    waitFor(120 * 2000, new Predicate() {
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });

    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(),
            context.getProtoActionConf());
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));
    // Assert launcher job name has been set
    System.out.println("Launcher job name: " + launcherJob.getJobName());
    assertTrue(launcherJob.getJobName().equals(launcherJobName));

    MapReduceActionExecutor ae = new MapReduceActionExecutor();
    ae.check(context, context.getAction());
    assertTrue(launcherId.equals(context.getAction().getExternalId()));

    JobConf conf = ae.createBaseHadoopConf(context, XmlUtils.parseXml(actionXml));
    String user = conf.get("user.name");

    JobClient jobClient = Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);
    final RunningJob mrJob = jobClient.getJob(JobID.forName(context.getAction().getExternalChildIDs()));

    waitFor(120 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return mrJob.isComplete();
        }
    });
    assertTrue(mrJob.isSuccessful());
    ae.check(context, context.getAction());

    assertEquals("SUCCEEDED", context.getAction().getExternalStatus());
    assertNull(context.getAction().getData());

    ae.end(context, context.getAction());
    assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus());

    // Assert Mapred job name has been set
    System.out.println("Mapred job name: " + mrJob.getJobName());
    assertTrue(mrJob.getJobName().equals(mapredJobName));

    // Assert for stats info stored in the context.
    assertNull(context.getExecutionStats());

    // External Child IDs used to be null, but after 4.0, become Non-Null in case of MR action.
    assertNotNull(context.getExternalChildIDs());

    // hadoop.counters will always be set in case of MR action.
    assertNotNull(context.getVar("hadoop.counters"));
    String counters = context.getVar("hadoop.counters");
    assertTrue(counters.contains("Counter"));
}

From source file:org.godhuli.rhipe.FileUtils.java

License:Apache License

public REXP getstatus(String jd, boolean geterrors) throws Exception {
    org.apache.hadoop.mapred.JobID jj = org.apache.hadoop.mapred.JobID.forName(jd);
    if (jj == null)
        throw new IOException("Jobtracker could not find jobID: " + jd);
    org.apache.hadoop.mapred.RunningJob rj = jclient.getJob(jj);
    if (rj == null)
        throw new IOException(
                "No such job: " + jd + " available, wrong job? or try the History Viewer (see the Web UI) ");
    String jobfile = rj.getJobFile();
    String jobname = rj.getJobName();
    // cfg.addResource(new Path(jobfile));
    org.apache.hadoop.mapred.Counters cc = rj.getCounters();
    long startsec = getStart(jclient, jj);
    double dura = ((double) System.currentTimeMillis() - startsec) / 1000;
    REXP ro = FileUtils.buildlistFromOldCounter(cc, dura);
    int jobs = rj.getJobState();
    String jobss = null;//from w ww  . j  a  v  a  2 s  .c om
    if (jobs == JobStatus.FAILED)
        jobss = "FAILED";
    else if (jobs == JobStatus.KILLED)
        jobss = "KILLED";
    else if (jobs == JobStatus.PREP)
        jobss = "PREP";
    else if (jobs == JobStatus.RUNNING)
        jobss = "RUNNING";
    else if (jobs == JobStatus.SUCCEEDED)
        jobss = "SUCCEEDED";
    float mapprog = rj.mapProgress(), reduprog = rj.reduceProgress();

    org.apache.hadoop.mapred.TaskReport[] maptr = jclient.getMapTaskReports(jj);
    org.apache.hadoop.mapred.TaskReport[] redtr = jclient.getReduceTaskReports(jj);

    int totalmaps = maptr.length, totalreds = redtr.length;
    int mappending = 0, redpending = 0, maprunning = 0, redrunning = 0, redfailed = 0, redkilled = 0,
            mapkilled = 0, mapfailed = 0, mapcomp = 0, redcomp = 0;
    for (int i = 0; i < maptr.length; i++) {
        TIPStatus t = maptr[i].getCurrentStatus();
        switch (t) {
        case COMPLETE:
            mapcomp++;
            break;
        case FAILED:
            mapfailed++;
            break;
        case PENDING:
            mappending++;
            break;
        case RUNNING:
            maprunning++;
            break;
        case KILLED:
            mapkilled++;
            break;
        }
    }
    for (int i = 0; i < redtr.length; i++) {
        TIPStatus t = redtr[i].getCurrentStatus();
        switch (t) {
        case COMPLETE:
            redcomp++;
            break;
        case FAILED:
            redfailed++;
            break;
        case PENDING:
            redpending++;
            break;
        case RUNNING:
            redrunning++;
            break;
        case KILLED:
            redkilled++;
            break;
        }
    }
    int reduceafails = 0, reduceakilled = 0, mapafails = 0, mapakilled = 0;
    int startfrom = 0;

    REXP.Builder errcontainer = REXP.newBuilder();
    errcontainer.setRclass(REXP.RClass.STRING);
    while (true) {
        org.apache.hadoop.mapred.TaskCompletionEvent[] events = rj.getTaskCompletionEvents(startfrom);
        for (int i = 0; i < events.length; i++) {
            org.apache.hadoop.mapred.TaskCompletionEvent e = events[i];
            int f = 0, k = 0;
            switch (e.getTaskStatus()) {
            case KILLED:
                if (e.isMapTask()) {
                    mapakilled++;
                } else {
                    reduceakilled++;
                }
                break;
            case TIPFAILED:
            case FAILED:
                if (e.isMapTask()) {
                    mapafails++;
                } else {
                    reduceafails++;
                }
                if (geterrors) {
                    REXPProtos.STRING.Builder content = REXPProtos.STRING.newBuilder();
                    String[] s = rj.getTaskDiagnostics(e.getTaskAttemptId());
                    if (s != null && s.length > 0) {
                        content.setStrval(s[0]);
                        errcontainer.addStringValue(content.build());
                    }
                }
                break;
            }
        }
        startfrom += events.length;
        if (events.length == 0)
            break;
    }

    REXP.Builder thevals = REXP.newBuilder();
    thevals.setRclass(REXP.RClass.LIST);
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobss }));
    thevals.addRexpValue(RObjects.buildDoubleVector(new double[] { dura }));
    thevals.addRexpValue(RObjects.buildDoubleVector(new double[] { (double) mapprog, (double) reduprog }));
    thevals.addRexpValue(RObjects.buildIntVector(
            new int[] { totalmaps, mappending, maprunning, mapcomp, mapkilled, mapafails, mapakilled }));
    thevals.addRexpValue(RObjects.buildIntVector(
            new int[] { totalreds, redpending, redrunning, redcomp, redkilled, reduceafails, reduceakilled }));
    thevals.addRexpValue(ro);
    thevals.addRexpValue(errcontainer);
    thevals.addRexpValue(RObjects.makeStringVector(rj.getTrackingURL()));
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobname }));
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobfile }));
    return (thevals.build());
}

From source file:org.godhuli.rhipe.FileUtils.java

License:Apache License

public byte[] getDetailedInfoForJob(String jd) throws Exception {
    org.apache.hadoop.mapred.JobID jj = org.apache.hadoop.mapred.JobID.forName(jd);
    if (jj == null)
        throw new IOException("Jobtracker could not find jobID: " + jd);
    org.apache.hadoop.mapred.RunningJob rj = jclient.getJob(jj);
    if (rj == null)
        throw new IOException(
                "No such job: " + jd + " available, wrong job? or try the History Viewer (see the Web UI) ");
    String jobfile = rj.getJobFile();
    String jobname = rj.getJobName();
    org.apache.hadoop.mapred.Counters cc = rj.getCounters();
    long startsec = getStart(jclient, jj);
    REXP allCounters = FileUtils.buildlistFromOldCounter(cc, 0);
    int jobs = rj.getJobState();
    String jobss = null;/*from  w  ww  . j a  va 2 s.  c om*/
    if (jobs == JobStatus.FAILED)
        jobss = "FAILED";
    else if (jobs == JobStatus.KILLED)
        jobss = "KILLED";
    else if (jobs == JobStatus.PREP)
        jobss = "PREP";
    else if (jobs == JobStatus.RUNNING)
        jobss = "RUNNING";
    else if (jobs == JobStatus.SUCCEEDED)
        jobss = "SUCCEEDED";
    float mapprog = rj.mapProgress(), reduprog = rj.reduceProgress();

    REXP.Builder thevals = REXP.newBuilder();
    thevals.setRclass(REXP.RClass.LIST);
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobss }));
    thevals.addRexpValue(RObjects.buildDoubleVector(new double[] { startsec }));
    thevals.addRexpValue(RObjects.buildDoubleVector(new double[] { (double) mapprog, (double) reduprog }));
    thevals.addRexpValue(allCounters);
    thevals.addRexpValue(RObjects.makeStringVector(rj.getTrackingURL()));
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobname }));
    thevals.addRexpValue(RObjects.makeStringVector(new String[] { jobfile }));

    org.apache.hadoop.mapred.TaskReport[] maptr = jclient.getMapTaskReports(jj);
    REXP.Builder thevalsA = REXP.newBuilder();
    thevalsA.setRclass(REXP.RClass.LIST);
    for (TaskReport t : maptr) {
        thevalsA.addRexpValue(TaskReportToRexp(t));
    }
    thevals.addRexpValue(thevalsA.build());

    org.apache.hadoop.mapred.TaskReport[] redtr = jclient.getReduceTaskReports(jj);
    REXP.Builder thevalsB = REXP.newBuilder();
    thevalsB.setRclass(REXP.RClass.LIST);
    for (TaskReport t : redtr) {
        thevalsB.addRexpValue(TaskReportToRexp(t));
    }
    thevals.addRexpValue(thevalsB.build());

    return thevals.build().toByteArray();
}

From source file:org.pentaho.hadoop.mapreduce.test.TestSubmitMapReduceJob.java

License:Open Source License

@Test
public void submitJob() throws Exception {

    String[] args = { "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/input",
            "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/output" };

    JobConf conf = new JobConf();
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    File jar = new File("./test-res/pentaho-mapreduce-sample.jar");

    URLClassLoader loader = new URLClassLoader(new URL[] { jar.toURI().toURL() });

    conf.setMapperClass(//from  www . j  av a  2s  .com
            (Class<? extends Mapper>) loader.loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount$Map"));
    conf.setCombinerClass((Class<? extends Reducer>) loader
            .loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount$Reduce"));
    conf.setReducerClass((Class<? extends Reducer>) loader
            .loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount$Reduce"));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.set("fs.default.name", "hdfs://" + hostname + ":" + hdfsPort);
    conf.set("mapred.job.tracker", hostname + ":" + trackerPort);

    conf.setJarByClass(loader.loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount"));
    conf.setWorkingDirectory(new Path("/tmp/wordcount"));

    JobClient jobClient = new JobClient(conf);
    ClusterStatus status = jobClient.getClusterStatus();
    assertEquals(State.RUNNING, status.getJobTrackerState());

    RunningJob runningJob = jobClient.submitJob(conf);
    System.out.print("Running " + runningJob.getJobName() + "");
    while (!runningJob.isComplete()) {
        System.out.print(".");
        Thread.sleep(500);
    }
    System.out.println();
    System.out.println("Finished " + runningJob.getJobName() + ".");

    FileObject file = fsManager.resolveFile(buildHDFSURL("/junit/wordcount/output/part-00000"));
    String output = IOUtils.toString(file.getContent().getInputStream());
    assertEquals("Bye\t1\nGoodbye\t1\nHadoop\t2\nHello\t2\nWorld\t2\n", output);
}

From source file:org.pentaho.hadoop.mapreduce.test.TransMapReduceJobTestFIXME.java

License:Open Source License

@Test
public void submitJob() throws Exception {

    String[] args = { "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/input",
            "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/output" };

    JobConf conf = new JobConf();
    conf.setJobName("wordcount");

    KettleEnvironment.init();//  www  .ja  v a  2s . co m
    TransExecutionConfiguration transExecConfig = new TransExecutionConfiguration();
    TransMeta transMeta = new TransMeta("./test-res/wordcount-mapper.ktr");
    TransConfiguration transConfig = new TransConfiguration(transMeta, transExecConfig);
    conf.set("transformation-map-xml", transConfig.getXML());

    transMeta = new TransMeta("./test-res/wordcount-reducer.ktr");
    transConfig = new TransConfiguration(transMeta, transExecConfig);
    conf.set("transformation-reduce-xml", transConfig.getXML());

    conf.set("transformation-map-input-stepname", "Injector");
    conf.set("transformation-map-output-stepname", "Output");

    conf.set("transformation-reduce-input-stepname", "Injector");
    conf.set("transformation-reduce-output-stepname", "Output");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    File jar = new File("./dist/pentaho-big-data-plugin-TRUNK-SNAPSHOT.jar");

    URLClassLoader loader = new URLClassLoader(new URL[] { jar.toURI().toURL() });

    conf.setMapperClass(
            (Class<? extends Mapper>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransMap"));
    conf.setCombinerClass(
            (Class<? extends Reducer>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransReduce"));
    conf.setReducerClass(
            (Class<? extends Reducer>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransReduce"));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.set("fs.default.name", "hdfs://" + hostname + ":" + hdfsPort);
    conf.set("mapred.job.tracker", hostname + ":" + trackerPort);

    conf.setJar(jar.toURI().toURL().toExternalForm());
    conf.setWorkingDirectory(new Path("/tmp/wordcount"));

    JobClient jobClient = new JobClient(conf);
    ClusterStatus status = jobClient.getClusterStatus();
    assertEquals(State.RUNNING, status.getJobTrackerState());

    RunningJob runningJob = jobClient.submitJob(conf);
    System.out.print("Running " + runningJob.getJobName() + "");
    while (!runningJob.isComplete()) {
        System.out.print(".");
        Thread.sleep(500);
    }
    System.out.println();
    System.out.println("Finished " + runningJob.getJobName() + ".");

    FileObject file = fsManager.resolveFile(buildHDFSURL("/junit/wordcount/output/part-00000"));
    String output = IOUtils.toString(file.getContent().getInputStream());
    assertEquals(
            "Bye\t4\nGood\t2\nGoodbye\t1\nHadoop\t2\nHello\t5\nThis\t1\nWorld\t5\nand\t1\ncounting\t1\nextra\t1\nfor\t1\nis\t1\nsome\t1\ntext\t1\nwords\t1\n",
            output);
}