List of usage examples for org.apache.hadoop.mapred JobConf setWorkingDirectory
public void setWorkingDirectory(Path dir)
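Before the full examples below, a minimal sketch of a typical call, assuming only Hadoop's old mapred API on the classpath; the class name, job name, and working-directory path are illustrative placeholders, not taken from the examples on this page.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class WorkingDirectorySketch {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        conf.setJobName("example");

        // Set the working directory of the job's default file system;
        // relative Paths used by the job are resolved against it.
        conf.setWorkingDirectory(new Path("/tmp/example-workdir"));

        // getWorkingDirectory() returns the directory just set, or the
        // file system's default working directory if none was set.
        System.out.println(conf.getWorkingDirectory());
    }
}

Both examples below set the working directory explicitly: the test job points it at an HDFS path (/tmp/wordcount), while the local driver points it at "./" so relative paths resolve against the launch directory.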
From source file: org.pentaho.hadoop.mapreduce.test.TransMapReduceJobTestFIXME.java
License: Open Source License
@Test
public void submitJob() throws Exception {
    String[] args = { "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/input",
            "hdfs://" + hostname + ":" + hdfsPort + "/junit/wordcount/output" };

    JobConf conf = new JobConf();
    conf.setJobName("wordcount");

    KettleEnvironment.init();

    // Embed the mapper and reducer Kettle transformations in the job configuration.
    TransExecutionConfiguration transExecConfig = new TransExecutionConfiguration();
    TransMeta transMeta = new TransMeta("./test-res/wordcount-mapper.ktr");
    TransConfiguration transConfig = new TransConfiguration(transMeta, transExecConfig);
    conf.set("transformation-map-xml", transConfig.getXML());

    transMeta = new TransMeta("./test-res/wordcount-reducer.ktr");
    transConfig = new TransConfiguration(transMeta, transExecConfig);
    conf.set("transformation-reduce-xml", transConfig.getXML());

    // Step names that feed data into and out of the transformations.
    conf.set("transformation-map-input-stepname", "Injector");
    conf.set("transformation-map-output-stepname", "Output");
    conf.set("transformation-reduce-input-stepname", "Injector");
    conf.set("transformation-reduce-output-stepname", "Output");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    // Load the mapper/combiner/reducer classes from the plugin jar.
    File jar = new File("./dist/pentaho-big-data-plugin-TRUNK-SNAPSHOT.jar");
    URLClassLoader loader = new URLClassLoader(new URL[] { jar.toURI().toURL() });
    conf.setMapperClass(
            (Class<? extends Mapper>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransMap"));
    conf.setCombinerClass(
            (Class<? extends Reducer>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransReduce"));
    conf.setReducerClass(
            (Class<? extends Reducer>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransReduce"));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.set("fs.default.name", "hdfs://" + hostname + ":" + hdfsPort);
    conf.set("mapred.job.tracker", hostname + ":" + trackerPort);

    conf.setJar(jar.toURI().toURL().toExternalForm());
    conf.setWorkingDirectory(new Path("/tmp/wordcount"));

    JobClient jobClient = new JobClient(conf);
    ClusterStatus status = jobClient.getClusterStatus();
    assertEquals(State.RUNNING, status.getJobTrackerState());

    RunningJob runningJob = jobClient.submitJob(conf);
    System.out.print("Running " + runningJob.getJobName());
    while (!runningJob.isComplete()) {
        System.out.print(".");
        Thread.sleep(500);
    }
    System.out.println();
    System.out.println("Finished " + runningJob.getJobName() + ".");

    // Verify the word counts written to the job output file.
    FileObject file = fsManager.resolveFile(buildHDFSURL("/junit/wordcount/output/part-00000"));
    String output = IOUtils.toString(file.getContent().getInputStream());
    assertEquals(
            "Bye\t4\nGood\t2\nGoodbye\t1\nHadoop\t2\nHello\t5\nThis\t1\nWorld\t5\nand\t1\ncounting\t1\nextra\t1\nfor\t1\nis\t1\nsome\t1\ntext\t1\nwords\t1\n",
            output);
}
From source file: org.pentaho.weblogs.WebLogs.java
License: Apache License
/**
 * The main driver for the word count map/reduce program. Invoke this method to submit the map/reduce job.
 *
 * @throws IOException When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), WebLogs.class);
    conf.setJobName("wordcount");
    conf.set("debug", "true");

    // Relative paths used by the job are resolved against the working directory.
    conf.setWorkingDirectory(new Path("./"));

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    // These are set so the job is run in the same JVM as the
    // debugger - we are not submitting to an MR node.
    conf.set("mapred.job.tracker", "local");
    conf.set("fs.default.name", "local");

    // The mapper, reducer and combiner classes, loaded from the plugin jar.
    File jar = new File("./dist/pentaho-big-data-plugin-TRUNK-SNAPSHOT.jar");
    URLClassLoader loader = new URLClassLoader(new URL[] { jar.toURI().toURL() });
    conf.setMapperClass(
            (Class<? extends Mapper>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransMap"));
    // conf.setCombinerClass(
    //         (Class<? extends Reducer>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransReduce"));
    conf.setReducerClass(
            (Class<? extends Reducer>) loader.loadClass("org.pentaho.hadoop.mapreduce.GenericTransReduce"));

    // Embed the mapper and reducer Kettle transformations in the job configuration.
    TransExecutionConfiguration transExecConfig = new TransExecutionConfiguration();
    TransMeta mapperTransMeta = new TransMeta("./samples/jobs/hadoop/weblogs-mapper.ktr");
    TransConfiguration mapperTransConfig = new TransConfiguration(mapperTransMeta, transExecConfig);
    conf.set("transformation-map-xml", mapperTransConfig.getXML());

    TransMeta reducerTransMeta = new TransMeta("./samples/jobs/hadoop/weblogs-reducer.ktr");
    TransConfiguration reducerTransConfig = new TransConfiguration(reducerTransMeta, transExecConfig);
    conf.set("transformation-reduce-xml", reducerTransConfig.getXML());

    // Transformation data interface.
    conf.set("transformation-map-input-stepname", "Injector");
    conf.set("transformation-map-output-stepname", "Output");
    conf.set("transformation-reduce-input-stepname", "Injector");
    conf.set("transformation-reduce-output-stepname", "Output");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    // Parse -m/-r task-count flags; everything else is a positional argument.
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    // The final input/output paths come from the parsed positional arguments
    // and override the paths set from the raw args above.
    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}