Example usage for org.apache.hadoop.mapred JobConf setJobName

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.setJobName.

Prototype

public void setJobName(String name)

Source Link

Document

Set the user-specified job name.

Usage

From source file:CountHistogram.java

License:Open Source License

/**
 * Configures the "CountHistogram" job and submits it through {@code JobClient.runJob}.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path;
 *             any further arguments are ignored
 * @return 0 when job setup and {@code JobClient.runJob} complete without throwing
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 * @throws Exception anything thrown while configuring or running the job, propagated unchanged
 */
@Override
public int run(String[] args) throws Exception {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException further down.
    if (args == null || args.length < 2) {
        throw new IllegalArgumentException("Usage: CountHistogram <input path> <output path>");
    }

    JobConf job = new JobConf(getConf(), CountHistogram.class);
    job.setJobName("CountHistogram");

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // runJob is a static utility that takes the JobConf directly, so no JobClient instance
    // is created here. Exceptions propagate to the caller as-is; they are not printed here
    // as well, which would report the same failure twice.
    JobClient.runJob(job);
    return 0;
}

From source file:adept.mapreduce.MapReduce.java

License:Apache License

/**
 * Builds a {@link JobConf} whose mapper is selected by class name. The job is only
 * configured here; it is not submitted.
 *
 * <p>The reducer is always {@code AdeptReducer}, input is read with
 * {@code KeyValueTextInputFormat} using a tab as the key/value separator, and both
 * output key and value are {@code Text}.
 *
 * @param inputPath  location of the job input
 * @param outputPath location the job output is written to
 * @param mapClass   fully qualified name of the mapper class, loaded via {@link Class#forName(String)}
 * @return the configured job
 * @throws RuntimeException if the paths cannot be applied or the mapper class cannot be loaded
 *                          or is not a {@code Mapper}; the original exception is attached as the cause
 * @throws Exception declared by the existing signature
 */
public JobConf getConfiguration(String inputPath, String outputPath, String mapClass) throws Exception {
    JobConf job = new JobConf(new Configuration(), getClass());

    try {
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.setJobName("Algorithm Map-Reduce");
        // asSubclass verifies the loaded class really is a Mapper instead of relying on an unchecked cast.
        job.setMapperClass(Class.forName(mapClass).asSubclass(Mapper.class));
    } catch (Exception e) {
        // Keep the original exception as the cause so its type and stack trace are not lost.
        throw new RuntimeException("Exception occurred: " + e.getMessage(), e);
    }

    job.setReducerClass(AdeptReducer.class);
    job.setInputFormat(KeyValueTextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.set("key.value.separator.in.input.line", "\t");

    return job;
}

From source file:adept.mapreduce.MapReduceExample.java

License:Apache License

/**
 * Configures the example job and submits it through {@code JobClient.runJob}.
 *
 * <p>Input is read with {@code KeyValueTextInputFormat} using a comma as the key/value
 * separator; output key and value are both {@code Text}.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @return always 0 when no exception is thrown
 * @throws Exception anything thrown while configuring or running the job
 */
public int run(String[] args) throws Exception {
    final JobConf jobConf = new JobConf(getConf(), MapReduceExample.class);

    // Input and output locations come straight from the command line.
    final Path inputPath = new Path(args[0]);
    final Path outputPath = new Path(args[1]);
    FileInputFormat.setInputPaths(jobConf, inputPath);
    FileOutputFormat.setOutputPath(jobConf, outputPath);

    // NOTE(review): the job name is spelled "MapReduec"; kept as-is because it is a runtime value.
    jobConf.setJobName("MapReduecExample");

    // Processing classes.
    jobConf.setMapperClass(MapClass.class);
    jobConf.setReducerClass(Reduce.class);

    // Input/output formats and output record types.
    jobConf.setInputFormat(KeyValueTextInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.set("key.value.separator.in.input.line", ",");

    JobClient.runJob(jobConf);
    return 0;
}

From source file:arrestsbyyear.ArrestsByYear.java

/**
 * Configures the "ArrestsByYear" job and submits it through {@code JobClient.runJob}.
 *
 * <p>Input is read with {@code KeyValueTextInputFormat}; no key/value separator is set
 * here. Output records are {@code Text} keys with {@code IntWritable} values.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @return always 0 when no exception is thrown
 * @throws Exception anything thrown while configuring or running the job
 */
public int run(String[] args) throws Exception {
    final JobConf jobConf = new JobConf(getConf(), ArrestsByYear.class);

    // Where to read from and write to.
    final Path inputPath = new Path(args[0]);
    final Path outputPath = new Path(args[1]);
    FileInputFormat.setInputPaths(jobConf, inputPath);
    FileOutputFormat.setOutputPath(jobConf, outputPath);

    // Identity and processing classes.
    jobConf.setJobName("ArrestsByYear");
    jobConf.setMapperClass(MapClass.class);
    jobConf.setReducerClass(Reduce.class);

    // Formats and output record types.
    jobConf.setInputFormat(KeyValueTextInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(IntWritable.class);

    JobClient.runJob(jobConf);
    return 0;
}

From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.cpu.MatrixMultiplicationCpu.java

License:Apache License

/**
 * Creates the job configuration for the CPU matrix multiplication job.
 *
 * <p>The two inputs are combined with an "inner" {@code CompositeInputFormat} join over
 * sequence files, and the same reducer class is used as both combiner and reducer.
 *
 * @param initialConf    configuration the new {@code JobConf} is based on
 * @param aPath          path of the first input, used in the join expression and the job name
 * @param bPath          path of the second input, used in the join expression and the job name
 * @param outPath        output path of the job
 * @param outCardinality value stored under {@code CONF_OUT_CARD}
 * @param isDebugging    value stored under {@code CONF_DEBUG}
 * @return the configured {@code JobConf}
 */
public static Configuration createMatrixMultiplicationCpuConf(Configuration initialConf, Path aPath, Path bPath,
        Path outPath, int outCardinality, boolean isDebugging) {

    final JobConf jobConf = new JobConf(initialConf, MatrixMultiplicationCpu.class);
    final String jobName = "MatrixMultiplicationCPU: " + aPath + " x " + bPath + " = " + outPath;
    jobConf.setJobName(jobName);

    // Job-specific parameters.
    jobConf.setInt(CONF_OUT_CARD, outCardinality);
    jobConf.setBoolean(CONF_DEBUG, isDebugging);

    // Input: inner join of the two sequence-file inputs.
    jobConf.setInputFormat(CompositeInputFormat.class);
    final String joinExpression = CompositeInputFormat.compose("inner", SequenceFileInputFormat.class, aPath,
            bPath);
    jobConf.set("mapred.join.expr", joinExpression);

    // Output: sequence files under outPath.
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(jobConf, outPath);

    // Processing classes; the reducer doubles as the combiner.
    jobConf.setMapperClass(MatrixMultiplyCpuMapper.class);
    jobConf.setCombinerClass(MatrixMultiplicationCpuReducer.class);
    jobConf.setReducerClass(MatrixMultiplicationCpuReducer.class);

    // Intermediate and final record types are identical.
    jobConf.setMapOutputKeyClass(IntWritable.class);
    jobConf.setMapOutputValueClass(VectorWritable.class);
    jobConf.setOutputKeyClass(IntWritable.class);
    jobConf.setOutputValueClass(VectorWritable.class);

    // Fixed 8G initial/maximum heap via mapred.child.java.opts.
    // NOTE(review): the property name suggests this applies to child task JVMs rather than
    // the client - confirm against the Hadoop configuration reference.
    jobConf.set("mapred.child.java.opts", "-Xms8G -Xmx8G");

    return jobConf;
}

From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.gpu.MatrixMultiplicationGpu.java

License:Apache License

/**
 * Creates the job configuration for the GPU matrix multiplication job.
 *
 * <p>The two inputs are combined with an "inner" {@code CompositeInputFormat} join over
 * sequence files. The job runs with zero reduce tasks, so only the mapper is configured.
 *
 * @param initialConf    configuration the new {@code JobConf} is based on
 * @param aPath          path of the first input, used in the join expression and the job name
 * @param bPath          path of the second input, used in the join expression and the job name
 * @param outPath        output path of the job
 * @param outCardinality value stored under {@code CONF_OUT_CARD}
 * @param tileWidth      value stored under {@code CONF_TILE_WIDTH}
 * @param isDebugging    value stored under {@code CONF_DEBUG}
 * @return the configured {@code JobConf}
 */
public static Configuration createMatrixMultiplicationGpuConf(Configuration initialConf, Path aPath, Path bPath,
        Path outPath, int outCardinality, int tileWidth, boolean isDebugging) {

    final JobConf jobConf = new JobConf(initialConf, MatrixMultiplicationGpu.class);
    final String jobName = "MatrixMultiplicationGPU: " + aPath + " x " + bPath + " = " + outPath;
    jobConf.setJobName(jobName);

    // Job-specific parameters.
    jobConf.setInt(CONF_OUT_CARD, outCardinality);
    jobConf.setInt(CONF_TILE_WIDTH, tileWidth);
    jobConf.setBoolean(CONF_DEBUG, isDebugging);

    // Input: inner join of the two sequence-file inputs.
    jobConf.setInputFormat(CompositeInputFormat.class);
    final String joinExpression = CompositeInputFormat.compose("inner", SequenceFileInputFormat.class, aPath,
            bPath);
    jobConf.set("mapred.join.expr", joinExpression);

    // Output: sequence files under outPath.
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(jobConf, outPath);

    jobConf.setMapperClass(MatrixMultiplyGpuMapper.class);

    // Intermediate and final record types are identical.
    jobConf.setMapOutputKeyClass(IntWritable.class);
    jobConf.setMapOutputValueClass(VectorWritable.class);
    jobConf.setOutputKeyClass(IntWritable.class);
    jobConf.setOutputValueClass(VectorWritable.class);

    // Fixed 8G initial/maximum heap for the GPU Rootbeer execution.
    jobConf.set("mapred.child.java.opts", "-Xms8G -Xmx8G");

    // No reduce step is needed: with 0 reducers the reduce step is skipped and the mapper
    // output is the final output. (An identity reducer would still shuffle and sort.)
    jobConf.setNumReduceTasks(0);

    return jobConf;
}

From source file:average.AverageDriver.java

/**
 * Entry point: configures the "BookCrossing1.0" job and submits it through
 * {@code JobClient.runJob}.
 *
 * @param args command-line arguments; {@code args[1]} is the input directory and
 *             {@code args[2]} is the output directory ({@code args[0]} is not read here)
 * @throws IllegalArgumentException if fewer than three arguments are supplied
 * @throws IllegalStateException if running the job throws; the original exception is the cause
 */
public static void main(String[] args) {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException further down.
    if (args == null || args.length < 3) {
        throw new IllegalArgumentException(
                "Expected at least 3 arguments: args[1] = input directory, args[2] = output directory");
    }

    // Configurations for Job set in this variable
    JobConf conf = new JobConf(average.AverageDriver.class);

    // Name of the Job
    conf.setJobName("BookCrossing1.0");

    // Data type of Output Key and Value
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    // Setting the Mapper and Reducer Class
    conf.setMapperClass(average.AverageMapper.class);
    conf.setReducerClass(average.AverageReducer.class);

    // Formats of the Data Type of Input and output
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    // Specify input and output DIRECTORIES (not files)
    FileInputFormat.setInputPaths(conf, new Path(args[1]));
    FileOutputFormat.setOutputPath(conf, new Path(args[2]));

    try {
        // runJob is a static utility that takes the JobConf directly, so no JobClient
        // instance is created here.
        JobClient.runJob(conf);
    } catch (Exception e) {
        // Propagate instead of printing and returning normally, so a failed job does not
        // end the process with a success exit status.
        throw new IllegalStateException("Job BookCrossing1.0 failed", e);
    }
}

From source file:averageprocessingtimesbytype.AverageProcessingTimesByType.java

/**
 * Configures the "AverageProcessingTimesByType" job and submits it through
 * {@code JobClient.runJob}.
 *
 * <p>Input is read with {@code KeyValueTextInputFormat}; no key/value separator is set
 * here. Output records are {@code Text} keys with {@code IntWritable} values.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @return always 0 when no exception is thrown
 * @throws Exception anything thrown while configuring or running the job
 */
public int run(String[] args) throws Exception {
    final JobConf jobConf = new JobConf(getConf(), AverageProcessingTimesByType.class);

    // Source and destination of the job data.
    final Path source = new Path(args[0]);
    final Path destination = new Path(args[1]);
    FileInputFormat.setInputPaths(jobConf, source);
    FileOutputFormat.setOutputPath(jobConf, destination);

    // Job name plus the map and reduce implementations.
    jobConf.setJobName("AverageProcessingTimesByType");
    jobConf.setMapperClass(MapClass.class);
    jobConf.setReducerClass(Reduce.class);

    // How records are read, written and typed.
    jobConf.setInputFormat(KeyValueTextInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(IntWritable.class);

    JobClient.runJob(jobConf);
    return 0;
}

From source file:azkaban.jobtype.javautils.AbstractHadoopJob.java

License:Apache License

/**
 * Builds the {@link JobConf} for this job from the job properties.
 *
 * <p>Configuration is applied in this order, so later steps can override earlier ones:
 * <ol>
 *   <li>job name, mapper and (optional) reducer;</li>
 *   <li>local-mode settings when {@code is.local} is true, otherwise the class loader / jar
 *       via {@code HadoopUtils.setClassLoaderAndJar};</li>
 *   <li>{@code mapred.child.java.opts}, {@code input.paths} and {@code output.path} when present
 *       (the output path is deleted first when {@code force.output.overwrite} is true);</li>
 *   <li>external jars, distributed cache files and archives, and every non-directory entry
 *       of {@code hdfs.default.classpath.dir};</li>
 *   <li>every property whose key starts (case-insensitively) with {@code HADOOP_PREFIX},
 *       copied with the prefix stripped;</li>
 *   <li>{@code HadoopUtils.setPropsInJob} and, when the {@code HADOOP_TOKEN_FILE_LOCATION}
 *       environment variable is set, the credentials binary location.</li>
 * </ol>
 *
 * @param mapperClass  mapper to run
 * @param reducerClass reducer to run, or {@code null} to leave the reducer unset
 * @return the configured job
 * @throws IllegalArgumentException if {@code input.paths} is present but has no values
 * @throws IOException propagated from the file system and distributed cache calls
 * @throws URISyntaxException if a cache file or cache archive entry is not a valid URI
 */
@SuppressWarnings("rawtypes")
public JobConf createJobConf(Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass)
        throws IOException, URISyntaxException {
    JobConf conf = new JobConf();

    conf.setJobName(getJobName());
    conf.setMapperClass(mapperClass);
    if (reducerClass != null) {
        conf.setReducerClass(reducerClass);
    }

    if (props.getBoolean("is.local", false)) {
        conf.set("mapred.job.tracker", "local");
        conf.set("fs.default.name", "file:///");
        conf.set("mapred.local.dir", "/tmp/map-red");

        logger.info("Running locally, no hadoop jar set.");
    } else {
        // set custom class loader with custom find resource strategy.
        HadoopUtils.setClassLoaderAndJar(conf, getClass());
        logger.info("Setting hadoop jar file for class:" + getClass() + "  to " + conf.getJar());
        logger.info("*************************************************************************");
        logger.info(
                "          Running on Real Hadoop Cluster(" + conf.get("mapred.job.tracker") + ")           ");
        logger.info("*************************************************************************");
    }

    // set JVM options if present
    if (props.containsKey("mapred.child.java.opts")) {
        conf.set("mapred.child.java.opts", props.getString("mapred.child.java.opts"));
        logger.info("mapred.child.java.opts set to " + props.getString("mapred.child.java.opts"));
    }

    // set input and output paths if they are present
    if (props.containsKey("input.paths")) {
        List<String> inputPaths = props.getStringList("input.paths");
        if (inputPaths.isEmpty()) {
            throw new IllegalArgumentException("Must specify at least one value for property 'input.paths'");
        }
        for (String path : inputPaths) {
            HadoopUtils.addAllSubPaths(conf, new Path(path));
        }
    }

    if (props.containsKey("output.path")) {
        String location = props.get("output.path");
        FileOutputFormat.setOutputPath(conf, new Path(location));

        // For testing purpose only remove output file if exists
        if (props.getBoolean("force.output.overwrite", false)) {
            // Read the path back from the conf so the delete targets exactly what the job will use.
            Path configuredOutput = FileOutputFormat.getOutputPath(conf);
            FileSystem fs = configuredOutput.getFileSystem(conf);
            fs.delete(configuredOutput, true);
        }
    }

    // Adds External jars to hadoop classpath
    String externalJarList = props.getString("hadoop.external.jarFiles", null);
    if (externalJarList != null) {
        FileSystem fs = FileSystem.get(conf);
        String[] jarFiles = externalJarList.split(",");
        for (String jarFile : jarFiles) {
            logger.info("Adding extenral jar File:" + jarFile);
            DistributedCache.addFileToClassPath(new Path(jarFile), conf, fs);
        }
    }

    // Adds distributed cache files
    String cacheFileList = props.getString("hadoop.cache.files", null);
    if (cacheFileList != null) {
        String[] cacheFiles = cacheFileList.split(",");
        for (String cacheFile : cacheFiles) {
            logger.info("Adding Distributed Cache File:" + cacheFile);
            DistributedCache.addCacheFile(new URI(cacheFile), conf);
        }
    }

    // Adds distributed cache archives
    String archiveFileList = props.getString("hadoop.cache.archives", null);
    if (archiveFileList != null) {
        String[] archiveFiles = archiveFileList.split(",");
        for (String archiveFile : archiveFiles) {
            logger.info("Adding Distributed Cache Archive File:" + archiveFile);
            DistributedCache.addCacheArchive(new URI(archiveFile), conf);
        }
    }

    // Adds every non-directory entry of the default classpath directory to the job classpath
    String hadoopCacheJarDir = props.getString("hdfs.default.classpath.dir", null);
    if (hadoopCacheJarDir != null) {
        FileSystem fs = FileSystem.get(conf);
        if (fs != null) {
            FileStatus[] status = fs.listStatus(new Path(hadoopCacheJarDir));

            if (status != null) {
                for (FileStatus entry : status) {
                    if (!entry.isDir()) {
                        Path path = new Path(hadoopCacheJarDir, entry.getPath().getName());
                        logger.info("Adding Jar to Distributed Cache Archive File:" + path);

                        DistributedCache.addFileToClassPath(path, conf, fs);
                    }
                }
            } else {
                logger.info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " is empty.");
            }
        } else {
            logger.info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " filesystem doesn't exist");
        }
    }

    // Copy prefixed properties into the conf with the prefix stripped.
    for (String key : getProps().getKeySet()) {
        // Locale.ROOT keeps the match independent of the JVM default locale; with e.g. a
        // Turkish default locale, an upper-case 'I' would lower-case to a dotless 'i' and a
        // key that should match the prefix would be skipped.
        String lowerCase = key.toLowerCase(java.util.Locale.ROOT);
        if (lowerCase.startsWith(HADOOP_PREFIX)) {
            String newKey = key.substring(HADOOP_PREFIX.length());
            conf.set(newKey, getProps().get(key));
        }
    }

    HadoopUtils.setPropsInJob(conf, getProps());

    // put in tokens
    if (System.getenv(HADOOP_TOKEN_FILE_LOCATION) != null) {
        conf.set(MAPREDUCE_JOB_CREDENTIALS_BINARY, System.getenv(HADOOP_TOKEN_FILE_LOCATION));
    }

    return conf;
}

From source file:babel.prep.corpus.CorpusGenerator.java

License:Apache License

/**
 * Configures a dataset generation job; only a mapper is set, no reducer class.
 *
 * <p>Input is the sequence-file data under {@code crawlDir/pagesSubDir}. Output goes to a
 * timestamped directory under {@code crawlDir/CORPUS_SUBDIR}, which is deleted
 * (recursively) before being registered as the job output path.
 *
 * @param crawlDir    crawl directory that holds the pages and receives the corpus
 * @param pagesSubDir sub-directory of {@code crawlDir} to read pages from
 * @param xmlOut      whether to use the XML output format (also reflected in the job name
 *                    and the output directory name)
 * @return the configured job
 * @throws IOException if deleting the output directory fails, or from the helper calls
 */
protected JobConf createJobConf(String crawlDir, String pagesSubDir, boolean xmlOut) throws IOException {
    final JobConf jobConf = new JobConf(getConf());
    final String formatLabel = xmlOut ? "xml formatted" : "";
    jobConf.setJobName("create " + formatLabel + " dataset from " + pagesSubDir);

    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setMapperClass(CorpusGenMapper.class);
    if (xmlOut) {
        jobConf.setOutputFormat(MultipleXMLLangFileOutputFormat.class);
    } else {
        jobConf.setOutputFormat(MultipleLangFileOutputFormat.class);
    }
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Page.class);

    final Path pagesDir = new Path(crawlDir, pagesSubDir);
    FileInputFormat.addInputPath(jobConf, pagesDir);

    // Output directory name: "corpus.[<PARAM_XML>.]<timestamp>" under the corpus sub-directory.
    final Path corpusRoot = new Path(crawlDir, CORPUS_SUBDIR);
    final String xmlMarker = xmlOut ? PARAM_XML + "." : "";
    final Path outputDir = new Path(corpusRoot, "corpus." + xmlMarker + getCurTimeStamp());

    // Clear anything already at the target location before registering it as the output.
    m_fs.delete(outputDir, true);
    FileOutputFormat.setOutputPath(jobConf, outputDir);

    setUniqueTempDir(jobConf);

    return jobConf;
}