List of usage examples for org.apache.hadoop.mapred.JobConf.setJobName
public void setJobName(String name)
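setJobName stores a human-readable name for the job in its configuration (the mapred.job.name / mapreduce.job.name property, depending on Hadoop version). The name is purely descriptive: it does not affect execution, but it is what shows up in the JobTracker and job-history UIs, so descriptive names make jobs much easier to find. The following is a minimal sketch of the pattern used in all the examples below; MyDriver, MyMapper, MyReducer, and the input/output paths are hypothetical placeholders, not code from any of the source files listed here.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

// Hypothetical driver: MyMapper / MyReducer and the paths are placeholders.
JobConf conf = new JobConf(MyDriver.class);
conf.setJobName("my example job");   // name shown in the JobTracker / history UI

conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
conf.setMapperClass(MyMapper.class);
conf.setReducerClass(MyReducer.class);

FileInputFormat.setInputPaths(conf, new Path("/tmp/in"));
FileOutputFormat.setOutputPath(conf, new Path("/tmp/out"));

JobClient.runJob(conf);

Every example below follows this same shape: construct a JobConf, call setJobName with a descriptive string, configure the rest of the job, then submit it through JobClient.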
From source file: IndexService.IndexMergeMR.java
License: Open Source License
public static RunningJob run(String inputfiles, String outputdir, Configuration conf) {
    if (inputfiles == null || outputdir == null)
        return null;

    JobConf job = new JobConf(conf);
    job.setJobName("MergeIndexMR");
    job.setJarByClass(IndexMergeMR.class);
    job.setNumReduceTasks(1);

    FileSystem fs = null;
    try {
        fs = FileSystem.get(job);
        fs.delete(new Path(outputdir), true);

        String[] ifs = inputfiles.split(",");
        TreeSet<String> files = new TreeSet<String>();
        for (int i = 0; i < ifs.length; i++) {
            IFormatDataFile ifdf = new IFormatDataFile(job);
            ifdf.open(ifs[i]);
            Collection<String> strs = ifdf.fileInfo().head().getUdi().infos().values();
            for (String str : strs) {
                files.add(str);
            }
            ifdf.close();
        }

        StringBuffer sb = new StringBuffer();
        for (String str : files) {
            sb.append(str + ",");
        }
        job.set(ConstVar.HD_index_filemap, sb.substring(0, sb.length() - 1));

        IFormatDataFile ifdf = new IFormatDataFile(job);
        ifdf.open(ifs[0]);

        HashMap<Integer, IRecord.IFType> map = ifdf.fileInfo().head().fieldMap().fieldtypes();
        ArrayList<String> fieldStrings = new ArrayList<String>();
        for (int i = 0; i < map.size(); i++) {
            IRecord.IFType type = map.get(i);
            fieldStrings.add(type.type() + ConstVar.RecordSplit + type.idx());
        }
        job.setStrings(ConstVar.HD_fieldMap, fieldStrings.toArray(new String[fieldStrings.size()]));

        job.set("datafiletype", ifdf.fileInfo().head().getUdi().infos().get(123456));
        ifdf.close();
    } catch (Exception e2) {
        e2.printStackTrace();
    }

    FileInputFormat.setInputPaths(job, inputfiles);
    FileOutputFormat.setOutputPath(job, new Path(outputdir));

    job.setOutputKeyClass(IndexKey.class);
    job.setOutputValueClass(IndexValue.class);

    job.setPartitionerClass(IndexMergePartitioner.class);

    job.setMapperClass(MergeIndexMap.class);
    job.setCombinerClass(MergeIndexReduce.class);
    job.setReducerClass(MergeIndexReduce.class);

    job.setInputFormat(IndexMergeIFormatInputFormat.class);
    job.setOutputFormat(IndexMergeIFormatOutputFormat.class);

    try {
        JobClient jc = new JobClient(job);
        return jc.submitJob(job);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
From source file: IndexService.IndexMR.java
License: Open Source License
public static RunningJob run(Configuration conf2, String inputfiles, boolean column, String ids,
        String outputdir) {
    if (inputfiles == null || outputdir == null)
        return null;

    JobConf conf = new JobConf(conf2);
    conf.setJobName("IndexMR:\t" + ids);
    conf.setJarByClass(IndexMR.class);

    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
        fs.delete(new Path(outputdir), true);
    } catch (IOException e3) {
        e3.printStackTrace();
    }

    conf.set("index.ids", ids);
    if (column) {
        conf.set("datafiletype", "column");
    } else {
        conf.set("datafiletype", "format");
    }

    String[] ifs = inputfiles.split(",");
    long wholerecnum = 0;

    String[] idxs = ids.split(",");
    String[] fieldStrings = new String[idxs.length + 2];

    if (!column) {
        IFormatDataFile ifdf;
        try {
            ifdf = new IFormatDataFile(conf);
            ifdf.open(ifs[0]);
            for (int i = 0; i < idxs.length; i++) {
                int id = Integer.parseInt(idxs[i]);
                byte type = ifdf.fileInfo().head().fieldMap().fieldtypes().get(id).type();
                fieldStrings[i] = type + ConstVar.RecordSplit + i;
            }
            ifdf.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    } else {
        try {
            IColumnDataFile icdf = new IColumnDataFile(conf);
            icdf.open(ifs[0]);
            for (int i = 0; i < idxs.length; i++) {
                int id = Integer.parseInt(idxs[i]);
                byte type = icdf.fieldtypes().get(id).type();
                fieldStrings[i] = type + ConstVar.RecordSplit + i;
            }
            icdf.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    fieldStrings[fieldStrings.length - 2] = ConstVar.FieldType_Short + ConstVar.RecordSplit
            + (fieldStrings.length - 2);
    fieldStrings[fieldStrings.length - 1] = ConstVar.FieldType_Int + ConstVar.RecordSplit
            + (fieldStrings.length - 1);

    conf.setStrings(ConstVar.HD_fieldMap, fieldStrings);

    if (!column) {
        conf.set(ConstVar.HD_index_filemap, inputfiles);
        for (String file : ifs) {
            IFormatDataFile fff;
            try {
                fff = new IFormatDataFile(conf);
                fff.open(file);
                wholerecnum += fff.segIndex().recnum();
                fff.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    } else {
        HashSet<String> files = new HashSet<String>();
        for (String file : ifs) {
            files.add(file);
        }
        StringBuffer sb = new StringBuffer();
        for (String str : files) {
            sb.append(str).append(",");
        }
        conf.set(ConstVar.HD_index_filemap, sb.substring(0, sb.length() - 1));

        for (String file : files) {
            Path parent = new Path(file).getParent();
            try {
                FileStatus[] fss = fs.listStatus(parent);
                String openfile = "";
                for (FileStatus status : fss) {
                    if (status.getPath().toString().contains(file)) {
                        openfile = status.getPath().toString();
                        break;
                    }
                }
                IFormatDataFile fff = new IFormatDataFile(conf);
                fff.open(openfile);
                wholerecnum += fff.segIndex().recnum();
                fff.close();
            } catch (IOException e) {
                e.printStackTrace();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    conf.setNumReduceTasks((int) ((wholerecnum - 1) / (100000000) + 1));

    FileInputFormat.setInputPaths(conf, inputfiles);
    Path outputPath = new Path(outputdir);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.setOutputKeyClass(IndexKey.class);
    conf.setOutputValueClass(IndexValue.class);

    conf.setPartitionerClass(IndexPartitioner.class);

    conf.setMapperClass(IndexMap.class);
    conf.setCombinerClass(IndexReduce.class);
    conf.setReducerClass(IndexReduce.class);

    if (column) {
        conf.setInputFormat(IColumnInputFormat.class);
    } else {
        conf.setInputFormat(IFormatInputFormat.class);
    }
    conf.setOutputFormat(IndexIFormatOutputFormat.class);

    try {
        JobClient jc = new JobClient(conf);
        return jc.submitJob(conf);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
From source file: invertedIndex.startJob.java
public static void start(String[] args) {
    try {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        lineIndexMapper Map = new lineIndexMapper();
        conf.setMapperClass(Map.getClass());

        lineIndexReducer Reduce = new lineIndexReducer();
        conf.setCombinerClass(Reduce.getClass());
        conf.setReducerClass(Reduce.getClass());

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[1]));

        Path outputDir = new Path(args[2]);
        outputDir.getFileSystem(conf).delete(outputDir, true);
        FileSystem fs = FileSystem.get(conf);
        fs.delete(outputDir, true);
        FileOutputFormat.setOutputPath(conf, outputDir);

        JobClient.runJob(conf);
    } catch (Exception Exp) {
        Exp.printStackTrace();
    }
}
From source file: io.aos.t4f.hadoop.mapred.WordCountMapReduceTest.java
License: Apache License
/**
 * The main driver for the word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 *         job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), WordCountMapReduceTest.class);
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(WordCountMapper.class);
    conf.setCombinerClass(WordCountReducer.class);
    conf.setReducerClass(WordCountReducer.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}
From source file: io.aos.t4f.hadoop.mapred.WordCountTest.java
License: Apache License
private JobConf createJobConf() {
    JobConf conf = mrCluster.createJobConf();
    conf.setJobName("wordcount test");

    conf.setMapperClass(WordCountMapper.class);
    conf.setReducerClass(WordCountReducer.class);

    conf.setInputFormat(TextInputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(conf, input);
    FileOutputFormat.setOutputPath(conf, output);

    return conf;
}
From source file: io.bfscan.clueweb12.DumpWarcRecordsToPlainText.java
License: Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String input = cmdline.getOptionValue(INPUT_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);

    LOG.info("Tool name: " + DumpWarcRecordsToPlainText.class.getSimpleName());
    LOG.info(" - input: " + input);
    LOG.info(" - output: " + output);

    JobConf conf = new JobConf(getConf(), DumpWarcRecordsToPlainText.class);
    conf.setJobName(DumpWarcRecordsToPlainText.class.getSimpleName() + ":" + input);

    conf.setNumReduceTasks(0);

    FileInputFormat.addInputPaths(conf, input);
    FileOutputFormat.setOutputPath(conf, new Path(output));

    conf.setInputFormat(ClueWeb12InputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setMapperClass(MyMapper.class);

    RunningJob job = JobClient.runJob(conf);

    Counters counters = job.getCounters();
    int numDocs = (int) counters.findCounter(Records.PAGES).getCounter();
    LOG.info("Read " + numDocs + " docs.");

    return 0;
}
From source file: io.druid.indexer.updater.HadoopConverterJob.java
License: Apache License
private static void setJobName(JobConf jobConf, List<DataSegment> segments) {
    if (segments.size() == 1) {
        final DataSegment segment = segments.get(0);
        jobConf.setJobName(String.format("druid-convert-%s-%s-%s", segment.getDataSource(),
                segment.getInterval(), segment.getVersion()));
    } else {
        final Set<String> dataSources = Sets
                .newHashSet(Iterables.transform(segments, new Function<DataSegment, String>() {
                    @Override
                    public String apply(DataSegment input) {
                        return input.getDataSource();
                    }
                }));
        final Set<String> versions = Sets
                .newHashSet(Iterables.transform(segments, new Function<DataSegment, String>() {
                    @Override
                    public String apply(DataSegment input) {
                        return input.getVersion();
                    }
                }));
        jobConf.setJobName(String.format("druid-convert-%s-%s", Arrays.toString(dataSources.toArray()),
                Arrays.toString(versions.toArray())));
    }
}
From source file: io.fluo.stress.trie.Generate.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        log.error("Usage: " + this.getClass().getSimpleName()
                + " <numMappers> <numbersPerMapper> <max> <output dir>");
        System.exit(-1);
    }

    int numMappers = Integer.parseInt(args[0]);
    int numPerMapper = Integer.parseInt(args[1]);
    long max = Long.parseLong(args[2]);
    Path out = new Path(args[3]);

    Preconditions.checkArgument(numMappers > 0, "numMappers <= 0");
    Preconditions.checkArgument(numPerMapper > 0, "numPerMapper <= 0");
    Preconditions.checkArgument(max > 0, "max <= 0");

    JobConf job = new JobConf(getConf());

    job.setJobName(this.getClass().getName());
    job.setJarByClass(Generate.class);

    job.setInt(TRIE_GEN_NUM_PER_MAPPER_PROP, numPerMapper);
    job.setInt(TRIE_GEN_NUM_MAPPERS_PROP, numMappers);
    job.setLong(TRIE_GEN_MAX_PROP, max);

    job.setInputFormat(RandomLongInputFormat.class);

    job.setNumReduceTasks(0);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, out);

    RunningJob runningJob = JobClient.runJob(job);
    runningJob.waitForCompletion();

    return runningJob.isSuccessful() ? 0 : -1;
}
From source file: io.fluo.stress.trie.NumberIngest.java
License: Apache License
public static void main(String[] args) throws IOException, ConfigurationException {
    // Parse arguments
    if (args.length != 4) {
        log.error("Usage: NumberIngest <numMappers> <numbersPerMapper> <nodeSize> <fluoProps>");
        System.exit(-1);
    }
    int numMappers = Integer.parseInt(args[0]);
    int numPerMapper = Integer.parseInt(args[1]);
    int nodeSize = Integer.parseInt(args[2]);
    String fluoPropsPath = args[3];

    String hadoopPrefix = System.getenv("HADOOP_PREFIX");
    if (hadoopPrefix == null) {
        hadoopPrefix = System.getenv("HADOOP_HOME");
        if (hadoopPrefix == null) {
            log.error("HADOOP_PREFIX or HADOOP_HOME needs to be set!");
            System.exit(-1);
        }
    }

    // create test name
    String testId = String.format("test-%d", (new Date().getTime() / 1000));
    String testDir = "/trie-stress/" + testId;

    setupHdfs(hadoopPrefix, testDir, numMappers, numPerMapper);

    JobConf ingestConf = new JobConf(NumberIngest.class);
    ingestConf.setJobName("NumberIngest");

    FluoConfiguration config = new FluoConfiguration(new File(fluoPropsPath));
    loadConfig(ingestConf, ConfigurationConverter.getProperties(config));
    ingestConf.setInt(TRIE_NODE_SIZE_PROP, nodeSize);

    ingestConf.setOutputKeyClass(LongWritable.class);
    ingestConf.setOutputValueClass(IntWritable.class);
    ingestConf.setMapperClass(NumberIngest.IngestMapper.class);
    ingestConf.setReducerClass(NumberIngest.UniqueReducer.class);

    FileInputFormat.setInputPaths(ingestConf, new Path(testDir + "/input/"));
    FileOutputFormat.setOutputPath(ingestConf, new Path(testDir + "/unique/"));

    RunningJob ingestJob = JobClient.runJob(ingestConf);
    ingestJob.waitForCompletion();
    if (ingestJob.isSuccessful()) {

        JobConf countConf = new JobConf(NumberIngest.class);
        countConf.setJobName("NumberCount");

        countConf.setOutputKeyClass(Text.class);
        countConf.setOutputValueClass(LongWritable.class);
        countConf.setMapperClass(NumberIngest.CountMapper.class);
        countConf.setReducerClass(NumberIngest.CountReducer.class);

        FileInputFormat.setInputPaths(countConf, new Path(testDir + "/unique/"));
        FileOutputFormat.setOutputPath(countConf, new Path(testDir + "/output/"));

        RunningJob countJob = JobClient.runJob(countConf);
        countJob.waitForCompletion();
        if (countJob.isSuccessful()) {
            log.info("Ingest and count jobs were successful");
            log.info("Output can be viewed @ " + testDir);
            System.exit(0);
        } else {
            log.error("Count job failed for " + testId);
        }
    } else {
        log.error("Ingest job failed. Skipping count job for " + testId);
    }

    System.exit(-1);
}
From source file: io.fluo.stress.trie.Unique.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        log.error("Usage: " + this.getClass().getSimpleName() + "<input dir>{ <input dir>}");
        System.exit(-1);
    }

    JobConf job = new JobConf(getConf());

    job.setJobName(Unique.class.getName());
    job.setJarByClass(Unique.class);

    job.setInputFormat(SequenceFileInputFormat.class);
    for (String arg : args) {
        SequenceFileInputFormat.addInputPath(job, new Path(arg));
    }

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setReducerClass(UniqueReducer.class);

    job.setOutputFormat(NullOutputFormat.class);

    RunningJob runningJob = JobClient.runJob(job);
    runningJob.waitForCompletion();

    numUnique = (int) runningJob.getCounters().getCounter(Stats.UNIQUE);
    log.debug("numUnique : " + numUnique);

    return runningJob.isSuccessful() ? 0 : -1;
}