List of usage examples for org.apache.hadoop.mapred.JobConf.setJobName
public void setJobName(String name)
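setJobName stores a human-readable name for the job in its configuration (the mapred.job.name / mapreduce.job.name property, depending on Hadoop version). The name is purely descriptive: it does not affect execution, but it is what shows up in the JobTracker and job-history UIs, so descriptive names make jobs much easier to find. The following is a minimal sketch of the pattern used in all the examples below; MyDriver, MyMapper, MyReducer, and the input/output paths are hypothetical placeholders, not code from any of the source files listed here.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

// Hypothetical driver: MyMapper / MyReducer and the paths are placeholders.
JobConf conf = new JobConf(MyDriver.class);
conf.setJobName("my example job");   // name shown in the JobTracker / history UI

conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
conf.setMapperClass(MyMapper.class);
conf.setReducerClass(MyReducer.class);

FileInputFormat.setInputPaths(conf, new Path("/tmp/in"));
FileOutputFormat.setOutputPath(conf, new Path("/tmp/out"));

JobClient.runJob(conf);

Every example below follows this same shape: construct a JobConf, call setJobName with a descriptive string, configure the rest of the job, then submit it through JobClient.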
From source file: IndexService.IndexMergeMR.java
License: Open Source License
public static RunningJob run(String inputfiles, String outputdir, Configuration conf) {
    if (inputfiles == null || outputdir == null)
        return null;

    JobConf job = new JobConf(conf);
    job.setJobName("MergeIndexMR");
    job.setJarByClass(IndexMergeMR.class);
    job.setNumReduceTasks(1);

    FileSystem fs = null;
    try {
        fs = FileSystem.get(job);
        fs.delete(new Path(outputdir), true);

        String[] ifs = inputfiles.split(",");
        TreeSet<String> files = new TreeSet<String>();
        for (int i = 0; i < ifs.length; i++) {
            IFormatDataFile ifdf = new IFormatDataFile(job);
            ifdf.open(ifs[i]);
            Collection<String> strs = ifdf.fileInfo().head().getUdi().infos().values();
            for (String str : strs) {
                files.add(str);
            }
            ifdf.close();
        }

        StringBuffer sb = new StringBuffer();
        for (String str : files) {
            sb.append(str + ",");
        }
        job.set(ConstVar.HD_index_filemap, sb.substring(0, sb.length() - 1));

        IFormatDataFile ifdf = new IFormatDataFile(job);
        ifdf.open(ifs[0]);

        HashMap<Integer, IRecord.IFType> map = ifdf.fileInfo().head().fieldMap().fieldtypes();
        ArrayList<String> fieldStrings = new ArrayList<String>();
        for (int i = 0; i < map.size(); i++) {
            IRecord.IFType type = map.get(i);
            fieldStrings.add(type.type() + ConstVar.RecordSplit + type.idx());
        }
        job.setStrings(ConstVar.HD_fieldMap, fieldStrings.toArray(new String[fieldStrings.size()]));

        job.set("datafiletype", ifdf.fileInfo().head().getUdi().infos().get(123456));
        ifdf.close();
    } catch (Exception e2) {
        e2.printStackTrace();
    }

    FileInputFormat.setInputPaths(job, inputfiles);
    FileOutputFormat.setOutputPath(job, new Path(outputdir));

    job.setOutputKeyClass(IndexKey.class);
    job.setOutputValueClass(IndexValue.class);

    job.setPartitionerClass(IndexMergePartitioner.class);

    job.setMapperClass(MergeIndexMap.class);
    job.setCombinerClass(MergeIndexReduce.class);
    job.setReducerClass(MergeIndexReduce.class);

    job.setInputFormat(IndexMergeIFormatInputFormat.class);
    job.setOutputFormat(IndexMergeIFormatOutputFormat.class);

    try {
        JobClient jc = new JobClient(job);
        return jc.submitJob(job);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
From source file: IndexService.IndexMR.java
License: Open Source License
public static RunningJob run(Configuration conf2, String inputfiles, boolean column, String ids,
        String outputdir) {
    if (inputfiles == null || outputdir == null)
        return null;

    JobConf conf = new JobConf(conf2);
    conf.setJobName("IndexMR:\t" + ids);
    conf.setJarByClass(IndexMR.class);

    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
        fs.delete(new Path(outputdir), true);
    } catch (IOException e3) {
        e3.printStackTrace();
    }

    conf.set("index.ids", ids);
    if (column) {
        conf.set("datafiletype", "column");
    } else {
        conf.set("datafiletype", "format");
    }

    String[] ifs = inputfiles.split(",");
    long wholerecnum = 0;

    String[] idxs = ids.split(",");
    String[] fieldStrings = new String[idxs.length + 2];

    if (!column) {
        IFormatDataFile ifdf;
        try {
            ifdf = new IFormatDataFile(conf);
            ifdf.open(ifs[0]);
            for (int i = 0; i < idxs.length; i++) {
                int id = Integer.parseInt(idxs[i]);
                byte type = ifdf.fileInfo().head().fieldMap().fieldtypes().get(id).type();
                fieldStrings[i] = type + ConstVar.RecordSplit + i;
            }
            ifdf.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    } else {
        try {
            IColumnDataFile icdf = new IColumnDataFile(conf);
            icdf.open(ifs[0]);
            for (int i = 0; i < idxs.length; i++) {
                int id = Integer.parseInt(idxs[i]);
                byte type = icdf.fieldtypes().get(id).type();
                fieldStrings[i] = type + ConstVar.RecordSplit + i;
            }
            icdf.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    fieldStrings[fieldStrings.length - 2] = ConstVar.FieldType_Short + ConstVar.RecordSplit
            + (fieldStrings.length - 2);
    fieldStrings[fieldStrings.length - 1] = ConstVar.FieldType_Int + ConstVar.RecordSplit
            + (fieldStrings.length - 1);

    conf.setStrings(ConstVar.HD_fieldMap, fieldStrings);

    if (!column) {
        conf.set(ConstVar.HD_index_filemap, inputfiles);
        for (String file : ifs) {
            IFormatDataFile fff;
            try {
                fff = new IFormatDataFile(conf);
                fff.open(file);
                wholerecnum += fff.segIndex().recnum();
                fff.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    } else {
        HashSet<String> files = new HashSet<String>();
        for (String file : ifs) {
            files.add(file);
        }
        StringBuffer sb = new StringBuffer();
        for (String str : files) {
            sb.append(str).append(",");
        }
        conf.set(ConstVar.HD_index_filemap, sb.substring(0, sb.length() - 1));

        for (String file : files) {
            Path parent = new Path(file).getParent();
            try {
                FileStatus[] fss = fs.listStatus(parent);
                String openfile = "";
                for (FileStatus status : fss) {
                    if (status.getPath().toString().contains(file)) {
                        openfile = status.getPath().toString();
                        break;
                    }
                }
                IFormatDataFile fff = new IFormatDataFile(conf);
                fff.open(openfile);
                wholerecnum += fff.segIndex().recnum();
                fff.close();
            } catch (IOException e) {
                e.printStackTrace();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    conf.setNumReduceTasks((int) ((wholerecnum - 1) / (100000000) + 1));

    FileInputFormat.setInputPaths(conf, inputfiles);
    Path outputPath = new Path(outputdir);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.setOutputKeyClass(IndexKey.class);
    conf.setOutputValueClass(IndexValue.class);

    conf.setPartitionerClass(IndexPartitioner.class);

    conf.setMapperClass(IndexMap.class);
    conf.setCombinerClass(IndexReduce.class);
    conf.setReducerClass(IndexReduce.class);

    if (column) {
        conf.setInputFormat(IColumnInputFormat.class);
    } else {
        conf.setInputFormat(IFormatInputFormat.class);
    }
    conf.setOutputFormat(IndexIFormatOutputFormat.class);

    try {
        JobClient jc = new JobClient(conf);
        return jc.submitJob(conf);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
From source file: invertedIndex.startJob.java
public static void start(String[] args) {
    try {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        lineIndexMapper Map = new lineIndexMapper();
        conf.setMapperClass(Map.getClass());

        lineIndexReducer Reduce = new lineIndexReducer();
        conf.setCombinerClass(Reduce.getClass());
        conf.setReducerClass(Reduce.getClass());

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[1]));

        Path outputDir = new Path(args[2]);
        outputDir.getFileSystem(conf).delete(outputDir, true);
        FileSystem fs = FileSystem.get(conf);
        fs.delete(outputDir, true);
        FileOutputFormat.setOutputPath(conf, outputDir);

        JobClient.runJob(conf);
    } catch (Exception Exp) {
        Exp.printStackTrace();
    }
}
From source file: io.aos.t4f.hadoop.mapred.WordCountMapReduceTest.java
License: Apache License
/**
 * The main driver for the word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 *         job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), WordCountMapReduceTest.class);
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(WordCountMapper.class);
    conf.setCombinerClass(WordCountReducer.class);
    conf.setReducerClass(WordCountReducer.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}
From source file: io.aos.t4f.hadoop.mapred.WordCountTest.java
License: Apache License
private JobConf createJobConf() {
    JobConf conf = mrCluster.createJobConf();
    conf.setJobName("wordcount test");

    conf.setMapperClass(WordCountMapper.class);
    conf.setReducerClass(WordCountReducer.class);

    conf.setInputFormat(TextInputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(conf, input);
    FileOutputFormat.setOutputPath(conf, output);

    return conf;
}
From source file: io.bfscan.clueweb12.DumpWarcRecordsToPlainText.java
License: Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String input = cmdline.getOptionValue(INPUT_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);

    LOG.info("Tool name: " + DumpWarcRecordsToPlainText.class.getSimpleName());
    LOG.info(" - input: " + input);
    LOG.info(" - output: " + output);

    JobConf conf = new JobConf(getConf(), DumpWarcRecordsToPlainText.class);
    conf.setJobName(DumpWarcRecordsToPlainText.class.getSimpleName() + ":" + input);

    conf.setNumReduceTasks(0);

    FileInputFormat.addInputPaths(conf, input);
    FileOutputFormat.setOutputPath(conf, new Path(output));

    conf.setInputFormat(ClueWeb12InputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setMapperClass(MyMapper.class);

    RunningJob job = JobClient.runJob(conf);

    Counters counters = job.getCounters();
    int numDocs = (int) counters.findCounter(Records.PAGES).getCounter();
    LOG.info("Read " + numDocs + " docs.");

    return 0;
}
From source file: io.druid.indexer.updater.HadoopConverterJob.java
License: Apache License
private static void setJobName(JobConf jobConf, List<DataSegment> segments) {
    if (segments.size() == 1) {
        final DataSegment segment = segments.get(0);
        jobConf.setJobName(String.format("druid-convert-%s-%s-%s", segment.getDataSource(),
                segment.getInterval(), segment.getVersion()));
    } else {
        final Set<String> dataSources = Sets
                .newHashSet(Iterables.transform(segments, new Function<DataSegment, String>() {
                    @Override
                    public String apply(DataSegment input) {
                        return input.getDataSource();
                    }
                }));
        final Set<String> versions = Sets
                .newHashSet(Iterables.transform(segments, new Function<DataSegment, String>() {
                    @Override
                    public String apply(DataSegment input) {
                        return input.getVersion();
                    }
                }));
        jobConf.setJobName(String.format("druid-convert-%s-%s", Arrays.toString(dataSources.toArray()),
                Arrays.toString(versions.toArray())));
    }
}
From source file: io.fluo.stress.trie.Generate.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        log.error("Usage: " + this.getClass().getSimpleName()
                + " <numMappers> <numbersPerMapper> <max> <output dir>");
        System.exit(-1);
    }

    int numMappers = Integer.parseInt(args[0]);
    int numPerMapper = Integer.parseInt(args[1]);
    long max = Long.parseLong(args[2]);
    Path out = new Path(args[3]);

    Preconditions.checkArgument(numMappers > 0, "numMappers <= 0");
    Preconditions.checkArgument(numPerMapper > 0, "numPerMapper <= 0");
    Preconditions.checkArgument(max > 0, "max <= 0");

    JobConf job = new JobConf(getConf());

    job.setJobName(this.getClass().getName());
    job.setJarByClass(Generate.class);

    job.setInt(TRIE_GEN_NUM_PER_MAPPER_PROP, numPerMapper);
    job.setInt(TRIE_GEN_NUM_MAPPERS_PROP, numMappers);
    job.setLong(TRIE_GEN_MAX_PROP, max);

    job.setInputFormat(RandomLongInputFormat.class);

    job.setNumReduceTasks(0);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, out);

    RunningJob runningJob = JobClient.runJob(job);
    runningJob.waitForCompletion();

    return runningJob.isSuccessful() ? 0 : -1;
}
From source file: io.fluo.stress.trie.NumberIngest.java
License: Apache License
public static void main(String[] args) throws IOException, ConfigurationException {
    // Parse arguments
    if (args.length != 4) {
        log.error("Usage: NumberIngest <numMappers> <numbersPerMapper> <nodeSize> <fluoProps>");
        System.exit(-1);
    }
    int numMappers = Integer.parseInt(args[0]);
    int numPerMapper = Integer.parseInt(args[1]);
    int nodeSize = Integer.parseInt(args[2]);
    String fluoPropsPath = args[3];

    String hadoopPrefix = System.getenv("HADOOP_PREFIX");
    if (hadoopPrefix == null) {
        hadoopPrefix = System.getenv("HADOOP_HOME");
        if (hadoopPrefix == null) {
            log.error("HADOOP_PREFIX or HADOOP_HOME needs to be set!");
            System.exit(-1);
        }
    }

    // create test name
    String testId = String.format("test-%d", (new Date().getTime() / 1000));
    String testDir = "/trie-stress/" + testId;

    setupHdfs(hadoopPrefix, testDir, numMappers, numPerMapper);

    JobConf ingestConf = new JobConf(NumberIngest.class);
    ingestConf.setJobName("NumberIngest");

    FluoConfiguration config = new FluoConfiguration(new File(fluoPropsPath));
    loadConfig(ingestConf, ConfigurationConverter.getProperties(config));
    ingestConf.setInt(TRIE_NODE_SIZE_PROP, nodeSize);

    ingestConf.setOutputKeyClass(LongWritable.class);
    ingestConf.setOutputValueClass(IntWritable.class);
    ingestConf.setMapperClass(NumberIngest.IngestMapper.class);
    ingestConf.setReducerClass(NumberIngest.UniqueReducer.class);

    FileInputFormat.setInputPaths(ingestConf, new Path(testDir + "/input/"));
    FileOutputFormat.setOutputPath(ingestConf, new Path(testDir + "/unique/"));

    RunningJob ingestJob = JobClient.runJob(ingestConf);
    ingestJob.waitForCompletion();
    if (ingestJob.isSuccessful()) {

        JobConf countConf = new JobConf(NumberIngest.class);
        countConf.setJobName("NumberCount");

        countConf.setOutputKeyClass(Text.class);
        countConf.setOutputValueClass(LongWritable.class);
        countConf.setMapperClass(NumberIngest.CountMapper.class);
        countConf.setReducerClass(NumberIngest.CountReducer.class);

        FileInputFormat.setInputPaths(countConf, new Path(testDir + "/unique/"));
        FileOutputFormat.setOutputPath(countConf, new Path(testDir + "/output/"));

        RunningJob countJob = JobClient.runJob(countConf);
        countJob.waitForCompletion();
        if (countJob.isSuccessful()) {
            log.info("Ingest and count jobs were successful");
            log.info("Output can be viewed @ " + testDir);
            System.exit(0);
        } else {
            log.error("Count job failed for " + testId);
        }
    } else {
        log.error("Ingest job failed. Skipping count job for " + testId);
    }

    System.exit(-1);
}
From source file: io.fluo.stress.trie.Unique.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        log.error("Usage: " + this.getClass().getSimpleName() + "<input dir>{ <input dir>}");
        System.exit(-1);
    }

    JobConf job = new JobConf(getConf());

    job.setJobName(Unique.class.getName());
    job.setJarByClass(Unique.class);

    job.setInputFormat(SequenceFileInputFormat.class);
    for (String arg : args) {
        SequenceFileInputFormat.addInputPath(job, new Path(arg));
    }

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setReducerClass(UniqueReducer.class);

    job.setOutputFormat(NullOutputFormat.class);

    RunningJob runningJob = JobClient.runJob(job);
    runningJob.waitForCompletion();

    numUnique = (int) runningJob.getCounters().getCounter(Stats.UNIQUE);
    log.debug("numUnique : " + numUnique);

    return runningJob.isSuccessful() ? 0 : -1;
}