List of usage examples for org.apache.hadoop.mapreduce.Job#setJobName
public void setJobName(String name) throws IllegalStateException
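The name passed here is a human-readable label that shows up in the ResourceManager/JobTracker UI and in the job history; it has no functional effect. It must be set while the job is still being defined: once the job has been submitted, setJobName throws IllegalStateException. Below is a minimal, self-contained driver sketch showing where the call fits; the class name SetJobNameExample, the job name string, and the identity map/reduce pass-through are illustrative assumptions, not taken from the examples that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetJobNameExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(SetJobNameExample.class);

        // human-readable label; must be called while the job is still in the DEFINE state
        job.setJobName("set-job-name-example");

        // no mapper/reducer set: identity pass-through of TextInputFormat records
        // (LongWritable byte offsets as keys, Text lines as values)
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
        // calling job.setJobName(...) after submission would throw IllegalStateException
    }
}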
From source file: de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.utils.URIExtractor.java
License: Apache License
/** {@inheritDoc} */
@Override
public int run(String[] args) throws Exception {
    // set from the command line
    Job job = Job.getInstance(getConf());
    job.setJarByClass(URIExtractor.class);
    job.setJobName(URIExtractor.class.getName());

    // mapper
    job.setMapperClass(URIExtractorMapper.class);
    job.setReducerClass(URIExtractorReducer.class);

    // input-output is warc
    job.setInputFormatClass(WARCInputFormat.class);
    // is necessary, so that Hadoop does not mix the map input format up.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    // set output compression to GZip
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: de.tudarmstadt.ukp.experiments.dip.hadoop.ClueWebTRECIdFileExtractor.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());

    for (Map.Entry<String, String> next : job.getConfiguration()) {
        System.out.println(next.getKey() + ": " + next.getValue());
    }

    job.setJarByClass(ClueWebTRECIdFileExtractor.class);
    job.setJobName(ClueWebTRECIdFileExtractor.class.getName());

    // mapper
    job.setMapperClass(MapperClass.class);

    // input
    job.setInputFormatClass(WARCInputFormat.class);

    // output
    job.setOutputFormatClass(WARCOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(WARCWritable.class);
    FileOutputFormat.setCompressOutput(job, true);

    // paths
    String commaSeparatedInputFiles = args[0];
    String outputPath = args[1];

    // load IDs to be searched for
    job.getConfiguration().set(MAPREDUCE_MAPPER_TREC_IDS, loadTrecIds(args[2]));

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: de.tudarmstadt.ukp.experiments.dip.hadoop.OriginalURLGrep.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    org.apache.hadoop.conf.Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    System.out.println("Other args: " + Arrays.toString(otherArgs));

    Job job = Job.getInstance();
    job.setJarByClass(OriginalURLGrep.class);
    job.setJobName(OriginalURLGrep.class.getName());

    job.setMapperClass(OrigURLGrepMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    // cache file - IDs for index
    String idFile = args[2];
    System.err.println("idFile: " + idFile);
    job.addCacheFile(new URI(idFile + "#" + NODE_IDS));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    String commaSeparatedInputFiles = otherArgs[0];
    String outputPath = otherArgs[1];
    System.err.println("commaSeparatedInputFiles: " + commaSeparatedInputFiles);
    System.err.println("outputPath: " + outputPath);

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: dm_p2_mr.DM_P2_MR.java
public static void main(String[] args) throws Exception {
    generateLinkedHashMap("cho.txt");
    while (iter < 10) {
        if (iter == 0) {
            // first iteration: seed the configuration with the initial centroids
            Configuration confg = new Configuration();
            for (int i = 0; i < init_centroids.length; i++) {
                List<Double> exps = linkedHashMap.get(Integer.parseInt(init_centroids[i]));
                StringBuilder temp = new StringBuilder();
                for (int k = 0; k < exps.size(); k++) {
                    temp.append(exps.get(k)); // the original appended exps.get(i), repeating a single value
                    temp.append(" ");
                }
                confg.set(String.valueOf(i + 1), temp.toString());
            }
            Job job = Job.getInstance(confg);
            job.setJobName("mapreduce");
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setMapperClass(kmapper.class);
            job.setReducerClass(kreducer.class);

            String inputPath = "/input";
            String outputPath = "/output";
            FileInputFormat.setInputPaths(job, new Path(inputPath));
            FileOutputFormat.setOutputPath(job, new Path(outputPath));

            job.waitForCompletion(true);
        } else {
            // later iterations: read the centroids written by the previous reducer run
            Configuration confg = new Configuration();
            FileSystem fOpen = FileSystem.get(confg);
            Path outputPathReduceFile = new Path("/output/part-r-00000");
            BufferedReader reader = new BufferedReader(new InputStreamReader(fOpen.open(outputPathReduceFile)));
            String line = reader.readLine();
            while (line != null) {
                String[] split = line.split(":");
                confg.set(split[0], split[1]);
                line = reader.readLine();
            }

            Job job = Job.getInstance(confg);
            job.setJobName("mapreduce");
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            job.setMapperClass(kmapper.class);
            job.setReducerClass(kreducer.class);

            String inputPath = "/input";
            String outputPath = "/output";
            FileInputFormat.setInputPaths(job, new Path(inputPath));
            FileOutputFormat.setOutputPath(job, new Path(outputPath));

            job.waitForCompletion(true);
        }
        iter++;
    }
}
From source file: drdoobs.AggregateJob.java
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(getClass());
    job.setJobName(getClass().getSimpleName());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(ProjectionMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: edu.american.student.foreman.HadoopForeman.java
License: Open Source License
/**
 * Creates a native Hadoop job given a HadoopJobConfiguration.
 * @param conf
 * @return
 * @throws HadoopException
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public Job getHadoopJob(HadoopJobConfiguration conf) throws HadoopException {
    Job job;
    try {
        job = new Job();
        job.setJobName(conf.getJobName());
        job.setMapperClass(conf.getMapperClass());
        job.setInputFormatClass((Class<? extends InputFormat>) conf.getInputFormatClass());
        if (conf.getOutputFormatClass() != null) {
            job.setOutputFormatClass((Class<? extends OutputFormat>) conf.getOutputFormatClass());
        }
        if (conf.getOutputKeyClass() != null) {
            job.setOutputKeyClass(conf.getOutputKeyClass());
        }
        if (conf.getOutputValueClass() != null) {
            job.setOutputValueClass(conf.getOutputValueClass());
        }
        if (conf.getReducerClass() != null) {
            job.setReducerClass(conf.getReducerClass());
        }
        job.setNumReduceTasks(conf.getNumReduceTasks());

        Configuration conf1 = job.getConfiguration();
        if (conf.getInputFormatClass() == AccumuloInputFormat.class) {
            AccumuloInputFormat.setInputInfo(conf1, Constants.getAccumuloUser(),
                    Constants.getAccumuloPassword().getBytes(), conf.getDefaultTable(), conf.getDefaultAuths());
            AccumuloInputFormat.setZooKeeperInstance(conf1, Constants.getZookeeperInstanceName(),
                    Constants.getZookeeperInstance());
        }
        if (conf.getFetchColumns() != null) {
            AccumuloInputFormat.fetchColumns(conf1, conf.getFetchColumns());
        } else if (conf.getInputFormatClass() == TextInputFormat.class) {
            if (conf.getPathToProcess() != null) {
                FileInputFormat.setInputPaths(job, conf.getPathToProcess());
            }
        }
        if (conf.getOutputFormatClass() == AccumuloOutputFormat.class) {
            AccumuloOutputFormat.setOutputInfo(conf1, Constants.getAccumuloUser(),
                    Constants.getAccumuloPassword().getBytes(), true, conf.getDefaultTable());
            AccumuloOutputFormat.setZooKeeperInstance(conf1, Constants.getZookeeperInstanceName(),
                    Constants.getZookeeperInstance());
        }
        return job;
    } catch (IOException e) {
        String gripe = "Could not configure a Hadoop job";
        log.log(Level.SEVERE, gripe, e);
        throw new HadoopException(gripe, e);
    }
}
From source file: edu.american.student.mnemosyne.core.util.foreman.HadoopForeman.java
License: Apache License
@SuppressWarnings({ "unchecked", "rawtypes" }) public Job getHadoopJob(HadoopJobConfiguration conf) throws HadoopException { Job job; try {// w w w .ja v a 2 s. c om job = new Job(); DistributedCache.setCacheArchives( new URI[] { new URI("/cache/accumulo-core-1.4.1.jar"), new URI("/cache/accumulo-server-1.4.1.jar"), new URI("/cache/accumulo-start-1.4.1.jar"), new URI("/cache/cloudtrace-1.4.1.jar"), new URI("/cache/commons-collections-3.2.jar"), new URI("/cache/commons-configuration-1.5.jar"), new URI("/cache/commons-io-1.4.jar"), new URI("/cache/commons-jci-core-1.0.jar"), new URI("/cache/commons-jci-fam-1.0.jar"), new URI("/cache/commons-lang-2.4.jar"), new URI("/cache/commons-logging-1.0.4.jar"), new URI("/cache/commons-logging-api-1.0.4.jar"), new URI("/cache/jline-0.9.94.jar"), new URI("/cache/libthrift-0.6.1.jar"), new URI("/cache/log4j-1.2.16.jar") }, job.getConfiguration()); job.setJobName(conf.getJobName()); System.out.println("Setting jar class " + conf.getJarClass()); ((JobConf) job.getConfiguration()).setJar("/opt/mnemosyne.jar"); job.setJarByClass(conf.getJarClass()); job.setMapperClass(conf.getMapperClass()); job.setInputFormatClass((Class<? extends InputFormat>) conf.getInputFormatClass()); if (conf.getOutputFormatClass() != null) { job.setOutputFormatClass((Class<? extends OutputFormat>) conf.getOutputFormatClass()); } if (conf.getOutputKeyClass() != null) { job.setOutputKeyClass(conf.getOutputKeyClass()); } if (conf.getOutputValueClass() != null) { job.setOutputValueClass(conf.getOutputValueClass()); } if (conf.getReducerClass() != null) { job.setReducerClass(conf.getReducerClass()); } job.setNumReduceTasks(conf.getNumReduceTasks()); Configuration conf1 = job.getConfiguration(); if (conf.getInputFormatClass() == AccumuloInputFormat.class) { AccumuloInputFormat.setInputInfo(conf1, MnemosyneConstants.getAccumuloUser(), MnemosyneConstants.getAccumuloPassword().getBytes(), conf.getDefaultTable(), conf.getDefaultAuths()); AccumuloInputFormat.setZooKeeperInstance(conf1, MnemosyneConstants.getZookeeperInstanceName(), MnemosyneConstants.getZookeeperInstance()); } if (conf.getFetchColumns() != null) { AccumuloInputFormat.fetchColumns(conf1, conf.getFetchColumns()); } else if (conf.getInputFormatClass() == TextInputFormat.class) { if (conf.getPathToProcess() != null) { FileInputFormat.setInputPaths(job, conf.getPathToProcess()); } } if (conf.getOutputFormatClass() == AccumuloOutputFormat.class) { AccumuloOutputFormat.setOutputInfo(conf1, MnemosyneConstants.getAccumuloUser(), MnemosyneConstants.getAccumuloPassword().getBytes(), true, conf.getDefaultTable()); AccumuloOutputFormat.setZooKeeperInstance(conf1, MnemosyneConstants.getZookeeperInstanceName(), MnemosyneConstants.getZookeeperInstance()); } return job; } catch (IOException e) { String gripe = "Could not configure a Hadoop job"; log.log(Level.SEVERE, gripe, e); throw new HadoopException(gripe, e); } catch (URISyntaxException e) { // TODO Auto-generated catch block e.printStackTrace(); } return null; }
From source file: edu.berkeley.chukwa_xtrace.XtrExtract.java
License: Apache License
@Override
public int run(String[] arg) throws Exception {
    Job extractor = new Job(getConf());
    extractor.setMapperClass(MapClass.class);
    extractor.setReducerClass(Reduce.class);
    extractor.setJobName("x-trace reconstructor");
    extractor.setJarByClass(this.getClass());

    extractor.setMapOutputKeyClass(BytesWritable.class);
    extractor.setMapOutputValueClass(Text.class);
    extractor.setOutputKeyClass(BytesWritable.class);
    extractor.setOutputValueClass(TextArrayWritable.class);
    extractor.setInputFormatClass(SequenceFileInputFormat.class);
    extractor.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.setInputPaths(extractor, new Path(arg[0]));
    FileOutputFormat.setOutputPath(extractor, new Path(arg[1]));

    System.out.println("looks OK. Submitting.");
    extractor.submit();
    // extractor.waitForCompletion(false);
    return 0;
}
From source file: edu.berkeley.chukwa_xtrace.XtrIndex.java
License: Apache License
@Override
public int run(String[] arg) throws Exception {
    Job extractor = new Job(getConf());
    extractor.setMapperClass(MapClass.class);
    // no reduce, just identity
    extractor.setJobName("x-trace indexer");
    extractor.setJarByClass(this.getClass());

    extractor.setMapOutputKeyClass(BytesWritable.class);
    extractor.setMapOutputValueClass(TextArrayWritable.class);
    extractor.setOutputKeyClass(BytesWritable.class);
    extractor.setOutputValueClass(TextArrayWritable.class);
    extractor.setInputFormatClass(SequenceFileInputFormat.class);
    extractor.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.setInputPaths(extractor, new Path(arg[0]));
    FileOutputFormat.setOutputPath(extractor, new Path(arg[1]));

    System.out.println("looks OK. Submitting.");
    extractor.submit();
    // extractor.waitForCompletion(false);
    return 0;
}
From source file: edu.cooper.cloud.MultiFileWordCount.java
License: Apache License
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        printUsage();
        return 2;
    }

    Job job = new Job(getConf());
    job.setJobName("MultiFileWordCount");
    job.setJarByClass(MultiFileWordCount.class);

    // set the InputFormat of the job to our InputFormat
    job.setInputFormatClass(MyInputFormat.class);

    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);

    // use the defined mapper
    job.setMapperClass(MapClass.class);
    // use the edu.cooper.cloud.Normalize Reducer
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}