List of usage examples for org.apache.hadoop.mapreduce Job getJobName
public String getJobName()
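Before the full project examples below, here is a minimal sketch of the typical pattern: the name is supplied when the Job is created (or later via setJobName), and getJobName() simply reads it back, usually for logging. The class name and job name in this sketch are illustrative placeholders, not taken from any of the projects listed below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetJobNameSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // The second argument becomes the name returned by getJobName().
        Job job = Job.getInstance(conf, "example-word-count");
        // ... mapper, reducer, input and output paths would be configured here ...
        // Read the name back, e.g. for logging before submission.
        System.out.println("About to submit job: " + job.getJobName());
    }
}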
From source file:com.marklogic.contentpump.ContentPump.java
License:Apache License
public static int runCommand(String[] args) throws IOException {
    // get command
    String cmd = args[0];
    if (cmd.equalsIgnoreCase("help")) {
        printUsage();
        return 1;
    } else if (cmd.equalsIgnoreCase("version")) {
        logVersions();
        return 1;
    }
    Command command = Command.forName(cmd);

    // get options arguments
    String[] optionArgs = Arrays.copyOfRange(args, 1, args.length);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Command: " + command);
        StringBuilder buf = new StringBuilder();
        for (String arg : optionArgs) {
            buf.append(arg);
            buf.append(' ');
        }
        LOG.debug("Arguments: " + buf);
    }

    // parse hadoop specific options
    Configuration conf = new Configuration();
    GenericOptionsParser genericParser = new GenericOptionsParser(conf, optionArgs);
    String[] remainingArgs = genericParser.getRemainingArgs();

    // parse command specific options
    CommandlineOptions options = new CommandlineOptions();
    command.configOptions(options);
    CommandLineParser parser = new GnuParser();
    CommandLine cmdline;
    try {
        cmdline = parser.parse(options, remainingArgs);
    } catch (Exception e) {
        LOG.error("Error parsing command arguments: ");
        LOG.error(e.getMessage());
        // Print the command usage message and exit.
        command.printUsage(command, options.getPublicOptions());
        return 1; // Exit on exception here.
    }
    for (String arg : cmdline.getArgs()) {
        LOG.error("Unrecognized argument: " + arg);
        // Print the command usage message and exit.
        command.printUsage(command, options.getPublicOptions());
        return 1; // Exit on exception here.
    }

    // check running mode and hadoop conf dir configuration
    String mode = cmdline.getOptionValue(MODE);
    String hadoopConfDir = System.getenv(HADOOP_CONFDIR_ENV_NAME);
    if (cmdline.hasOption(HADOOP_CONF_DIR)) {
        hadoopConfDir = cmdline.getOptionValue(HADOOP_CONF_DIR);
    }
    boolean distributed = hadoopConfDir != null
            && (mode == null || mode.equals(MODE_DISTRIBUTED));
    if (MODE_DISTRIBUTED.equalsIgnoreCase(mode) && !distributed) {
        LOG.error("Cannot run in distributed mode. HADOOP_CONF_DIR is "
                + "not configured.");
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Running in: " + (distributed ? "distributed " : "local") + "mode");
        if (distributed) {
            LOG.debug("HADOOP_CONF_DIR is set to " + hadoopConfDir);
        }
    }
    conf.set(EXECUTION_MODE, distributed ? MODE_DISTRIBUTED : MODE_LOCAL);

    if (distributed) {
        if (!cmdline.hasOption(SPLIT_INPUT)
                && Command.getInputType(cmdline).equals(InputType.DELIMITED_TEXT)) {
            conf.setBoolean(ConfigConstants.CONF_SPLIT_INPUT, true);
        }
        File hdConfDir = new File(hadoopConfDir);
        try {
            checkHadoopConfDir(hdConfDir);
        } catch (IllegalArgumentException e) {
            LOG.error("Error found with Hadoop home setting", e);
            System.err.println(e.getMessage());
            return 1;
        }
        // set new class loader based on Hadoop Conf Dir
        try {
            setClassLoader(hdConfDir, conf);
        } catch (Exception e) {
            LOG.error("Error configuring class loader", e);
            System.err.println(e.getMessage());
            return 1;
        }
    } else { // running in local mode
        // Tell Hadoop that we are running in local mode. This is useful
        // when the user has Hadoop home or their Hadoop conf dir in their
        // classpath but want to run in local mode.
        conf.set(CONF_MAPREDUCE_JOBTRACKER_ADDRESS, "local");
    }

    // create job
    Job job = null;
    try {
        if (distributed) {
            // So far all jobs created by mlcp are map only,
            // so set number of reduce tasks to 0.
            conf.setInt("mapreduce.job.reduces", 0);
            // No speculative runs since speculative tasks don't get to
            // clean up sessions properly
            conf.setBoolean("mapreduce.map.speculative", false);
        } else {
            // set working directory
            conf.set(CONF_MAPREDUCE_JOB_WORKING_DIR, System.getProperty("user.dir"));
        }
        job = command.createJob(conf, cmdline);
    } catch (Exception e) {
        // Print exception message.
        e.printStackTrace();
        return 1;
    }

    LOG.info("Job name: " + job.getJobName());

    // run job
    try {
        if (distributed) {
            // submit job
            submitJob(job);
        } else {
            runJobLocally(job, cmdline, command);
        }
        return 0;
    } catch (Exception e) {
        LOG.error("Error running a ContentPump job", e);
        e.printStackTrace(System.err);
        return 1;
    }
}
From source file:com.marklogic.contentpump.ContentPump.java
License:Apache License
private static void submitJob(Job job) throws Exception {
    String cpHome = System.getProperty(CONTENTPUMP_HOME_PROPERTY_NAME);

    // find job jar
    File cpHomeDir = new File(cpHome);
    FilenameFilter jobJarFilter = new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            if (name.endsWith(".jar") && name.startsWith(CONTENTPUMP_JAR_PREFIX)) {
                return true;
            } else {
                return false;
            }
        }
    };
    File[] cpJars = cpHomeDir.listFiles(jobJarFilter);
    if (cpJars == null || cpJars.length == 0) {
        throw new RuntimeException("Content Pump jar file "
                + "is not found under " + cpHome);
    }
    if (cpJars.length > 1) {
        throw new RuntimeException("More than one Content Pump jar file "
                + "are found under " + cpHome);
    }

    // set job jar
    Configuration conf = job.getConfiguration();
    conf.set("mapreduce.job.jar", cpJars[0].toURI().toURL().toString());

    // find lib jars
    FilenameFilter filter = new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            if (name.endsWith(".jar") && !name.startsWith("hadoop")) {
                return true;
            } else {
                return false;
            }
        }
    };

    // set lib jars
    StringBuilder jars = new StringBuilder();
    for (File jar : cpHomeDir.listFiles(filter)) {
        if (jars.length() > 0) {
            jars.append(',');
        }
        jars.append(jar.toURI().toURL().toString());
    }
    conf.set("tmpjars", jars.toString());
    if (LOG.isTraceEnabled())
        LOG.trace("LIBJARS:" + jars.toString());

    job.waitForCompletion(true);
    AuditUtil.auditMlcpFinish(conf, job.getJobName(), job.getCounters());
}
From source file:com.marklogic.contentpump.ContentPump.java
License:Apache License
private static void runJobLocally(Job job, CommandLine cmdline, Command cmd) throws Exception {
    LocalJobRunner runner = new LocalJobRunner(job, cmdline, cmd);
    runner.run();
    AuditUtil.auditMlcpFinish(job.getConfiguration(), job.getJobName(),
            runner.getReporter().counters);
}
From source file:com.metamx.druid.indexer.DeterminePartitionsJob.java
License:Open Source License
public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */
        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            final Job groupByJob = new Job(new Configuration(), String.format(
                    "%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()));

            injectSystemProperties(groupByJob);
            groupByJob.setInputFormatClass(TextInputFormat.class);
            groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
            groupByJob.setMapOutputKeyClass(BytesWritable.class);
            groupByJob.setMapOutputValueClass(NullWritable.class);
            groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setOutputKeyClass(BytesWritable.class);
            groupByJob.setOutputValueClass(NullWritable.class);
            groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
            groupByJob.setJarByClass(DeterminePartitionsJob.class);
            config.addInputPaths(groupByJob);
            config.intoConfiguration(groupByJob);
            FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

            groupByJob.submit();
            log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(),
                    groupByJob.getTrackingURL());

            if (!groupByJob.waitForCompletion(true)) {
                log.error("Job failed: %s", groupByJob.getJobID());
                return false;
            }
        } else {
            log.info("Skipping group-by job.");
        }

        /*
         * Read grouped data and determine appropriate partitions.
         */
        final Job dimSelectionJob = new Job(new Configuration(), String.format(
                "%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()));

        dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");
        injectSystemProperties(dimSelectionJob);

        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            // Read grouped data from the groupByJob.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
            dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
            FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
        } else {
            // Directly read the source data, since we assume it's already grouped.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
            dimSelectionJob.setInputFormatClass(TextInputFormat.class);
            config.addInputPaths(dimSelectionJob);
        }

        SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob);
        dimSelectionJob.setMapOutputValueClass(Text.class);
        dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
        dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
        dimSelectionJob.setOutputKeyClass(BytesWritable.class);
        dimSelectionJob.setOutputValueClass(Text.class);
        dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
        dimSelectionJob.setJarByClass(DeterminePartitionsJob.class);
        config.intoConfiguration(dimSelectionJob);
        FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

        dimSelectionJob.submit();
        log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(),
                dimSelectionJob.getTrackingURL());

        if (!dimSelectionJob.waitForCompletion(true)) {
            log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
            return false;
        }

        /*
         * Load partitions determined by the previous job.
         */
        log.info("Job completed, loading up partitions for intervals[%s].",
                config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
            DateTime bucket = segmentGranularity.getStart();

            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0));
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
            }
            if (fileSystem.exists(partitionInfoPath)) {
                List<ShardSpec> specs = config.jsonMapper.readValue(
                        Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                        new TypeReference<List<ShardSpec>>() {
                        });

                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
                for (int i = 0; i < specs.size(); ++i) {
                    actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
                }

                shardSpecs.put(bucket, actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }
        config.setShardSpecs(shardSpecs);

        return true;
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
From source file:com.metamx.druid.indexer.IndexGeneratorJob.java
License:Open Source License
public boolean run() {
    try {
        Job job = new Job(new Configuration(), String.format("%s-index-generator-%s",
                config.getDataSource(), config.getIntervals()));

        job.getConfiguration().set("io.sort.record.percent", "0.23");
        for (String propName : System.getProperties().stringPropertyNames()) {
            Configuration conf = job.getConfiguration();
            if (propName.startsWith("hadoop.")) {
                conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
            }
        }

        job.setInputFormatClass(TextInputFormat.class);

        job.setMapperClass(IndexGeneratorMapper.class);
        job.setMapOutputValueClass(Text.class);

        SortableBytes.useSortableBytesAsMapOutputKey(job);

        job.setNumReduceTasks(Iterables.size(config.getAllBuckets()));
        job.setPartitionerClass(IndexGeneratorPartitioner.class);

        job.setReducerClass(IndexGeneratorReducer.class);
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
        FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());

        config.addInputPaths(job);
        config.intoConfiguration(job);

        job.setJarByClass(IndexGeneratorJob.class);

        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());

        boolean success = job.waitForCompletion(true);

        Counter invalidRowCount = job.getCounters()
                .findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
        jobStats.setInvalidRowCount(invalidRowCount.getValue());

        return success;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:com.moz.fiji.mapreduce.framework.JobHistoryFijiTable.java
License:Apache License
/**
 * Writes a job into the JobHistoryFijiTable.
 *
 * @param job The job to save.
 * @param startTime The time the job began, in milliseconds.
 * @param endTime The time the job ended, in milliseconds.
 * @throws IOException If there is an error writing to the table.
 */
public void recordJob(final Job job, final long startTime, final long endTime) throws IOException {
    recordJob(job.getJobID().toString(), job.getJobName(), startTime, endTime, job.isSuccessful(),
            job.getConfiguration(), getCounters(job), Collections.<String, String>emptyMap());
}
From source file:com.panguso.lc.analysis.format.Logcenter.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    context = new ClassPathXmlApplicationContext("applicationContext.xml");
    Properties prop = context.getBean("configProperties", Properties.class);
    // String time = new DateTime().toString("yyyyMMddHH");
    // Expected configuration properties, e.g.:
    // hadoop.lib=/application/format/lib/
    // hadoop.conf=/application/format/conf/
    // hadoop.src=/log/src/
    // hadoop.dest=/log/dest/
    // hadoop.archive=/log/archive/
    libPath = prop.getProperty("hadoop.lib");
    confPath = prop.getProperty("hadoop.conf");
    srcPath = prop.getProperty("hadoop.src");
    destPath = prop.getProperty("hadoop.dest");
    archivePath = prop.getProperty("hadoop.archive");
    Configuration conf = getConf();
    logger.info("libPath=" + libPath);
    logger.info("confPath=" + confPath);
    logger.info("srcPath=" + srcPath);
    logger.info("destPath=" + destPath);
    logger.info("archivePath=" + archivePath);

    FileSystem fs = FileSystem.get(conf);

    // Add the library jars to the distributed cache classpath.
    FileStatus[] fJars = fs.listStatus(new Path(libPath));
    for (FileStatus fileStatus : fJars) {
        String jar = libPath + fileStatus.getPath().getName();
        DistributedCache.addFileToClassPath(new Path(jar), conf, FileSystem.get(conf));
    }
    // Add the configuration archives to the distributed cache classpath.
    FileStatus[] fProp = fs.listStatus(new Path(confPath));
    for (FileStatus fileStatus : fProp) {
        DistributedCache.addArchiveToClassPath(new Path(confPath + fileStatus.getPath().getName()), conf,
                FileSystem.get(conf));
    }

    FileStatus[] fDirs = fs.listStatus(new Path(srcPath));
    if (fDirs != null && fDirs.length > 0) {
        for (FileStatus file : fDirs) {
            // Each source directory is named after a time bucket.
            String currentTime = file.getPath().getName();
            String srcPathWithTime = srcPath + currentTime + "/";
            String destPathWithTime = destPath + currentTime + "/";
            String archPathWithTime = archivePath + currentTime + "/";

            // Skip time buckets that have already been processed successfully.
            if (analysisService.isSuccessful(currentTime)) {
                continue;
            }

            // fs.delete(new Path(destPathWithTime), true);

            // if (!fs.exists(new Path(srcPathWithTime))) {
            //     logger.warn("outPath does not exist,inputPath=" + srcPathWithTime);
            //     analysisService.saveFailureJob(job.getJobName(), currentTime);
            //     return -1;
            // }

            // Replace ";" with ":" in the job classpath entries.
            Job job = new Job(conf);
            String jars = job.getConfiguration().get("mapred.job.classpath.files");
            job.getConfiguration().set("mapred.job.classpath.files", jars.replace(";", ":"));
            logger.info("current dir=" + currentTime);
            job.setJobName("format_" + currentTime);

            job.setJarByClass(Logcenter.class);
            job.setMapperClass(FormatAnalysisMapper.class);
            job.setReducerClass(FormatAnalysisReducer.class);
            job.setCombinerClass(FormatAnalysisReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            // job.setNumReduceTasks(0);

            FileInputFormat.addInputPath(job, new Path(srcPathWithTime));
            FileOutputFormat.setOutputPath(job, new Path(destPathWithTime));

            // Run the job and wait for its completion.
            boolean result = false;
            try {
                result = job.waitForCompletion(true);
            } catch (FileAlreadyExistsException e) {
                logger.warn(e.getMessage(), e);
            }
            if (!result) {
                logger.warn("job execute failure!");
                analysisService.saveFailureJob(job.getJobName(), currentTime);
                continue;
                // return -1;
            }

            // Move the processed input into the archive directory.
            fs.delete(new Path(archPathWithTime), true);
            fs.rename(new Path(srcPathWithTime), new Path(archPathWithTime));
            analysisService.saveSuccessJob(job.getJobName(), currentTime);
        }
    }
    FileSystem.closeAll();
    return 0;
}
From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.MapReduceUtils.java
License:LGPL
/**
 * Wait for the completion of a job.
 *
 * @param job the job to submit
 * @param jobDescription the description of the job
 * @param waitTimeInMillis waiting time between 2 checks of the completion of jobs
 * @param status step status
 * @param counterGroup group of the counter to log
 * @throws EoulsanException if the job fails or if an exception occurs while
 *         submitting or waiting for the end of the job
 */
public static void submitAndWaitForJob(final Job job, final String jobDescription,
        final int waitTimeInMillis, final TaskStatus status, final String counterGroup)
        throws EoulsanException {

    if (job == null) {
        throw new NullPointerException("The job is null");
    }
    if (jobDescription == null) {
        throw new NullPointerException("The jobDescription is null");
    }

    try {
        // Set the description of the context
        status.setDescription(job.getJobName());

        // Submit the job
        job.submit();

        // Add the Hadoop job to the list of jobs to kill if the workflow fails
        HadoopJobEmergencyStopTask.addHadoopJobEmergencyStopTask(job);

        // Wait for the completion of the job (non verbose mode)
        job.waitForCompletion(false);

        // Remove the Hadoop job from the list of jobs to kill if the workflow fails
        HadoopJobEmergencyStopTask.removeHadoopJobEmergencyStopTask(job);

        // Check if the job has been successfully executed
        if (!job.isSuccessful()) {
            status.setProgressMessage("FAILED");
            throw new EoulsanException("Fail of the Hadoop job: " + job.getJobFile());
        }

        // Set the counters
        status.setCounters(new HadoopReporter(job.getCounters()), counterGroup);
    } catch (ClassNotFoundException | InterruptedException | IOException e) {
        throw new EoulsanException(e);
    }
}
From source file:gov.nasa.jpl.memex.pooledtimeseries.MeanChiSquareDistanceCalculation.java
License:Apache License
public static void main(String[] args) throws Exception { System.loadLibrary(Core.NATIVE_LIBRARY_NAME); Configuration baseConf = new Configuration(); baseConf.set("mapreduce.job.maps", "96"); baseConf.set("mapred.tasktracker.map.tasks.maximum", "96"); JobConf conf = new JobConf(); System.out.println("Before Map:" + conf.getNumMapTasks()); conf.setNumMapTasks(96);/*from ww w . j a va 2s . c om*/ System.out.println("After Map:" + conf.getNumMapTasks()); Job job = Job.getInstance(baseConf); job.setJarByClass(MeanChiSquareDistanceCalculation.class); job.setJobName("mean_chi_square_calculation"); System.out.println("Job ID" + job.getJobID()); System.out.println("Track:" + baseConf.get("mapred.job.tracker")); System.out.println("Job Name" + job.getJobName()); System.out.println(baseConf.get("mapreduce.job.maps")); System.out.println("Caching video-metric-bak.tgz"); job.addCacheArchive(new URI("/user/pts/video-metric-bak.tgz")); URI[] cacheFiles = job.getCacheFiles(); if (cacheFiles != null && cacheFiles.length > 0) { System.out.println("Cache file ->" + cacheFiles[0]); } System.out.println("Cached video-metric-bak.tgz"); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(DoubleWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(DoubleWritable.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.waitForCompletion(true); }
From source file:gov.nasa.jpl.memex.pooledtimeseries.SimilarityCalculation.java
License:Apache License
public static void main(String[] args) throws Exception { System.loadLibrary(Core.NATIVE_LIBRARY_NAME); Configuration baseConf = new Configuration(); baseConf.set("mapreduce.job.maps", "96"); baseConf.set("mapreduce.job.reduces", "0"); baseConf.set("mapred.tasktracker.map.tasks.maximum", "96"); baseConf.set("meanDistsFilePath", args[2]); JobConf conf = new JobConf(); System.out.println("Before Map:" + conf.getNumMapTasks()); conf.setNumMapTasks(196);//w w w .j a v a2 s . c o m System.out.println("After Map:" + conf.getNumMapTasks()); Job job = Job.getInstance(baseConf); System.out.println("Track: " + baseConf.get("mapred.job.tracker")); System.out.println("Job ID" + job.getJobID()); System.out.println("Job Name" + job.getJobName()); System.out.println(baseConf.get("mapreduce.job.maps")); job.setJarByClass(SimilarityCalculation.class); job.setJobName("similarity_calc"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(Map.class); job.waitForCompletion(true); }