List of usage examples for org.apache.hadoop.mapreduce.Job#getJobName()
public String getJobName()
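Before the collected examples, here is a minimal sketch of the call itself. The job name "word-count" is only an illustrative assumption; getJobName() simply returns whatever name the job was given, for example via Job.getInstance(Configuration, String).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetJobNameExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // "word-count" is an assumed example name, not taken from the snippets below
        Job job = Job.getInstance(conf, "word-count");
        System.out.println(job.getJobName()); // prints "word-count"
    }
}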
From source file:com.asakusafw.runtime.stage.optimizer.ReducerSimplifierConfigurator.java
License:Apache License
@Override
public void configure(Job job) throws IOException, InterruptedException {
    int count = job.getNumReduceTasks();
    if (count <= TASKS_TINY) {
        return;
    }
    Configuration conf = job.getConfiguration();
    long limit = conf.getLong(KEY_TINY_LIMIT, -1L);
    if (limit < 0L) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("Reducer simplifier is disabled for tiny inputs: {0}", //$NON-NLS-1$
                    job.getJobName()));
        }
        return;
    }
    long estimated = StageInputDriver.estimateInputSize(job);
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("Reducer simplifier: job={0}, tiny-limit={1}, estimated={2}", //$NON-NLS-1$
                job.getJobName(), limit, estimated));
    }
    if (estimated < 0L || estimated > limit) {
        return;
    }
    LOG.info(MessageFormat.format("The number of reduce task ({0}) is configured: {1}->{2}",
            job.getJobName(), job.getNumReduceTasks(), TASKS_TINY));
    job.setNumReduceTasks(TASKS_TINY);
}
From source file:com.awcoleman.ExampleJobSummaryLogWithOutput.BinRecToAvroRecDriver.java
License:Apache License
private String createTempFileAppender(Job job) throws IOException {
    String sep = FileSystems.getDefault().getSeparator();

    // JobID may not exist yet, but JobName does since we call getInstance with a name, so use JobName as prefix
    java.nio.file.Path temppath = Files.createTempDirectory(job.getJobName() + "_");
    String fapath = temppath + sep + "joblog.log";

    FileAppender fa = new FileAppender();
    fa.setName("TempFileAppender_" + job.getJobName());
    fa.setFile(fapath);
    fa.setLayout(new PatternLayout("%d{ISO8601} %p %c: %m%n"));
    fa.setThreshold(Level.INFO);
    fa.setAppend(true);
    fa.activateOptions();
    Logger.getRootLogger().addAppender(fa);

    // Add cleanup hooks; the log file itself should be deleted by copyFromLocalFile after the copy to HDFS
    temppath.toFile().deleteOnExit();

    return fapath;
}
From source file:com.awcoleman.ExampleJobSummaryLogWithOutput.BinRecToAvroRecDriver.java
License:Apache License
private boolean copyTempFileAppenderToHDFSOutpath(Job job, String fapath, String outpath) {
    String sep = FileSystems.getDefault().getSeparator();
    try {
        FileSystem hdfs = FileSystem.get(job.getConfiguration());
        Path localfile = new Path("file://" + fapath);
        Path hdfsfile = new Path(outpath + sep + "_log" + sep + "joblog.log");

        // About to move the job summary log to HDFS, so remove the appender from the root logger and close it
        FileAppender fa = (FileAppender) Logger.getRootLogger()
                .getAppender("TempFileAppender_" + job.getJobName());
        Logger.getRootLogger().removeAppender(fa);
        fa.close();

        hdfs.copyFromLocalFile(true, false, localfile, hdfsfile);
        return true;
    } catch (IOException ioe) {
        logger.warn("Unable to move job summary log to HDFS.", ioe);
        return false;
    }
}
From source file:com.baynote.kafka.hadoop.KafkaJobBuilderTest.java
License:Apache License
@Test
public void testConfigureWholeJob() throws Exception {
    // base configuration
    builder.setZkConnect("localhost:2181");
    builder.addQueueInput("queue_name", "group_name", MockMapper.class);
    builder.setTextFileOutputFormat("/a/hdfs/path");

    // extended configuration
    builder.setJobName("job_name");
    builder.setMapOutputKeyClass(Text.class);
    builder.setMapOutputValueClass(BytesWritable.class);
    builder.setReducerClass(MockReducer.class);
    builder.setTaskMemorySettings("-Xmx2048m");
    builder.setNumReduceTasks(100);
    builder.setParitioner(MockPartitioner.class);
    builder.setKafkaFetchSizeBytes(1024);

    Job job = builder.configureJob(conf);

    assertEquals("job_name", job.getJobName());
    assertEquals(Text.class, job.getMapOutputKeyClass());
    assertEquals(BytesWritable.class, job.getMapOutputValueClass());
    assertEquals(MockReducer.class, job.getReducerClass());
    assertEquals(MockMapper.class, job.getMapperClass());
    assertEquals("-Xmx2048m", job.getConfiguration().get("mapred.child.java.opts"));
    assertEquals(100, job.getNumReduceTasks());
    assertEquals(MockPartitioner.class, job.getPartitionerClass());
    assertEquals(1024, KafkaInputFormat.getKafkaFetchSizeBytes(job.getConfiguration()));
    assertEquals(TextOutputFormat.class, job.getOutputFormatClass());
    assertEquals(KafkaInputFormat.class, job.getInputFormatClass());
    assertEquals("file:/a/hdfs/path", TextOutputFormat.getOutputPath(job).toString());

    builder.setJobName(null);
    builder.setSequenceFileOutputFormat();
    builder.setUseLazyOutput();
    builder.addQueueInput("queue_name_2", "group_name_2", MockMapper.class);

    job = builder.configureJob(conf);

    assertEquals(LazyOutputFormat.class, job.getOutputFormatClass());
    assertEquals(MultipleKafkaInputFormat.class, job.getInputFormatClass());
    assertEquals(DelegatingMapper.class, job.getMapperClass());
    assertEquals(BytesWritable.class, job.getOutputKeyClass());
    assertEquals(BytesWritable.class, job.getOutputValueClass());
    assertNotNull(SequenceFileOutputFormat.getOutputPath(job));
    assertNotNull(job.getJobName());

    // use s3
    builder.useS3("my_aws_key", "s3cr3t", "my-bucket");
    builder.setTextFileOutputFormat("/a/hdfs/path");

    job = builder.configureJob(conf);

    assertEquals("my_aws_key", job.getConfiguration().get("fs.s3n.awsAccessKeyId"));
    assertEquals("s3cr3t", job.getConfiguration().get("fs.s3n.awsSecretAccessKey"));
    assertEquals("my_aws_key", job.getConfiguration().get("fs.s3.awsAccessKeyId"));
    assertEquals("s3cr3t", job.getConfiguration().get("fs.s3.awsSecretAccessKey"));
}
From source file:com.cloudera.accumulo.upgrade.compatibility.DataCompatibilityVerify.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    final String jobName = this.getClass().getName();
    options.parseArgs(jobName, args);
    try {
        final int totalMapSlots = getConf().getInt("mapred.map.tasks",
                DataCompatibilityTestCli.DEFAULT_NUM_ROWS);
        if (-1 == options.test.numRows) {
            options.test.numRows = totalMapSlots;
        }
        final TableOperations ops = options.connection.getConnector().tableOperations();
        final List<String> names = options.test.getTableNames(ops);
        int totalReduceSlots = getConf().getInt("mapred.reduce.tasks", 0);
        if (-1 != options.test.numReduceSlots) {
            totalReduceSlots = options.test.numReduceSlots;
        }
        if (0 == totalReduceSlots) {
            totalReduceSlots = names.size();
        }
        final int reducesPerJob = Math.max(1, totalReduceSlots / names.size());
        final List<Job> jobs = new ArrayList<>();
        for (String name : names) {
            final Job job = new Job(getConf(), jobName + " " + name);
            job.setJarByClass(this.getClass());
            options.input.useAccumuloInputFormat(job, name);
            job.setMapperClass(DataVerifyMapper.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);
            job.setReducerClass(LongSumReducer.class);
            job.setCombinerClass(LongSumReducer.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            TextOutputFormat.setOutputPath(job, new Path(options.test.output, name));
            job.setNumReduceTasks(reducesPerJob);
            job.submit();
            jobs.add(job);
        }
        boolean success = true;
        final long numCellsPerRow = options.test.qualifiers * DataCompatibilityLoad.FAMILIES.length;
        final long numCellsPerFamily = options.test.qualifiers * options.test.numRows;
        for (Job job : jobs) {
            success &= job.waitForCompletion(true);
            final CounterGroup group = job.getCounters().getGroup(DataVerifyMapper.class.getName());
            if (null == group) {
                log.error("Job '" + job.getJobName() + "' doesn't have counters for the verification mapper.");
                success = false;
            } else {
                final Counter badCounter = group.findCounter(BAD_COUNTER);
                if (null != badCounter && 0 < badCounter.getValue()) {
                    log.error("Job '" + job.getJobName() + "' has " + badCounter.getValue()
                            + " entries with bad checksums.");
                    success = false;
                }
                int numRows = 0;
                int numFamilies = 0;
                for (Counter counter : group) {
                    if (counter.getName().startsWith(ROW_COUNTER_PREFIX)) {
                        numRows++;
                        if (numCellsPerRow != counter.getValue()) {
                            log.error("Job '" + job.getJobName() + "', counter '" + counter.getName()
                                    + "' should have " + numCellsPerRow + " cells, but instead has "
                                    + counter.getValue());
                            success = false;
                        }
                    } else if (counter.getName().startsWith(FAMILY_COUNTER_PREFIX)) {
                        numFamilies++;
                        if (numCellsPerFamily != counter.getValue()) {
                            log.error("Job '" + job.getJobName() + "', counter '" + counter.getName()
                                    + "' should have " + numCellsPerFamily + " cells, but instead has "
                                    + counter.getValue());
                            success = false;
                        }
                    }
                }
                if (options.test.numRows != numRows) {
                    log.error("Job '" + job.getJobName() + "' is supposed to have " + options.test.numRows
                            + " rows, but has " + numRows);
                    success = false;
                }
                if (DataCompatibilityLoad.FAMILIES.length != numFamilies) {
                    log.error("Job '" + job.getJobName() + "' is supposed to have "
                            + DataCompatibilityLoad.FAMILIES.length + " families, but has " + numFamilies);
                    success = false;
                }
            }
        }
        if (success) {
            log.info("All internal checks passed.");
        } else {
            log.info("Some checks failed. see log.");
        }
        return success ? 0 : 1;
    } finally {
        options.input.close();
    }
}
From source file:com.cloudera.oryx.computation.common.DistributedGenerationRunner.java
License:Open Source License
private static Collection<String> find(String instanceDir) throws IOException, InterruptedException {
    Collection<String> result = Lists.newArrayList();

    // This is where we will see Hadoop config problems first, so log extra info
    Cluster cluster;
    try {
        cluster = new Cluster(OryxConfiguration.get());
    } catch (IOException ioe) {
        log.error("Unable to init the Hadoop cluster. Check that an MR2, not MR1, cluster is configured.");
        throw ioe;
    }

    try {
        JobStatus[] statuses = cluster.getAllJobStatuses();
        if (statuses != null) {
            for (JobStatus jobStatus : statuses) {
                JobStatus.State state = jobStatus.getState();
                if (state == JobStatus.State.RUNNING || state == JobStatus.State.PREP) {
                    Job job = cluster.getJob(jobStatus.getJobID());
                    if (job != null) {
                        String jobName = job.getJobName();
                        log.info("Found running job {}", jobName);
                        if (jobName.startsWith("Oryx-" + instanceDir + '-')) {
                            result.add(jobName);
                        }
                    }
                }
            }
        }
    } finally {
        cluster.close();
    }
    return result;
}
From source file:com.example.Driver.java
License:Open Source License
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Your job name");
    job.setJarByClass(Driver.class);

    logger.info("job " + job.getJobName() + " [" + job.getJar() + "] started with the following arguments: "
            + Arrays.toString(args));

    if (args.length < 2) {
        logger.warn("to run this jar are necessary at 2 parameters \"" + job.getJar()
                + " input_files output_directory");
        return 1;
    }

    job.setMapperClass(WordcountMapper.class);
    logger.info("mapper class is " + job.getMapperClass());

    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(IntWritable.class);
    logger.info("mapper output key class is " + job.getMapOutputKeyClass());
    logger.info("mapper output value class is " + job.getMapOutputValueClass());

    job.setReducerClass(WordcountReducer.class);
    logger.info("reducer class is " + job.getReducerClass());
    job.setCombinerClass(WordcountReducer.class);
    logger.info("combiner class is " + job.getCombinerClass());

    // When you are not running any Reducer:
    //job.setNumReduceTasks(0);
    logger.info("number of reduce task is " + job.getNumReduceTasks());

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    logger.info("output key class is " + job.getOutputKeyClass());
    logger.info("output value class is " + job.getOutputValueClass());

    job.setInputFormatClass(TextInputFormat.class);
    logger.info("input format class is " + job.getInputFormatClass());
    job.setOutputFormatClass(TextOutputFormat.class);
    logger.info("output format class is " + job.getOutputFormatClass());

    Path filePath = new Path(args[0]);
    logger.info("input path " + filePath);
    FileInputFormat.setInputPaths(job, filePath);

    Path outputPath = new Path(args[1]);
    logger.info("output path " + outputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.waitForCompletion(true);
    return 0;
}
From source file:com.ikanow.aleph2.analytics.hadoop.services.MockHadoopTechnologyService.java
License:Apache License
/**
 * Utility to make local jobs check-able
 * @param job
 */
protected void handleLocalJob(Job job) {
    _jobs.put(job.getJobName(), job);
}
From source file:com.kylinolap.job.hadoop.AbstractHadoopJob.java
License:Apache License
protected int waitForCompletion(Job job) throws IOException, InterruptedException, ClassNotFoundException {
    int retVal = 0;
    long start = System.nanoTime();
    if (isAsync) {
        job.submit();
    } else {
        job.waitForCompletion(true);
        retVal = job.isSuccessful() ? 0 : 1;
    }
    log.debug("Job '" + job.getJobName() + "' finished "
            + (job.isSuccessful() ? "successfully in " : "with failures. Time taken ")
            + StringUtils.formatTime((System.nanoTime() - start) / 1000000L));
    return retVal;
}
From source file:com.linkedin.mr_kluj.GenericClojureJob.java
License:Apache License
public void run() {
    info("Starting " + getClass().getSimpleName());

    /*** Get clojure source ***/
    final String cljSource;
    if (props.getProperty(LI_CLJ_SOURCE) == null) {
        final String resourceName = props.getProperty("li.clj.source.file");
        if (resourceName == null) {
            throw new RuntimeException(
                    "Must define either li.clj.source or li.clj.source.file on the Props object.");
        }
        URL resource = getClass().getClassLoader().getResource(resourceName);
        if (resource == null) {
            // Perhaps it's a URL for a Hadoop-understood file-system
            try {
                resource = getScriptFromPath(new Configuration(), resourceName).toURI().toURL();
            } catch (Exception e) {
                // perhaps it wasn't...
            }
        }
        if (resource == null) {
            // Maybe it's a file
            File theFile = new File(resourceName);
            if (theFile.exists()) {
                try {
                    resource = theFile.toURI().toURL();
                } catch (MalformedURLException e) {
                    throw new RuntimeException("WTF?", e);
                }
            }
        }
        if (resource == null) {
            throw new RuntimeException(
                    String.format("Resource[%s] does not exist on the classpath.", resourceName));
        }
        try {
            cljSource = new String(getBytes(resource.openStream()));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        props.setProperty(LI_CLJ_SOURCE, cljSource);
    } else {
        cljSource = props.getProperty(LI_CLJ_SOURCE);
    }

    final String theActualFunction = String.format(
            "(require '[com.linkedin.mr-kluj.job :as job])\n\n" + "%s\n" + "(map job/starter the-jobs)\n",
            cljSource);

    info("--- Source: ---");
    info(theActualFunction);
    info(" --------- ");

    boolean jobCompleted;

    try {
        RT.var("clojure.core", "require").invoke(Symbol.intern("clojure.main"));
        Var.pushThreadBindings(RT.map(RT.var("clojure.core", "*warn-on-reflection*"), RT.T,
                RT.var("user", "*context*"), null, RT.var("user", "*props*"), props));

        Iterable<IFn> jobs = (Iterable<IFn>) clojure.lang.Compiler.load(new StringReader(theActualFunction),
                "start-job-input", "clj-job");

        int count = 0;
        for (IFn ifn : jobs) {
            Job job = (Job) ifn.invoke();
            job.getConfiguration().set(LI_CLJ_SOURCE, cljSource);
            job.getConfiguration().set(LI_CLJ_JOB_INDEX, String.valueOf(count));

            ByteArrayOutputStream baos = new ByteArrayOutputStream(1024 * 10);
            props.storeToXML(baos, null);
            job.getConfiguration().set(LI_CLJ_PROPERTIES, new String(baos.toByteArray()));

            info(String.format("Starting job %s[%s]", job.getJobID(), job.getJobName()));
            jobCompleted = job.waitForCompletion(true);
            ++count;

            if (!jobCompleted) {
                throw new RuntimeException(String.format("Job[%s] failed for some reason.", job.getJobID()));
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}