List of usage examples for org.apache.hadoop.mapreduce Job getConfiguration
public Configuration getConfiguration()
From source file:com.conversantmedia.mapreduce.tool.annotation.handler.AvroNamedOutputAnnotationHandler.java
License:Apache License
@Override public void process(Annotation annotation, Job job, Object target) throws ToolException { AvroNamedOutput avroOut = (AvroNamedOutput) annotation; Schema schema = getSchema(avroOut.record()); String[] names = getNames(avroOut); for (String name : names) { name = (String) evaluateExpression(name); if (!configured.contains(name)) { AvroMultipleOutputs.addNamedOutput(job, name, avroOut.format(), schema); AvroMultipleOutputs.setCountersEnabled(job, avroOut.countersEnabled()); configured.add(name);//from www . j av a 2s.co m } } AvroSerialization.addToConfiguration(job.getConfiguration()); }
From source file:com.conversantmedia.mapreduce.tool.annotation.handler.MaraAnnotationUtil.java
License:Apache License
/** * * @param job the job * @param jobField the field to retrieve annotations from * @param driver the driver bean * @param context the tool context * @throws ToolException if any issue is encountered through reflection or expression evaluation *//*from w w w . ja v a2s.c o m*/ public void configureJobFromField(Job job, Field jobField, Object driver, AnnotatedToolContext context) throws ToolException { JobInfo jobInfo = jobField.getAnnotation(JobInfo.class); String name = StringUtils.isBlank(jobInfo.value()) ? jobInfo.name() : jobInfo.value(); if (StringUtils.isBlank(name)) { name = defaultDriverIdForClass(driver.getClass()); } name = (String) ExpressionEvaluator.instance().evaluate(driver, context, name); job.setJobName(name); if (!jobInfo.numReducers().equals("-1")) { if (NumberUtils.isNumber(jobInfo.numReducers())) { job.setNumReduceTasks(Integer.valueOf(jobInfo.numReducers())); } else { Object reducerValue = ExpressionEvaluator.instance().evaluate(driver, context, jobInfo.numReducers()); if (reducerValue != null) { job.setNumReduceTasks((Integer) reducerValue); } } } // We can override (the runjob script does) which jar to use instead of using running driver class if (StringUtils.isBlank(job.getConfiguration().get("mapred.jar"))) { job.setJarByClass(driver.getClass()); } handleJobFieldAnnotations(job, jobField, jobInfo); }
From source file:com.conversantmedia.mapreduce.tool.annotation.handler.TableInputAnnotationHandler.java
License:Apache License
@Override public void process(Annotation annotation, Job job, Object target) throws ToolException { TableInput tableInput = (TableInput) annotation; // Base setup of the table mapper job Configuration conf = job.getConfiguration(); HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf)); try {//ww w. j av a 2s.co m // Add dependencies TableMapReduceUtil.addDependencyJars(job); String tableName = getTableName(tableInput); Scan scan = getScan(tableInput); job.setInputFormatClass(TableInputFormat.class); conf.set(TableInputFormat.INPUT_TABLE, tableName); conf.set(TableInputFormat.SCAN, convertScanToString(scan)); } catch (IOException e) { throw new ToolException(e); } }
From source file:com.conversantmedia.mapreduce.tool.annotation.handler.TableOutputAnnotationHandler.java
License:Apache License
@Override public void process(Annotation annotation, Job job, Object target) throws ToolException { TableOutput tableOutput = (TableOutput) annotation; // Base setup of the table job Configuration conf = job.getConfiguration(); HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf)); // Add dependencies try {//from ww w . java2s .c om TableMapReduceUtil.addDependencyJars(job); } catch (IOException e) { throw new ToolException(e); } // Set table output format job.setOutputFormatClass(TableOutputFormat.class); // Set the table name String tableName = (String) this.evaluateExpression(tableOutput.value()); job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, tableName); }
From source file:com.conversantmedia.mapreduce.tool.BaseTool.java
License:Apache License
@Override public int run(String[] args) throws Exception { final T context = newContext(); context.setContextListener(new ToolContextListener() { @Override/*from ww w .j av a 2s . c o m*/ public void afterInitOptions(Options options) throws Exception { notifyListeners(Event.AFTER_INIT_CLI_OPTIONS, context, options); } @Override public void afterParseCommandLine(CommandLine commandLine) throws Exception { notifyListeners(Event.AFTER_PARSE_CLI, context, commandLine); } }); context.setDriverClass(this.getClass()); try { // Register ourselves as a listener this.addListener(this); context.parseFromArgs(args); // Useful info logger().info(context.toString()); // Perform any specific initialization tasks notifyListeners(Event.BEFORE_INIT_DRIVER, context, null); initInternal(context); // Notify any listeners before initializing job notifyListeners(Event.BEFORE_INIT_JOB, context, null); // Initialize our job Job job = initJob(context); // We can override (the runjob script does) which jar to use instead of using running driver class if (StringUtils.isBlank(job.getConfiguration().get("mapred.jar"))) { logger().info("Setting job jar by class [" + this.getClass() + "]"); job.setJarByClass(this.getClass()); } context.setJob(job); // Post-initialization routines jobPostInit(context); if (context.isDumpConfig()) { Console.out(context.toString()); dumpConfig(job.getConfiguration()); } if (context.isDryRun()) { Console.out("Dry run only. 
Job will not be executed."); return 0; } // Launches the job launchJob(context, job); // Now move our input to archive if (context.getReturnCode() == 0 && context.getArchive() != null) { archiveInputs(context); } // Clean up our job cleanUp(context); notifyListeners(Event.BEFORE_EXIT, context, null); } catch (ParseException pe) { // Output a more "friendly" message context.showHelpAndExit(context.initOptions(), 1, pe.getMessage()); } catch (Exception e) { logger().error("Problem running tool: " + e.getMessage(), e); notifyListeners(Event.EXCEPTION, context, e); } return context.getReturnCode(); }
From source file:com.conversantmedia.mapreduce.tool.DistributedResourceManager.java
License:Apache License
/**
 * Creates a manager bound to the given job and adopts that job's
 * configuration as its own via {@code setConf}.
 *
 * @param job the job whose configuration this manager uses; it is
 *            dereferenced immediately, so it must not be {@code null}
 */
protected DistributedResourceManager(Job job) {
    super();
    this.job = job;
    this.setConf(this.job.getConfiguration());
}
From source file:com.datasalt.pangool.flow.Step.java
License:Apache License
/** * Convenience method that can be used by Jobs for executing Pangool's {@link CoGrouper} instances. *///www .j av a 2 s . c o m public int executeCoGrouper(TupleMRBuilder coGrouper) throws IOException, TupleMRException, InterruptedException, ClassNotFoundException { Job job = coGrouper.createJob(); try { if (nReducers > 0) { job.getConfiguration().setInt("mapred.reduce.tasks", nReducers); } if (job.waitForCompletion(true)) { return 1; } return -1; } finally { coGrouper.cleanUpInstanceFiles(); } }
From source file:com.datasalt.pangool.tuplemr.avro.AvroOutputFormat.java
License:Apache License
/**
 * Turns on output compression for the job and records the deflate level to
 * use under {@code DEFLATE_LEVEL_KEY} in the job configuration.
 *
 * @param job   the job to configure
 * @param level the deflate level; stored as given, without range checking
 */
public static void setDeflateLevel(Job job, int level) {
    FileOutputFormat.setCompressOutput(job, true);
    job.getConfiguration().setInt(DEFLATE_LEVEL_KEY, level);
}
From source file:com.datasalt.pangool.tuplemr.avro.AvroOutputFormat.java
License:Apache License
/**
 * Records the sync interval for the underlying {@link DataFileWriter} under
 * {@code SYNC_INTERVAL_KEY} in the job configuration.
 *
 * @param job                 the job to configure
 * @param syncIntervalInBytes the sync interval in bytes; stored as given
 */
public static void setSyncInterval(Job job, int syncIntervalInBytes) {
    job.getConfiguration().setInt(SYNC_INTERVAL_KEY, syncIntervalInBytes);
}
From source file:com.datasalt.pangool.tuplemr.MapOnlyJobBuilder.java
License:Apache License
public Job createJob() throws IOException, TupleMRException, URISyntaxException { // perform a deep copy of the configuration this.conf = new Configuration(this.conf); String uniqueName = UUID.randomUUID().toString() + '.' + "out-format.dat"; try {//from w w w .j av a2s . co m InstancesDistributor.distribute(outputFormat, uniqueName, conf); instanceFilesCreated.add(uniqueName); } catch (URISyntaxException e1) { throw new TupleMRException(e1); } Job job; if (jobName == null) { job = new Job(conf); } else { job = new Job(conf, jobName); } job.setNumReduceTasks(0); job.getConfiguration().set(ProxyOutputFormat.PROXIED_OUTPUT_FORMAT_CONF, uniqueName); job.setOutputFormatClass(ProxyOutputFormat.class); if (outputKeyClass == null) { throw new TupleMRException("Output spec must be defined, use setOutput()"); } job.setOutputKeyClass(outputKeyClass); job.setOutputValueClass(outputValueClass); FileOutputFormat.setOutputPath(job, outputPath); Input lastInput = null; for (Input input : multipleInputs.getMultiInputs()) { if (input.inputProcessor == null) { input.inputProcessor = mapOnlyMapper; if (input.inputProcessor == null) { throw new TupleMRException("Either mapOnlyMapper property or full Input spec must be set."); } } lastInput = input; } if (lastInput == null) { throw new TupleMRException("At least one input must be specified"); } job.setJarByClass((jarByClass != null) ? jarByClass : lastInput.inputProcessor.getClass()); instanceFilesCreated.addAll(multipleInputs.configureJob(job)); instanceFilesCreated.addAll(namedOutputs.configureJob(job)); return job; }