List of usage examples for org.apache.hadoop.mapreduce.Job.setJobName
public void setJobName(String name) throws IllegalStateException
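For orientation, here is a minimal, self-contained driver sketch showing where setJobName typically sits in job setup. The class name, job name, and argument handling are illustrative only and are not taken from the sources below. Note that setJobName may only be called while the job is still being defined; once the job has been submitted it throws IllegalStateException.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetJobNameExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(SetJobNameExample.class);

        // setJobName is only legal in the DEFINE state;
        // calling it after submit() throws IllegalStateException.
        job.setJobName("set-job-name-example");

        // Map-only identity job: copies input records to the output,
        // using the default TextInputFormat/TextOutputFormat.
        job.setNumReduceTasks(0);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}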
From source file: com.moz.fiji.mapreduce.gather.FijiGatherJobBuilder.java
License: Apache License
/** {@inheritDoc} */
@Override
protected void configureJob(Job job) throws IOException {
    // Construct the gatherer instance.
    if (null == mGathererClass) {
        throw new JobConfigurationException("Must specify a gatherer.");
    }
    final Configuration conf = job.getConfiguration();

    // Serialize the gatherer class name into the job configuration.
    conf.setClass(FijiConfKeys.FIJI_GATHERER_CLASS, mGathererClass, FijiGatherer.class);

    if ((getJobOutput() instanceof HFileMapReduceJobOutput) && (null == mReducerClass)) {
        mReducerClass = IdentityReducer.class;
    }

    final StringBuilder name = new StringBuilder("Fiji gather: " + mGathererClass.getSimpleName());
    if (null != mReducerClass) {
        name.append(" / " + mReducerClass.getSimpleName());
    }
    job.setJobName(name.toString());

    mGatherer = ReflectionUtils.newInstance(mGathererClass, conf);
    mMapper.setConf(conf);
    mDataRequest = mGatherer.getDataRequest();

    // Construct the combiner instance (if specified).
    if (null != mCombinerClass) {
        mCombiner = ReflectionUtils.newInstance(mCombinerClass, conf);
    }

    // Construct the reducer instance (if specified).
    if (null != mReducerClass) {
        mReducer = ReflectionUtils.newInstance(mReducerClass, conf);
    }

    // Configure the table input job (requires mGatherer, mMapper and mReducer to be set):
    super.configureJob(job);

    // Some validation:
    if (getJobOutput() instanceof HFileMapReduceJobOutput) {
        if (mReducer instanceof IdentityReducer) {
            Preconditions.checkState(mGatherer.getOutputKeyClass() == HFileKeyValue.class,
                String.format("Gatherer '%s' writing HFiles must output HFileKeyValue keys, but got '%s'",
                    mGathererClass.getName(), mGatherer.getOutputKeyClass().getName()));
            Preconditions.checkState(mGatherer.getOutputValueClass() == NullWritable.class,
                String.format("Gatherer '%s' writing HFiles must output NullWritable values, but got '%s'",
                    mGathererClass.getName(), mGatherer.getOutputValueClass().getName()));
        }
        Preconditions.checkState(mReducer.getOutputKeyClass() == HFileKeyValue.class,
            String.format("Reducer '%s' writing HFiles must output HFileKeyValue keys, but got '%s'",
                mReducerClass.getName(), mReducer.getOutputKeyClass().getName()));
        Preconditions.checkState(mReducer.getOutputValueClass() == NullWritable.class,
            String.format("Reducer '%s' writing HFiles must output NullWritable values, but got '%s'",
                mReducerClass.getName(), mReducer.getOutputValueClass().getName()));
    }
}
From source file: com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License: Apache License
public Job setupJob(String jobName, Path outputFile, Class<? extends Mapper> mapperClass,
        Class<? extends Reducer> reducerClass, EntityId startKey, EntityId limitKey,
        FijiRowFilter filter) throws Exception {
    final Job job = new Job(createConfiguration());
    final Configuration conf = job.getConfiguration();

    // Get settings for test.
    final FijiDataRequest request = FijiDataRequest.builder()
        .addColumns(ColumnsDef.create().add("info", "name").add("info", "email")).build();

    job.setJarByClass(IntegrationTestFijiTableInputFormat.class);

    // Setup the InputFormat.
    FijiTableInputFormat.configureJob(job, getFooTable().getURI(), request, startKey, limitKey, filter);
    job.setInputFormatClass(HBaseFijiTableInputFormat.class);

    // Duplicate functionality from MapReduceJobBuilder, since we are not using it here:
    final List<Path> jarFiles = Lists.newArrayList();
    final FileSystem fs = FileSystem.getLocal(conf);
    for (String cpEntry : System.getProperty("java.class.path").split(":")) {
        if (cpEntry.endsWith(".jar")) {
            jarFiles.add(fs.makeQualified(new Path(cpEntry)));
        }
    }
    DistributedCacheJars.addJarsToDistributedCache(job, jarFiles);

    // Create a test job.
    job.setJobName(jobName);

    // Setup the OutputFormat.
    TextOutputFormat.setOutputPath(job, outputFile.getParent());
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Set the mapper class.
    if (null != mapperClass) {
        job.setMapperClass(mapperClass);
    }

    // Set the reducer class.
    if (null != reducerClass) {
        job.setReducerClass(reducerClass);
    }

    return job;
}
From source file: com.moz.fiji.mapreduce.pivot.FijiPivotJobBuilder.java
License: Apache License
/** {@inheritDoc} */
@Override
protected void configureJob(Job job) throws IOException {
    final Configuration conf = job.getConfiguration();

    if (null == mPivoterClass) {
        throw new JobConfigurationException("Must specify a FijiPivoter class.");
    }

    // Serialize the pivoter class name into the job configuration.
    conf.setClass(FijiConfKeys.FIJI_PIVOTER_CLASS, mPivoterClass, FijiPivoter.class);

    // The pivoter runs in the map phase; its writes pass through an identity reducer.
    mMapper = new PivoterMapper();
    mReducer = new IdentityReducer<Object, Object>();

    job.setJobName("FijiPivoter: " + mPivoterClass.getSimpleName());

    mPivoter = ReflectionUtils.newInstance(mPivoterClass, job.getConfiguration());
    mDataRequest = mPivoter.getDataRequest();

    // Configure the table input job.
    super.configureJob(job);
}
From source file: com.moz.fiji.mapreduce.produce.FijiProduceJobBuilder.java
License: Apache License
/** {@inheritDoc} */
@Override
protected void configureJob(Job job) throws IOException {
    final Configuration conf = job.getConfiguration();

    // Construct the producer instance.
    if (null == mProducerClass) {
        throw new JobConfigurationException("Must specify a producer.");
    }

    // Serialize the producer class name into the job configuration.
    conf.setClass(FijiConfKeys.FIJI_PRODUCER_CLASS, mProducerClass, FijiProducer.class);

    // Write to the table, but make sure the output table is the same as the input table.
    if (!getInputTableURI().equals(mJobOutput.getOutputTableURI())) {
        throw new JobConfigurationException("Output table must be the same as the input table.");
    }

    // Producers should output to HFiles.
    mMapper = new ProduceMapper();
    mReducer = new IdentityReducer<Object, Object>();

    job.setJobName("Fiji produce: " + mProducerClass.getSimpleName());

    mProducer = ReflectionUtils.newInstance(mProducerClass, job.getConfiguration());
    mDataRequest = mProducer.getDataRequest();

    // Configure the table input job.
    super.configureJob(job);
}
From source file: com.mozilla.hadoop.Backup.java
License: Apache License
/**
 * @param args
 * @return
 * @throws IOException
 * @throws ParseException
 */
public Job initJob(String[] args) throws IOException, ParseException {
    Path inputPath = null;
    Path loadPath = null;
    String outputPath = null;
    boolean useSpecifiedPaths = false;
    for (int idx = 0; idx < args.length; idx++) {
        if ("-f".equals(args[idx])) {
            useSpecifiedPaths = true;
            loadPath = new Path(args[++idx]);
        } else if (idx == args.length - 1) {
            outputPath = args[idx];
        } else {
            inputPath = new Path(args[idx]);
        }
    }

    Path mrOutputPath = new Path(NAME + "-results");

    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.set("backup.input.path", inputPath.toString());
    conf.set("backup.output.path", outputPath);

    FileSystem inputFs = null;
    FileSystem outputFs = null;
    Path[] inputSources = null;
    try {
        inputFs = FileSystem.get(inputPath.toUri(), new Configuration());
        outputFs = FileSystem.get(getConf());
        if (useSpecifiedPaths) {
            inputSources = createInputSources(loadPaths(outputFs, loadPath), outputFs);
        } else {
            inputSources = createInputSources(getPaths(inputFs, inputPath, 0, 2), outputFs);
        }
    } finally {
        checkAndClose(inputFs);
        checkAndClose(outputFs);
    }

    Job job = new Job(getConf());
    job.setJobName(NAME);
    job.setJarByClass(Backup.class);
    job.setMapperClass(BackupMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    for (Path source : inputSources) {
        System.out.println("Adding input path: " + source.toString());
        FileInputFormat.addInputPath(job, source);
    }
    FileOutputFormat.setOutputPath(job, mrOutputPath);

    return job;
}
From source file: com.mozilla.socorro.hadoop.CrashCountToHbase.java
License: LGPL
/**
 * @param args
 * @return
 * @throws IOException
 */
public Job initJob(String[] args) throws IOException {
    Job job = new Job(getConf());
    job.setJobName(NAME);
    job.setJarByClass(CrashCountToHbase.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    job.setMapperClass(CrashCountToHBaseMapper.class);
    job.setReducerClass(CrashCountToHBaseReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job;
}
From source file: com.mozilla.socorro.hadoop.CrashReportJob.java
License: LGPL
/**
 * @return
 * @throws IOException
 * @throws ParseException
 */
public static Job initJob(String jobName, Configuration conf, Class<?> mainClass,
        Class<? extends TableMapper> mapperClass, Class<? extends Reducer> combinerClass,
        Class<? extends Reducer> reducerClass, Map<byte[], byte[]> columns,
        Class<? extends WritableComparable> keyOut, Class<? extends Writable> valueOut,
        Path outputPath) throws IOException, ParseException {
    // Set both start/end time and start/stop row
    Calendar startCal = Calendar.getInstance();
    Calendar endCal = Calendar.getInstance();

    SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");

    String startDateStr = conf.get(START_DATE);
    String endDateStr = conf.get(END_DATE);
    if (!StringUtils.isBlank(startDateStr)) {
        startCal.setTime(sdf.parse(startDateStr));
    }
    if (!StringUtils.isBlank(endDateStr)) {
        endCal.setTime(sdf.parse(endDateStr));
    }

    conf.setLong(START_TIME, startCal.getTimeInMillis());
    conf.setLong(END_TIME, DateUtil.getEndTimeAtResolution(endCal.getTimeInMillis(), Calendar.DATE));

    Job job = new Job(conf);
    job.setJobName(jobName);
    job.setJarByClass(mainClass);

    // input table configuration
    Scan[] scans = MultiScanTableMapReduceUtil.generateScans(startCal, endCal, columns, 100, false);
    MultiScanTableMapReduceUtil.initMultiScanTableMapperJob(TABLE_NAME_CRASH_REPORTS, scans, mapperClass,
        keyOut, valueOut, job);

    if (combinerClass != null) {
        job.setCombinerClass(combinerClass);
    }

    if (reducerClass != null) {
        job.setReducerClass(reducerClass);
    } else {
        job.setNumReduceTasks(0);
    }

    FileOutputFormat.setOutputPath(job, outputPath);

    return job;
}
From source file: com.mycompany.maprpractice.runnerClass.WordCount.java
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Job job = new org.apache.hadoop.mapreduce.Job();
    job.setJarByClass(WordCount.class);
    job.setJobName("WordCounter");

    // Input and output paths come from the command line, as the usage check above requires.
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);

    int returnValue = job.waitForCompletion(true) ? 0 : 1;
    System.out.println("job.isSuccessful " + job.isSuccessful());
    return returnValue;
}
From source file: com.ngdata.hbaseindexer.mr.HBaseMapReduceIndexerTool.java
License: Apache License
public int run(HBaseIndexingOptions hbaseIndexingOpts, JobProcessCallback callback) throws Exception {
    if (hbaseIndexingOpts.isDryRun) {
        return new IndexerDryRun(hbaseIndexingOpts, getConf(), System.out).run();
    }

    long programStartTime = System.currentTimeMillis();
    Configuration conf = getConf();

    IndexingSpecification indexingSpec = hbaseIndexingOpts.getIndexingSpecification();

    conf.set(HBaseIndexerMapper.INDEX_COMPONENT_FACTORY_KEY, indexingSpec.getIndexerComponentFactory());
    conf.set(HBaseIndexerMapper.INDEX_CONFIGURATION_CONF_KEY,
        new String(indexingSpec.getConfiguration(), Charsets.UTF_8));
    conf.set(HBaseIndexerMapper.INDEX_NAME_CONF_KEY, indexingSpec.getIndexerName());
    conf.set(HBaseIndexerMapper.TABLE_NAME_CONF_KEY, indexingSpec.getTableName());
    HBaseIndexerMapper.configureIndexConnectionParams(conf, indexingSpec.getIndexConnectionParams());

    IndexerComponentFactory factory = IndexerComponentFactoryUtil.getComponentFactory(
        indexingSpec.getIndexerComponentFactory(),
        new ByteArrayInputStream(indexingSpec.getConfiguration()),
        indexingSpec.getIndexConnectionParams());
    IndexerConf indexerConf = factory.createIndexerConf();

    Map<String, String> params = indexerConf.getGlobalParams();
    String morphlineFile = params.get(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM);
    if (hbaseIndexingOpts.morphlineFile != null) {
        morphlineFile = hbaseIndexingOpts.morphlineFile.getPath();
    }
    if (morphlineFile != null) {
        conf.set(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM, new File(morphlineFile).getName());
        ForkedMapReduceIndexerTool.addDistributedCacheFile(new File(morphlineFile), conf);
    }

    String morphlineId = params.get(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM);
    if (hbaseIndexingOpts.morphlineId != null) {
        morphlineId = hbaseIndexingOpts.morphlineId;
    }
    if (morphlineId != null) {
        conf.set(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM, morphlineId);
    }

    conf.setBoolean(HBaseIndexerMapper.INDEX_DIRECT_WRITE_CONF_KEY, hbaseIndexingOpts.isDirectWrite());

    if (hbaseIndexingOpts.fairSchedulerPool != null) {
        conf.set("mapred.fairscheduler.pool", hbaseIndexingOpts.fairSchedulerPool);
    }

    // switch off a false warning about allegedly not implementing Tool
    // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // also see https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);

    if (hbaseIndexingOpts.log4jConfigFile != null) {
        Utils.setLogConfigFile(hbaseIndexingOpts.log4jConfigFile, getConf());
        ForkedMapReduceIndexerTool.addDistributedCacheFile(hbaseIndexingOpts.log4jConfigFile, conf);
    }

    Job job = Job.getInstance(getConf());
    job.setJobName(getClass().getSimpleName() + "/" + HBaseIndexerMapper.class.getSimpleName());
    job.setJarByClass(HBaseIndexerMapper.class);
    // job.setUserClassesTakesPrecedence(true);

    TableMapReduceUtil.initTableMapperJob(hbaseIndexingOpts.getScans(), HBaseIndexerMapper.class,
        Text.class, SolrInputDocumentWritable.class, job);

    // Explicitly set the HBase configuration on the job because TableMapReduceUtil overwrites it with
    // the HBase defaults (see HBASE-4297, which is not really fixed in hbase 0.94.6 on all code paths).
    HBaseConfiguration.merge(job.getConfiguration(), getConf());

    int mappers = new JobClient(job.getConfiguration()).getClusterStatus().getMaxMapTasks(); // MR1
    //mappers = job.getCluster().getClusterStatus().getMapSlotCapacity(); // Yarn only
    LOG.info("Cluster reports {} mapper slots", mappers);

    LOG.info("Using these parameters: reducers: {}, shards: {}, fanout: {}, maxSegments: {}",
        new Object[] { hbaseIndexingOpts.reducers, hbaseIndexingOpts.shards, hbaseIndexingOpts.fanout,
            hbaseIndexingOpts.maxSegments });

    if (hbaseIndexingOpts.isDirectWrite()) {
        CloudSolrServer solrServer = new CloudSolrServer(hbaseIndexingOpts.zkHost);
        solrServer.setDefaultCollection(hbaseIndexingOpts.collection);

        if (hbaseIndexingOpts.clearIndex) {
            clearSolr(indexingSpec.getIndexConnectionParams());
        }

        // Run a mapper-only MR job that sends index documents directly to a live Solr instance.
        job.setOutputFormatClass(NullOutputFormat.class);
        job.setNumReduceTasks(0);
        job.submit();
        callback.jobStarted(job.getJobID().toString(), job.getTrackingURL());
        if (!ForkedMapReduceIndexerTool.waitForCompletion(job, hbaseIndexingOpts.isVerbose)) {
            return -1; // job failed
        }
        commitSolr(indexingSpec.getIndexConnectionParams());
        ForkedMapReduceIndexerTool.goodbye(job, programStartTime);
        return 0;
    } else {
        FileSystem fileSystem = FileSystem.get(getConf());

        if (fileSystem.exists(hbaseIndexingOpts.outputDir)) {
            if (hbaseIndexingOpts.overwriteOutputDir) {
                LOG.info("Removing existing output directory {}", hbaseIndexingOpts.outputDir);
                if (!fileSystem.delete(hbaseIndexingOpts.outputDir, true)) {
                    LOG.error("Deleting output directory '{}' failed", hbaseIndexingOpts.outputDir);
                    return -1;
                }
            } else {
                LOG.error("Output directory '{}' already exists. Run with --overwrite-output-dir to "
                    + "overwrite it, or remove it manually", hbaseIndexingOpts.outputDir);
                return -1;
            }
        }

        int exitCode = ForkedMapReduceIndexerTool.runIndexingPipeline(job, callback, getConf(),
            hbaseIndexingOpts.asOptions(), programStartTime, fileSystem, null, -1, // File-based parameters
            -1, // num mappers, only of importance for file-based indexing
            hbaseIndexingOpts.reducers);

        if (hbaseIndexingOpts.isGeneratedOutputDir()) {
            LOG.info("Deleting generated output directory " + hbaseIndexingOpts.outputDir);
            fileSystem.delete(hbaseIndexingOpts.outputDir, true);
        }
        return exitCode;
    }
}
From source file: com.nikoo28.excel.mapreduce.ExcelDriver.java
License: Apache License
/**
 * Main entry point for the example.
 *
 * @param args arguments
 * @throws Exception when something goes wrong
 */
public static void main(String[] args) throws Exception {
    logger.info("Driver started");

    Job job = new Job();
    job.setJarByClass(ExcelDriver.class);
    job.setJobName("Excel Record Reader");

    job.setMapperClass(ExcelMapper.class);
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(ExcelInputFormat.class);

    job.waitForCompletion(true);
}