List of usage examples for org.apache.hadoop.mapreduce Job getTrackingURL
public String getTrackingURL()
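In every example below, getTrackingURL() is called only after the job has been submitted (directly via job.submit() or implicitly via waitForCompletion), typically to log or report the URL of the job's tracking page. A minimal sketch of that pattern is shown here; the class name, job name, and omitted mapper/reducer/path setup are placeholders, not part of any of the source files listed below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class TrackingUrlExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Placeholder job; real code would also set the input/output formats,
        // mapper, reducer, and input/output paths before submitting.
        Job job = Job.getInstance(conf, "tracking-url-example");
        job.setJarByClass(TrackingUrlExample.class);

        // The tracking URL is only meaningful once the job has been submitted.
        job.submit();
        System.out.println("Job " + job.getJobID() + " may be tracked at: " + job.getTrackingURL());

        boolean success = job.waitForCompletion(true);
        System.exit(success ? 0 : 1);
    }
}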
From source file:cascading.stats.hadoop.HadoopStepStats.java
License:Open Source License
public String getStatusURL() {
    Job runningJob = getJob(getJobStatusClient());

    if (runningJob == null)
        return null;

    return runningJob.getTrackingURL();
}
From source file:com.inmobi.conduit.distcp.tools.DistCp.java
License:Apache License
/**
 * Implements the core-execution. Creates the file-list for copy,
 * and launches the Hadoop-job, to do the copy.
 * @return Job handle
 * @throws Exception, on failure.
 */
public Job execute() throws Exception {
    assert inputOptions != null;
    assert getConf() != null;

    Job job = null;
    try {
        metaFolder = createMetaFolderPath();
        jobFS = metaFolder.getFileSystem(getConf());

        job = createJob();
        createInputFileListing(job);

        job.submit();
        submitted = true;
    } finally {
        if (!submitted) {
            cleanup();
        }
    }

    String jobID = getJobID(job);
    job.getConfiguration().set(DistCpConstants.CONF_LABEL_DISTCP_JOB_ID, jobID);

    LOG.info("DistCp job-id: " + jobID);
    LOG.info("DistCp job may be tracked at: " + job.getTrackingURL());
    LOG.info("To cancel, run the following command:\thadoop job -kill " + jobID);

    long jobStartTime = System.nanoTime();
    if (inputOptions.shouldBlock() && !job.waitForCompletion(true)) {
        updateJobTimeInNanos(jobStartTime);
        throw new IOException("DistCp failure: Job " + jobID + " has failed. ");
    }
    updateJobTimeInNanos(jobStartTime);
    return job;
}
From source file:com.metamx.druid.indexer.DeterminePartitionsJob.java
License:Open Source License
public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */
        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            final Job groupByJob = new Job(new Configuration(), String.format(
                    "%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()));

            injectSystemProperties(groupByJob);
            groupByJob.setInputFormatClass(TextInputFormat.class);
            groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
            groupByJob.setMapOutputKeyClass(BytesWritable.class);
            groupByJob.setMapOutputValueClass(NullWritable.class);
            groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setOutputKeyClass(BytesWritable.class);
            groupByJob.setOutputValueClass(NullWritable.class);
            groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
            groupByJob.setJarByClass(DeterminePartitionsJob.class);

            config.addInputPaths(groupByJob);
            config.intoConfiguration(groupByJob);
            FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

            groupByJob.submit();
            log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(),
                    groupByJob.getTrackingURL());

            if (!groupByJob.waitForCompletion(true)) {
                log.error("Job failed: %s", groupByJob.getJobID());
                return false;
            }
        } else {
            log.info("Skipping group-by job.");
        }

        /*
         * Read grouped data and determine appropriate partitions.
         */
        final Job dimSelectionJob = new Job(new Configuration(), String.format(
                "%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()));

        dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

        injectSystemProperties(dimSelectionJob);

        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            // Read grouped data from the groupByJob.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
            dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
            FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
        } else {
            // Directly read the source data, since we assume it's already grouped.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
            dimSelectionJob.setInputFormatClass(TextInputFormat.class);
            config.addInputPaths(dimSelectionJob);
        }

        SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob);
        dimSelectionJob.setMapOutputValueClass(Text.class);
        dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
        dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
        dimSelectionJob.setOutputKeyClass(BytesWritable.class);
        dimSelectionJob.setOutputValueClass(Text.class);
        dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
        dimSelectionJob.setJarByClass(DeterminePartitionsJob.class);

        config.intoConfiguration(dimSelectionJob);
        FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

        dimSelectionJob.submit();
        log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(),
                dimSelectionJob.getTrackingURL());

        if (!dimSelectionJob.waitForCompletion(true)) {
            log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
            return false;
        }

        /*
         * Load partitions determined by the previous job.
         */
        log.info("Job completed, loading up partitions for intervals[%s].",
                config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
            DateTime bucket = segmentGranularity.getStart();

            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0));
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
            }
            if (fileSystem.exists(partitionInfoPath)) {
                List<ShardSpec> specs = config.jsonMapper.readValue(
                        Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                        new TypeReference<List<ShardSpec>>() {
                        });

                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
                for (int i = 0; i < specs.size(); ++i) {
                    actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
                }

                shardSpecs.put(bucket, actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }
        config.setShardSpecs(shardSpecs);

        return true;
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
From source file:com.metamx.druid.indexer.IndexGeneratorJob.java
License:Open Source License
public boolean run() {
    try {
        Job job = new Job(new Configuration(),
                String.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals()));

        job.getConfiguration().set("io.sort.record.percent", "0.23");

        for (String propName : System.getProperties().stringPropertyNames()) {
            Configuration conf = job.getConfiguration();
            if (propName.startsWith("hadoop.")) {
                conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
            }
        }

        job.setInputFormatClass(TextInputFormat.class);

        job.setMapperClass(IndexGeneratorMapper.class);
        job.setMapOutputValueClass(Text.class);

        SortableBytes.useSortableBytesAsMapOutputKey(job);

        job.setNumReduceTasks(Iterables.size(config.getAllBuckets()));
        job.setPartitionerClass(IndexGeneratorPartitioner.class);

        job.setReducerClass(IndexGeneratorReducer.class);
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
        FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());

        config.addInputPaths(job);
        config.intoConfiguration(job);

        job.setJarByClass(IndexGeneratorJob.class);

        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());

        boolean success = job.waitForCompletion(true);

        Counter invalidRowCount = job.getCounters()
                .findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
        jobStats.setInvalidRowCount(invalidRowCount.getValue());

        return success;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:com.netflix.Aegisthus.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());

    job.setJarByClass(Aegisthus.class);
    CommandLine cl = getOptions(args);
    if (cl == null) {
        return 1;
    }
    job.setInputFormatClass(AegisthusInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(CassReducer.class);
    List<Path> paths = Lists.newArrayList();
    if (cl.hasOption(OPT_INPUT)) {
        for (String input : cl.getOptionValues(OPT_INPUT)) {
            paths.add(new Path(input));
        }
    }
    if (cl.hasOption(OPT_INPUTDIR)) {
        paths.addAll(getDataFiles(job.getConfiguration(), cl.getOptionValue(OPT_INPUTDIR)));
    }
    TextInputFormat.setInputPaths(job, paths.toArray(new Path[0]));
    TextOutputFormat.setOutputPath(job, new Path(cl.getOptionValue(OPT_OUTPUT)));

    job.submit();
    System.out.println(job.getJobID());
    System.out.println(job.getTrackingURL());
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:com.ngdata.hbaseindexer.mr.HBaseMapReduceIndexerTool.java
License:Apache License
public int run(HBaseIndexingOptions hbaseIndexingOpts, JobProcessCallback callback) throws Exception {
    if (hbaseIndexingOpts.isDryRun) {
        return new IndexerDryRun(hbaseIndexingOpts, getConf(), System.out).run();
    }

    long programStartTime = System.currentTimeMillis();
    Configuration conf = getConf();

    IndexingSpecification indexingSpec = hbaseIndexingOpts.getIndexingSpecification();

    conf.set(HBaseIndexerMapper.INDEX_COMPONENT_FACTORY_KEY, indexingSpec.getIndexerComponentFactory());
    conf.set(HBaseIndexerMapper.INDEX_CONFIGURATION_CONF_KEY,
            new String(indexingSpec.getConfiguration(), Charsets.UTF_8));
    conf.set(HBaseIndexerMapper.INDEX_NAME_CONF_KEY, indexingSpec.getIndexerName());
    conf.set(HBaseIndexerMapper.TABLE_NAME_CONF_KEY, indexingSpec.getTableName());
    HBaseIndexerMapper.configureIndexConnectionParams(conf, indexingSpec.getIndexConnectionParams());

    IndexerComponentFactory factory = IndexerComponentFactoryUtil.getComponentFactory(
            indexingSpec.getIndexerComponentFactory(),
            new ByteArrayInputStream(indexingSpec.getConfiguration()),
            indexingSpec.getIndexConnectionParams());
    IndexerConf indexerConf = factory.createIndexerConf();

    Map<String, String> params = indexerConf.getGlobalParams();
    String morphlineFile = params.get(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM);
    if (hbaseIndexingOpts.morphlineFile != null) {
        morphlineFile = hbaseIndexingOpts.morphlineFile.getPath();
    }
    if (morphlineFile != null) {
        conf.set(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM, new File(morphlineFile).getName());
        ForkedMapReduceIndexerTool.addDistributedCacheFile(new File(morphlineFile), conf);
    }

    String morphlineId = params.get(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM);
    if (hbaseIndexingOpts.morphlineId != null) {
        morphlineId = hbaseIndexingOpts.morphlineId;
    }
    if (morphlineId != null) {
        conf.set(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM, morphlineId);
    }

    conf.setBoolean(HBaseIndexerMapper.INDEX_DIRECT_WRITE_CONF_KEY, hbaseIndexingOpts.isDirectWrite());

    if (hbaseIndexingOpts.fairSchedulerPool != null) {
        conf.set("mapred.fairscheduler.pool", hbaseIndexingOpts.fairSchedulerPool);
    }

    // switch off a false warning about allegedly not implementing Tool
    // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // also see https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);

    if (hbaseIndexingOpts.log4jConfigFile != null) {
        Utils.setLogConfigFile(hbaseIndexingOpts.log4jConfigFile, getConf());
        ForkedMapReduceIndexerTool.addDistributedCacheFile(hbaseIndexingOpts.log4jConfigFile, conf);
    }

    Job job = Job.getInstance(getConf());
    job.setJobName(getClass().getSimpleName() + "/" + HBaseIndexerMapper.class.getSimpleName());
    job.setJarByClass(HBaseIndexerMapper.class);
    // job.setUserClassesTakesPrecedence(true);

    TableMapReduceUtil.initTableMapperJob(hbaseIndexingOpts.getScans(), HBaseIndexerMapper.class, Text.class,
            SolrInputDocumentWritable.class, job);

    // explicitely set hbase configuration on the job because the TableMapReduceUtil overwrites it with the hbase defaults
    // (see HBASE-4297 which is not really fixed in hbase 0.94.6 on all code paths)
    HBaseConfiguration.merge(job.getConfiguration(), getConf());

    int mappers = new JobClient(job.getConfiguration()).getClusterStatus().getMaxMapTasks(); // MR1
    //mappers = job.getCluster().getClusterStatus().getMapSlotCapacity(); // Yarn only
    LOG.info("Cluster reports {} mapper slots", mappers);

    LOG.info("Using these parameters: " + "reducers: {}, shards: {}, fanout: {}, maxSegments: {}",
            new Object[] { hbaseIndexingOpts.reducers, hbaseIndexingOpts.shards, hbaseIndexingOpts.fanout,
                    hbaseIndexingOpts.maxSegments });

    if (hbaseIndexingOpts.isDirectWrite()) {
        CloudSolrServer solrServer = new CloudSolrServer(hbaseIndexingOpts.zkHost);
        solrServer.setDefaultCollection(hbaseIndexingOpts.collection);

        if (hbaseIndexingOpts.clearIndex) {
            clearSolr(indexingSpec.getIndexConnectionParams());
        }

        // Run a mapper-only MR job that sends index documents directly to a live Solr instance.
        job.setOutputFormatClass(NullOutputFormat.class);
        job.setNumReduceTasks(0);
        job.submit();
        callback.jobStarted(job.getJobID().toString(), job.getTrackingURL());
        if (!ForkedMapReduceIndexerTool.waitForCompletion(job, hbaseIndexingOpts.isVerbose)) {
            return -1; // job failed
        }
        commitSolr(indexingSpec.getIndexConnectionParams());
        ForkedMapReduceIndexerTool.goodbye(job, programStartTime);
        return 0;
    } else {
        FileSystem fileSystem = FileSystem.get(getConf());

        if (fileSystem.exists(hbaseIndexingOpts.outputDir)) {
            if (hbaseIndexingOpts.overwriteOutputDir) {
                LOG.info("Removing existing output directory {}", hbaseIndexingOpts.outputDir);
                if (!fileSystem.delete(hbaseIndexingOpts.outputDir, true)) {
                    LOG.error("Deleting output directory '{}' failed", hbaseIndexingOpts.outputDir);
                    return -1;
                }
            } else {
                LOG.error("Output directory '{}' already exists. Run with --overwrite-output-dir to "
                        + "overwrite it, or remove it manually", hbaseIndexingOpts.outputDir);
                return -1;
            }
        }

        int exitCode = ForkedMapReduceIndexerTool.runIndexingPipeline(job, callback, getConf(),
                hbaseIndexingOpts.asOptions(), programStartTime, fileSystem, null, -1, // File-based parameters
                -1, // num mappers, only of importance for file-based indexing
                hbaseIndexingOpts.reducers);

        if (hbaseIndexingOpts.isGeneratedOutputDir()) {
            LOG.info("Deleting generated output directory " + hbaseIndexingOpts.outputDir);
            fileSystem.delete(hbaseIndexingOpts.outputDir, true);
        }
        return exitCode;
    }
}
From source file:com.streamsets.pipeline.stage.destination.mapreduce.MapReduceExecutor.java
License:Apache License
@Override
public void write(Batch batch) throws StageException {
    EvalContext eval = new EvalContext(getContext());

    Iterator<Record> it = batch.getRecords();
    while (it.hasNext()) {
        final Record record = it.next();
        eval.setRecord(record);

        Job job = null;
        try {
            // Job configuration object is a clone of the original one that we're keeping in mapReduceConfig class
            final Configuration jobConfiguration = new Configuration(mapReduceConfig.getConfiguration());

            // Evaluate all dynamic properties and store them in the configuration job
            for (Map.Entry<String, String> entry : jobConfig.jobConfigs.entrySet()) {
                String key = eval.evaluateToString("jobConfigs", entry.getKey(), true);
                String value = eval.evaluateToString("jobConfigs", entry.getValue(), false);

                jobConfiguration.set(key, value);
            }

            // For build-in job creators, evaluate their properties and persist them in the MR config
            switch (jobConfig.jobType) {
            case AVRO_PARQUET:
                jobConfiguration.set(AvroConversionCommonConstants.INPUT_FILE,
                        eval.evaluateToString("inputFile", jobConfig.avroConversionCommonConfig.inputFile, true));
                jobConfiguration.set(AvroConversionCommonConstants.OUTPUT_DIR, eval.evaluateToString(
                        "outputDirectory", jobConfig.avroConversionCommonConfig.outputDirectory, true));
                jobConfiguration.setBoolean(AvroConversionCommonConstants.KEEP_INPUT_FILE,
                        jobConfig.avroConversionCommonConfig.keepInputFile);
                jobConfiguration.set(AvroParquetConstants.COMPRESSION_CODEC_NAME, eval.evaluateToString(
                        "compressionCodec", jobConfig.avroParquetConfig.compressionCodec, false));
                jobConfiguration.setInt(AvroParquetConstants.ROW_GROUP_SIZE,
                        jobConfig.avroParquetConfig.rowGroupSize);
                jobConfiguration.setInt(AvroParquetConstants.PAGE_SIZE, jobConfig.avroParquetConfig.pageSize);
                jobConfiguration.setInt(AvroParquetConstants.DICTIONARY_PAGE_SIZE,
                        jobConfig.avroParquetConfig.dictionaryPageSize);
                jobConfiguration.setInt(AvroParquetConstants.MAX_PADDING_SIZE,
                        jobConfig.avroParquetConfig.maxPaddingSize);
                jobConfiguration.setBoolean(AvroConversionCommonConstants.OVERWRITE_TMP_FILE,
                        jobConfig.avroConversionCommonConfig.overwriteTmpFile);
                break;
            case AVRO_ORC:
                jobConfiguration.set(AvroConversionCommonConstants.INPUT_FILE,
                        eval.evaluateToString("inputFile", jobConfig.avroConversionCommonConfig.inputFile, true));
                jobConfiguration.set(AvroConversionCommonConstants.OUTPUT_DIR, eval.evaluateToString(
                        "outputDirectory", jobConfig.avroConversionCommonConfig.outputDirectory, true));
                jobConfiguration.setBoolean(AvroConversionCommonConstants.KEEP_INPUT_FILE,
                        jobConfig.avroConversionCommonConfig.keepInputFile);
                jobConfiguration.setBoolean(AvroConversionCommonConstants.OVERWRITE_TMP_FILE,
                        jobConfig.avroConversionCommonConfig.overwriteTmpFile);
                jobConfiguration.setInt(AvroOrcConstants.ORC_BATCH_SIZE, jobConfig.avroOrcConfig.orcBatchSize);
                break;
            case CUSTOM:
                // Nothing because custom is generic one that have no special config properties
                break;
            default:
                throw new UnsupportedOperationException("Unsupported JobType: " + jobConfig.jobType);
            }

            job = createAndSubmitJob(jobConfiguration);
        } catch (IOException | InterruptedException | ELEvalException e) {
            LOG.error("Can't submit mapreduce job", e);
            errorRecordHandler.onError(
                    new OnRecordErrorException(record, MapReduceErrors.MAPREDUCE_0005, e.getMessage(), e));
        }

        if (job != null) {
            MapReduceExecutorEvents.JOB_CREATED.create(getContext())
                    .with("tracking-url", job.getTrackingURL())
                    .with("job-id", job.getJobID().toString())
                    .createAndSend();
        }
    }
}
From source file:edu.umn.cs.spatialHadoop.visualization.HadoopvizServer.java
License:Open Source License
/**
 * Visualizes a dataset.
 * @param request
 * @param response
 */
private void handleVisualize(HttpServletRequest request, HttpServletResponse response) {
    try {
        String pathStr = request.getParameter("path");
        final Path path = new Path(pathStr);
        FileSystem fs = path.getFileSystem(commonParams);
        // Check if the input is already visualized
        final Path imagePath = new Path(path, "_data.png");
        if (fs.exists(imagePath)) {
            // Image is already visualized
            response.setStatus(HttpServletResponse.SC_MOVED_PERMANENTLY);
            response.setHeader("Location", "/hdfs" + imagePath);
        } else {
            // This dataset has never been visualized before
            String shapeName = request.getParameter("shape");
            final OperationsParams vizParams = new OperationsParams(commonParams);
            vizParams.set("shape", shapeName);
            vizParams.setBoolean("background", true);
            vizParams.setInt("width", 2000);
            vizParams.setInt("height", 2000);

            // Retrieve the owner of the data directory
            String owner = fs.getFileStatus(path).getOwner();
            UserGroupInformation ugi = UserGroupInformation.createRemoteUser(owner);
            Job vizJob = ugi.doAs(new PrivilegedExceptionAction<Job>() {
                public Job run() throws Exception {
                    return GeometricPlot.plot(new Path[] { path }, imagePath, vizParams);
                }
            });

            // Write the response
            response.setStatus(HttpServletResponse.SC_OK);
            response.setContentType("application/json;charset=utf-8");
            PrintWriter out = response.getWriter();
            out.printf("{\"JobID\":\"%s\", \"TrackURL\": \"%s\"}", vizJob.getJobID().toString(),
                    vizJob.getTrackingURL());
            out.close();
        }
    } catch (Exception e) {
        System.out.println("error happened");
        e.printStackTrace();
        try {
            e.printStackTrace(response.getWriter());
        } catch (IOException ioe) {
            ioe.printStackTrace();
            e.printStackTrace();
        }
        response.setContentType("text/plain;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
    }
}
From source file:gobblin.compaction.mapreduce.MRCompactorJobRunner.java
License:Apache License
private void submitAndWait(Job job) throws ClassNotFoundException, IOException, InterruptedException {
    job.submit();
    MRCompactor.addRunningHadoopJob(this.dataset, job);
    LOG.info(String.format("MR job submitted for dataset %s, input %s, url: %s", this.dataset, getInputPaths(),
            job.getTrackingURL()));
    while (!job.isComplete()) {
        if (this.policy == Policy.ABORT_ASAP) {
            LOG.info(String.format("MR job for dataset %s, input %s killed due to input data incompleteness."
                    + " Will try again later", this.dataset, getInputPaths()));
            job.killJob();
            return;
        }
        Thread.sleep(MR_JOB_CHECK_COMPLETE_INTERVAL_MS);
    }
    if (!job.isSuccessful()) {
        throw new RuntimeException(String.format("MR job failed for topic %s, input %s, url: %s", this.dataset,
                getInputPaths(), job.getTrackingURL()));
    }
}
From source file:gobblin.runtime.mapreduce.MRTask.java
License:Apache License
@Override
public void run() {
    try {
        Job job = createJob();
        job.submit();
        this.eventSubmitter.submit(Events.MR_JOB_STARTED_EVENT, Events.JOB_URL, job.getTrackingURL());
        job.waitForCompletion(false);

        if (job.isSuccessful()) {
            this.eventSubmitter.submit(Events.MR_JOB_SUCCESSFUL, Events.JOB_URL, job.getTrackingURL());
            this.workingState = WorkUnitState.WorkingState.SUCCESSFUL;
        } else {
            this.eventSubmitter.submit(Events.MR_JOB_FAILED, Events.JOB_URL, job.getTrackingURL());
            this.workingState = WorkUnitState.WorkingState.FAILED;
        }
    } catch (Throwable t) {
        log.error("Failed to run MR job.", t);
        this.eventSubmitter.submit(Events.MR_JOB_FAILED, Events.FAILURE_CONTEXT, t.getMessage());
        this.workingState = WorkUnitState.WorkingState.FAILED;
    }
}