List of usage examples for org.apache.hadoop.mapreduce Job getJobID
public JobID getJobID()
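Before the project examples below, a minimal sketch of the typical pattern: getJobID() returns null until the job has actually been submitted, so it is normally read after submit() or waitForCompletion(). The class name GetJobIdSketch and the argument-supplied input/output paths are hypothetical placeholders, not taken from any of the projects listed below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class GetJobIdSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "get-job-id-sketch");
        job.setJarByClass(GetJobIdSketch.class);
        // Hypothetical paths; identity mapper/reducer are used by default.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.submit();                      // the JobID is assigned on submission
        JobID id = job.getJobID();         // e.g. job_1700000000000_0001
        System.out.println("Submitted " + id + ", tracking URL: " + job.getTrackingURL());
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}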
From source file:com.inmobi.conduit.distcp.DistcpBaseService.java
License:Apache License
protected Boolean executeDistCp(String serviceName, Map<String, FileStatus> fileListingMap, Path targetPath)
        throws Exception {
    // Add additional default arguments to the array below which gets merged
    // with the arguments as sent in by the derived service
    Configuration conf = currentCluster.getHadoopConf();
    conf.set(ConduitConstants.AUDIT_ENABLED_KEY, System.getProperty(ConduitConstants.AUDIT_ENABLED_KEY));
    conf.set("mapred.job.name", serviceName);
    conf.set("tmpjars", auditUtilJarDestPath.toString());
    // The first argument 'sourceFileListing' to DistCpOptions is not needed now
    // since ConduitDistCp writes the listing file using fileListingMap instead of
    // relying on the sourceFileListing path. Passing a dummy value.
    DistCpOptions options = new DistCpOptions(new Path("/tmp"), targetPath);
    options.setOutPutDirectory(tmpCounterOutputPath);
    DistCp distCp = new ConduitDistCp(conf, options, fileListingMap);
    try {
        Job job = distCp.execute();
        long jobExecutionTimeInSecs = (distCp.getJobTimeInNanos() / NANO_SECONDS_IN_SECOND);
        LOG.info("Time taken to complete " + job.getJobID() + " job : " + jobExecutionTimeInSecs + "secs");
        updateJobTimeCounter(jobExecutionTimeInSecs);
    } catch (Exception e) {
        LOG.error("Exception encountered ", e);
        throw e;
    }
    return true;
}
From source file:com.inmobi.conduit.local.LocalStreamService.java
License:Apache License
@Override
protected void execute() throws Exception {
    lastProcessedFile.clear();
    List<AuditMessage> auditMsgList = new ArrayList<AuditMessage>();
    try {
        FileSystem fs = FileSystem.get(srcCluster.getHadoopConf());
        // Clean up tmpPath before every run to avoid
        // any old data being used in this run if the old run was aborted
        cleanUpTmp(fs);
        LOG.info("TmpPath is [" + tmpPath + "]");
        long commitTime = srcCluster.getCommitTime();
        publishMissingPaths(fs, srcCluster.getLocalFinalDestDirRoot(), commitTime, streamsToProcess);
        Map<FileStatus, String> fileListing = new TreeMap<FileStatus, String>();
        Set<FileStatus> trashSet = new HashSet<FileStatus>();
        // checkpointPaths table contains stream name as row key, source (collector) name as
        // column key and checkpoint value as value
        Table<String, String, String> checkpointPaths = HashBasedTable.create();
        long totalSize = createMRInput(tmpJobInputPath, fileListing, trashSet, checkpointPaths);
        if (fileListing.size() == 0) {
            LOG.info("Nothing to do!");
            for (String eachStream : streamsToProcess) {
                if (lastProcessedFile.get(eachStream) != null) {
                    ConduitMetrics.updateAbsoluteGauge(getServiceType(), LAST_FILE_PROCESSED, eachStream,
                            lastProcessedFile.get(eachStream));
                }
            }
            return;
        }
        Job job = createJob(tmpJobInputPath, totalSize);
        long jobStartTime = System.nanoTime();
        job.waitForCompletion(true);
        long jobExecutionTimeInSecs = (System.nanoTime() - jobStartTime) / (NANO_SECONDS_IN_SECOND);
        LOG.info("Time taken to complete " + job.getJobID() + " job : " + jobExecutionTimeInSecs + "secs");
        updateJobTimeCounter(jobExecutionTimeInSecs);
        if (job.isSuccessful()) {
            commitTime = srcCluster.getCommitTime();
            LOG.info("Committing mvPaths and ConsumerPaths");
            commit(prepareForCommit(commitTime), false, auditMsgList, commitTime);
            updatePathsTobeRegisteredWithLatestDir(commitTime);
            checkPoint(checkpointPaths);
            LOG.info("Committing trashPaths");
            commit(populateTrashCommitPaths(trashSet), true, null, commitTime);
            LOG.info("Committed successfully at " + getLogDateString(commitTime));
            for (String eachStream : streamsToProcess) {
                if (lastProcessedFile.get(eachStream) != null) {
                    ConduitMetrics.updateAbsoluteGauge(getServiceType(), LAST_FILE_PROCESSED, eachStream,
                            lastProcessedFile.get(eachStream));
                }
            }
        } else {
            throw new IOException("LocalStreamService job failure: Job " + job.getJobID() + " has failed. ");
        }
    } catch (Exception e) {
        LOG.warn("Error in running LocalStreamService ", e);
        throw e;
    } finally {
        publishAuditMessages(auditMsgList);
        try {
            registerPartitions();
        } catch (Exception e) {
            LOG.warn("Got exception while registering partitions. ", e);
        }
    }
}
From source file:com.linkedin.mr_kluj.GenericClojureJob.java
License:Apache License
public void run() {
    info("Starting " + getClass().getSimpleName());

    /*** Get clojure source ***/
    final String cljSource;
    if (props.getProperty(LI_CLJ_SOURCE) == null) {
        final String resourceName = props.getProperty("li.clj.source.file");
        if (resourceName == null) {
            throw new RuntimeException(
                    "Must define either li.clj.source or li.clj.source.file on the Props object.");
        }
        URL resource = getClass().getClassLoader().getResource(resourceName);
        if (resource == null) {
            // Perhaps it's a URL for a Hadoop-understood file-system
            try {
                resource = getScriptFromPath(new Configuration(), resourceName).toURI().toURL();
            } catch (Exception e) {
                // perhaps it wasn't...
            }
        }
        if (resource == null) {
            // Maybe it's a file
            File theFile = new File(resourceName);
            if (theFile.exists()) {
                try {
                    resource = theFile.toURI().toURL();
                } catch (MalformedURLException e) {
                    throw new RuntimeException("WTF?", e);
                }
            }
        }
        if (resource == null) {
            throw new RuntimeException(
                    String.format("Resource[%s] does not exist on the classpath.", resourceName));
        }
        try {
            cljSource = new String(getBytes(resource.openStream()));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        props.setProperty(LI_CLJ_SOURCE, cljSource);
    } else {
        cljSource = props.getProperty(LI_CLJ_SOURCE);
    }

    final String theActualFunction = String.format(
            "(require '[com.linkedin.mr-kluj.job :as job])\n\n" + "%s\n" + "(map job/starter the-jobs)\n",
            cljSource);

    info("--- Source: ---");
    info(theActualFunction);
    info(" --------- ");

    boolean jobCompleted;
    try {
        RT.var("clojure.core", "require").invoke(Symbol.intern("clojure.main"));
        Var.pushThreadBindings(RT.map(RT.var("clojure.core", "*warn-on-reflection*"), RT.T,
                RT.var("user", "*context*"), null, RT.var("user", "*props*"), props));
        Iterable<IFn> jobs = (Iterable<IFn>) clojure.lang.Compiler.load(new StringReader(theActualFunction),
                "start-job-input", "clj-job");
        int count = 0;
        for (IFn ifn : jobs) {
            Job job = (Job) ifn.invoke();
            job.getConfiguration().set(LI_CLJ_SOURCE, cljSource);
            job.getConfiguration().set(LI_CLJ_JOB_INDEX, String.valueOf(count));
            ByteArrayOutputStream baos = new ByteArrayOutputStream(1024 * 10);
            props.storeToXML(baos, null);
            job.getConfiguration().set(LI_CLJ_PROPERTIES, new String(baos.toByteArray()));
            info(String.format("Starting job %s[%s]", job.getJobID(), job.getJobName()));
            jobCompleted = job.waitForCompletion(true);
            ++count;
            if (!jobCompleted) {
                throw new RuntimeException(String.format("Job[%s] failed for some reason.", job.getJobID()));
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:com.metamx.druid.indexer.DeterminePartitionsJob.java
License:Open Source License
public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */
        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            final Job groupByJob = new Job(new Configuration(), String.format(
                    "%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()));

            injectSystemProperties(groupByJob);
            groupByJob.setInputFormatClass(TextInputFormat.class);
            groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
            groupByJob.setMapOutputKeyClass(BytesWritable.class);
            groupByJob.setMapOutputValueClass(NullWritable.class);
            groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setOutputKeyClass(BytesWritable.class);
            groupByJob.setOutputValueClass(NullWritable.class);
            groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
            groupByJob.setJarByClass(DeterminePartitionsJob.class);

            config.addInputPaths(groupByJob);
            config.intoConfiguration(groupByJob);
            FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

            groupByJob.submit();
            log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(),
                    groupByJob.getTrackingURL());

            if (!groupByJob.waitForCompletion(true)) {
                log.error("Job failed: %s", groupByJob.getJobID());
                return false;
            }
        } else {
            log.info("Skipping group-by job.");
        }

        /*
         * Read grouped data and determine appropriate partitions.
         */
        final Job dimSelectionJob = new Job(new Configuration(), String.format(
                "%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()));

        dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");
        injectSystemProperties(dimSelectionJob);

        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            // Read grouped data from the groupByJob.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
            dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
            FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
        } else {
            // Directly read the source data, since we assume it's already grouped.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
            dimSelectionJob.setInputFormatClass(TextInputFormat.class);
            config.addInputPaths(dimSelectionJob);
        }

        SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob);
        dimSelectionJob.setMapOutputValueClass(Text.class);
        dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
        dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
        dimSelectionJob.setOutputKeyClass(BytesWritable.class);
        dimSelectionJob.setOutputValueClass(Text.class);
        dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
        dimSelectionJob.setJarByClass(DeterminePartitionsJob.class);

        config.intoConfiguration(dimSelectionJob);
        FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

        dimSelectionJob.submit();
        log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(),
                dimSelectionJob.getTrackingURL());

        if (!dimSelectionJob.waitForCompletion(true)) {
            log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
            return false;
        }

        /*
         * Load partitions determined by the previous job.
         */
        log.info("Job completed, loading up partitions for intervals[%s].",
                config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
            DateTime bucket = segmentGranularity.getStart();
            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0));
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
            }
            if (fileSystem.exists(partitionInfoPath)) {
                List<ShardSpec> specs = config.jsonMapper.readValue(
                        Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                        new TypeReference<List<ShardSpec>>() {
                        });

                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
                for (int i = 0; i < specs.size(); ++i) {
                    actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
                }

                shardSpecs.put(bucket, actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }
        config.setShardSpecs(shardSpecs);

        return true;
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
From source file:com.moz.fiji.mapreduce.framework.JobHistoryFijiTable.java
License:Apache License
/**
 * Writes a job into the JobHistoryFijiTable.
 *
 * @param job The job to save.
 * @param startTime The time the job began, in milliseconds.
 * @param endTime The time the job ended, in milliseconds.
 * @throws IOException If there is an error writing to the table.
 */
public void recordJob(final Job job, final long startTime, final long endTime) throws IOException {
    recordJob(job.getJobID().toString(), job.getJobName(), startTime, endTime, job.isSuccessful(),
            job.getConfiguration(), getCounters(job), Collections.<String, String>emptyMap());
}
From source file:com.netflix.Aegisthus.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(Aegisthus.class);
    CommandLine cl = getOptions(args);
    if (cl == null) {
        return 1;
    }
    job.setInputFormatClass(AegisthusInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(CassReducer.class);
    List<Path> paths = Lists.newArrayList();
    if (cl.hasOption(OPT_INPUT)) {
        for (String input : cl.getOptionValues(OPT_INPUT)) {
            paths.add(new Path(input));
        }
    }
    if (cl.hasOption(OPT_INPUTDIR)) {
        paths.addAll(getDataFiles(job.getConfiguration(), cl.getOptionValue(OPT_INPUTDIR)));
    }
    TextInputFormat.setInputPaths(job, paths.toArray(new Path[0]));
    TextOutputFormat.setOutputPath(job, new Path(cl.getOptionValue(OPT_OUTPUT)));
    job.submit();
    System.out.println(job.getJobID());
    System.out.println(job.getTrackingURL());
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:com.ngdata.hbaseindexer.mr.HBaseMapReduceIndexerTool.java
License:Apache License
public int run(HBaseIndexingOptions hbaseIndexingOpts, JobProcessCallback callback) throws Exception {
    if (hbaseIndexingOpts.isDryRun) {
        return new IndexerDryRun(hbaseIndexingOpts, getConf(), System.out).run();
    }

    long programStartTime = System.currentTimeMillis();
    Configuration conf = getConf();

    IndexingSpecification indexingSpec = hbaseIndexingOpts.getIndexingSpecification();

    conf.set(HBaseIndexerMapper.INDEX_COMPONENT_FACTORY_KEY, indexingSpec.getIndexerComponentFactory());
    conf.set(HBaseIndexerMapper.INDEX_CONFIGURATION_CONF_KEY,
            new String(indexingSpec.getConfiguration(), Charsets.UTF_8));
    conf.set(HBaseIndexerMapper.INDEX_NAME_CONF_KEY, indexingSpec.getIndexerName());
    conf.set(HBaseIndexerMapper.TABLE_NAME_CONF_KEY, indexingSpec.getTableName());
    HBaseIndexerMapper.configureIndexConnectionParams(conf, indexingSpec.getIndexConnectionParams());

    IndexerComponentFactory factory = IndexerComponentFactoryUtil.getComponentFactory(
            indexingSpec.getIndexerComponentFactory(),
            new ByteArrayInputStream(indexingSpec.getConfiguration()),
            indexingSpec.getIndexConnectionParams());
    IndexerConf indexerConf = factory.createIndexerConf();

    Map<String, String> params = indexerConf.getGlobalParams();
    String morphlineFile = params.get(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM);
    if (hbaseIndexingOpts.morphlineFile != null) {
        morphlineFile = hbaseIndexingOpts.morphlineFile.getPath();
    }
    if (morphlineFile != null) {
        conf.set(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM, new File(morphlineFile).getName());
        ForkedMapReduceIndexerTool.addDistributedCacheFile(new File(morphlineFile), conf);
    }

    String morphlineId = params.get(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM);
    if (hbaseIndexingOpts.morphlineId != null) {
        morphlineId = hbaseIndexingOpts.morphlineId;
    }
    if (morphlineId != null) {
        conf.set(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM, morphlineId);
    }

    conf.setBoolean(HBaseIndexerMapper.INDEX_DIRECT_WRITE_CONF_KEY, hbaseIndexingOpts.isDirectWrite());

    if (hbaseIndexingOpts.fairSchedulerPool != null) {
        conf.set("mapred.fairscheduler.pool", hbaseIndexingOpts.fairSchedulerPool);
    }

    // switch off a false warning about allegedly not implementing Tool
    // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // also see https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);

    if (hbaseIndexingOpts.log4jConfigFile != null) {
        Utils.setLogConfigFile(hbaseIndexingOpts.log4jConfigFile, getConf());
        ForkedMapReduceIndexerTool.addDistributedCacheFile(hbaseIndexingOpts.log4jConfigFile, conf);
    }

    Job job = Job.getInstance(getConf());
    job.setJobName(getClass().getSimpleName() + "/" + HBaseIndexerMapper.class.getSimpleName());
    job.setJarByClass(HBaseIndexerMapper.class);
    // job.setUserClassesTakesPrecedence(true);

    TableMapReduceUtil.initTableMapperJob(hbaseIndexingOpts.getScans(), HBaseIndexerMapper.class, Text.class,
            SolrInputDocumentWritable.class, job);

    // explicitly set hbase configuration on the job because the TableMapReduceUtil overwrites it with the hbase defaults
    // (see HBASE-4297 which is not really fixed in hbase 0.94.6 on all code paths)
    HBaseConfiguration.merge(job.getConfiguration(), getConf());

    int mappers = new JobClient(job.getConfiguration()).getClusterStatus().getMaxMapTasks(); // MR1
    //mappers = job.getCluster().getClusterStatus().getMapSlotCapacity(); // Yarn only
    LOG.info("Cluster reports {} mapper slots", mappers);

    LOG.info("Using these parameters: " + "reducers: {}, shards: {}, fanout: {}, maxSegments: {}",
            new Object[] { hbaseIndexingOpts.reducers, hbaseIndexingOpts.shards, hbaseIndexingOpts.fanout,
                    hbaseIndexingOpts.maxSegments });

    if (hbaseIndexingOpts.isDirectWrite()) {
        CloudSolrServer solrServer = new CloudSolrServer(hbaseIndexingOpts.zkHost);
        solrServer.setDefaultCollection(hbaseIndexingOpts.collection);

        if (hbaseIndexingOpts.clearIndex) {
            clearSolr(indexingSpec.getIndexConnectionParams());
        }

        // Run a mapper-only MR job that sends index documents directly to a live Solr instance.
        job.setOutputFormatClass(NullOutputFormat.class);
        job.setNumReduceTasks(0);
        job.submit();
        callback.jobStarted(job.getJobID().toString(), job.getTrackingURL());
        if (!ForkedMapReduceIndexerTool.waitForCompletion(job, hbaseIndexingOpts.isVerbose)) {
            return -1; // job failed
        }
        commitSolr(indexingSpec.getIndexConnectionParams());
        ForkedMapReduceIndexerTool.goodbye(job, programStartTime);
        return 0;
    } else {
        FileSystem fileSystem = FileSystem.get(getConf());

        if (fileSystem.exists(hbaseIndexingOpts.outputDir)) {
            if (hbaseIndexingOpts.overwriteOutputDir) {
                LOG.info("Removing existing output directory {}", hbaseIndexingOpts.outputDir);
                if (!fileSystem.delete(hbaseIndexingOpts.outputDir, true)) {
                    LOG.error("Deleting output directory '{}' failed", hbaseIndexingOpts.outputDir);
                    return -1;
                }
            } else {
                LOG.error("Output directory '{}' already exists. Run with --overwrite-output-dir to "
                        + "overwrite it, or remove it manually", hbaseIndexingOpts.outputDir);
                return -1;
            }
        }

        int exitCode = ForkedMapReduceIndexerTool.runIndexingPipeline(job, callback, getConf(),
                hbaseIndexingOpts.asOptions(), programStartTime, fileSystem, null, -1, // File-based parameters
                -1, // num mappers, only of importance for file-based indexing
                hbaseIndexingOpts.reducers);

        if (hbaseIndexingOpts.isGeneratedOutputDir()) {
            LOG.info("Deleting generated output directory " + hbaseIndexingOpts.outputDir);
            fileSystem.delete(hbaseIndexingOpts.outputDir, true);
        }
        return exitCode;
    }
}
From source file:com.sequenceiq.yarntest.mr.QuasiMonteCarlo.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static JobID submitPiEstimationMRApp(String jobName, int numMaps, long numPoints, Path tmpDir,
        Configuration conf) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(conf);
    //setup job conf
    job.setJobName(jobName);
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);

    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    //setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        fs.delete(tmpDir, true);
        // throw new IOException("Tmp directory " + fs.makeQualified(tmpDir)
        //     + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    // try {
    //generate an input file for each map task
    for (int i = 0; i < numMaps; ++i) {
        final Path file = new Path(inDir, "part" + i);
        final LongWritable offset = new LongWritable(i * numPoints);
        final LongWritable size = new LongWritable(numPoints);
        final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class,
                LongWritable.class, CompressionType.NONE);
        try {
            writer.append(offset, size);
        } finally {
            writer.close();
        }
        System.out.println("Wrote input for Map #" + i);
    }

    //start a map/reduce job
    System.out.println("Starting Job");
    final long startTime = System.currentTimeMillis();
    job.submit();
    // final double duration = (System.currentTimeMillis() - startTime)/1000.0;
    // System.out.println("Job Finished in " + duration + " seconds");
    return job.getJobID();
    // } finally {
    //     fs.delete(tmpDir, true);
    // }
}
From source file:com.streamsets.pipeline.stage.destination.mapreduce.MapReduceExecutor.java
License:Apache License
@Override
public void write(Batch batch) throws StageException {
    EvalContext eval = new EvalContext(getContext());

    Iterator<Record> it = batch.getRecords();
    while (it.hasNext()) {
        final Record record = it.next();
        eval.setRecord(record);

        Job job = null;
        try {
            // Job configuration object is a clone of the original one that we're keeping in mapReduceConfig class
            final Configuration jobConfiguration = new Configuration(mapReduceConfig.getConfiguration());

            // Evaluate all dynamic properties and store them in the job configuration
            for (Map.Entry<String, String> entry : jobConfig.jobConfigs.entrySet()) {
                String key = eval.evaluateToString("jobConfigs", entry.getKey(), true);
                String value = eval.evaluateToString("jobConfigs", entry.getValue(), false);
                jobConfiguration.set(key, value);
            }

            // For built-in job creators, evaluate their properties and persist them in the MR config
            switch (jobConfig.jobType) {
            case AVRO_PARQUET:
                jobConfiguration.set(AvroConversionCommonConstants.INPUT_FILE,
                        eval.evaluateToString("inputFile", jobConfig.avroConversionCommonConfig.inputFile, true));
                jobConfiguration.set(AvroConversionCommonConstants.OUTPUT_DIR,
                        eval.evaluateToString("outputDirectory", jobConfig.avroConversionCommonConfig.outputDirectory, true));
                jobConfiguration.setBoolean(AvroConversionCommonConstants.KEEP_INPUT_FILE,
                        jobConfig.avroConversionCommonConfig.keepInputFile);
                jobConfiguration.set(AvroParquetConstants.COMPRESSION_CODEC_NAME,
                        eval.evaluateToString("compressionCodec", jobConfig.avroParquetConfig.compressionCodec, false));
                jobConfiguration.setInt(AvroParquetConstants.ROW_GROUP_SIZE, jobConfig.avroParquetConfig.rowGroupSize);
                jobConfiguration.setInt(AvroParquetConstants.PAGE_SIZE, jobConfig.avroParquetConfig.pageSize);
                jobConfiguration.setInt(AvroParquetConstants.DICTIONARY_PAGE_SIZE,
                        jobConfig.avroParquetConfig.dictionaryPageSize);
                jobConfiguration.setInt(AvroParquetConstants.MAX_PADDING_SIZE,
                        jobConfig.avroParquetConfig.maxPaddingSize);
                jobConfiguration.setBoolean(AvroConversionCommonConstants.OVERWRITE_TMP_FILE,
                        jobConfig.avroConversionCommonConfig.overwriteTmpFile);
                break;
            case AVRO_ORC:
                jobConfiguration.set(AvroConversionCommonConstants.INPUT_FILE,
                        eval.evaluateToString("inputFile", jobConfig.avroConversionCommonConfig.inputFile, true));
                jobConfiguration.set(AvroConversionCommonConstants.OUTPUT_DIR,
                        eval.evaluateToString("outputDirectory", jobConfig.avroConversionCommonConfig.outputDirectory, true));
                jobConfiguration.setBoolean(AvroConversionCommonConstants.KEEP_INPUT_FILE,
                        jobConfig.avroConversionCommonConfig.keepInputFile);
                jobConfiguration.setBoolean(AvroConversionCommonConstants.OVERWRITE_TMP_FILE,
                        jobConfig.avroConversionCommonConfig.overwriteTmpFile);
                jobConfiguration.setInt(AvroOrcConstants.ORC_BATCH_SIZE, jobConfig.avroOrcConfig.orcBatchSize);
                break;
            case CUSTOM:
                // Nothing, because custom is a generic one that has no special config properties
                break;
            default:
                throw new UnsupportedOperationException("Unsupported JobType: " + jobConfig.jobType);
            }

            job = createAndSubmitJob(jobConfiguration);
        } catch (IOException | InterruptedException | ELEvalException e) {
            LOG.error("Can't submit mapreduce job", e);
            errorRecordHandler.onError(
                    new OnRecordErrorException(record, MapReduceErrors.MAPREDUCE_0005, e.getMessage(), e));
        }

        if (job != null) {
            MapReduceExecutorEvents.JOB_CREATED.create(getContext())
                    .with("tracking-url", job.getTrackingURL())
                    .with("job-id", job.getJobID().toString())
                    .createAndSend();
        }
    }
}
From source file:edu.umn.cs.spatialHadoop.nasa.MultiHDFPlot.java
License:Open Source License
public static boolean multiplot(Path[] input, Path output, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException, ParseException {
    String timeRange = params.get("time");
    final Date dateFrom, dateTo;
    final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy.MM.dd");
    try {
        String[] parts = timeRange.split("\\.\\.");
        dateFrom = dateFormat.parse(parts[0]);
        dateTo = dateFormat.parse(parts[1]);
    } catch (ArrayIndexOutOfBoundsException e) {
        System.err.println("Use the separator two periods '..' to separate from and to dates");
        return false; // To avoid an error that causes dateFrom to be uninitialized
    } catch (ParseException e) {
        System.err.println("Illegal date format in " + timeRange);
        return false;
    }
    // Number of frames to combine in each image
    int combine = params.getInt("combine", 1);
    // Retrieve all matching input directories based on date range
    Vector<Path> matchingPathsV = new Vector<Path>();
    for (Path inputFile : input) {
        FileSystem inFs = inputFile.getFileSystem(params);
        FileStatus[] matchingDirs = inFs.listStatus(input, new PathFilter() {
            @Override
            public boolean accept(Path p) {
                String dirName = p.getName();
                try {
                    Date date = dateFormat.parse(dirName);
                    return date.compareTo(dateFrom) >= 0 && date.compareTo(dateTo) <= 0;
                } catch (ParseException e) {
                    LOG.warn("Cannot parse directory name: " + dirName);
                    return false;
                }
            }
        });
        for (FileStatus matchingDir : matchingDirs)
            matchingPathsV.add(new Path(matchingDir.getPath(), "*.hdf"));
    }
    if (matchingPathsV.isEmpty()) {
        LOG.warn("No matching directories to given input");
        return false;
    }

    Path[] matchingPaths = matchingPathsV.toArray(new Path[matchingPathsV.size()]);
    Arrays.sort(matchingPaths);

    // Clear all paths to ensure we set our own paths for each job
    params.clearAllPaths();

    // Create a water mask if we need to recover holes on write
    if (params.get("recover", "none").equals("write")) {
        // Recovering images on write requires a water mask image to be generated first
        OperationsParams wmParams = new OperationsParams(params);
        wmParams.setBoolean("background", false);
        Path wmImage = new Path(output, new Path("water_mask"));
        HDFPlot.generateWaterMask(wmImage, wmParams);
        params.set(HDFPlot.PREPROCESSED_WATERMARK, wmImage.toString());
    }
    // Start a job for each path
    int imageWidth = -1;
    int imageHeight = -1;
    boolean overwrite = params.getBoolean("overwrite", false);
    boolean pyramid = params.getBoolean("pyramid", false);
    FileSystem outFs = output.getFileSystem(params);
    Vector<Job> jobs = new Vector<Job>();
    boolean background = params.getBoolean("background", false);
    Rectangle mbr = new Rectangle(-180, -90, 180, 90);
    for (int i = 0; i < matchingPaths.length; i += combine) {
        Path[] inputPaths = new Path[Math.min(combine, matchingPaths.length - i)];
        System.arraycopy(matchingPaths, i, inputPaths, 0, inputPaths.length);
        Path outputPath = new Path(output, inputPaths[0].getParent().getName() + (pyramid ? "" : ".png"));
        if (overwrite || !outFs.exists(outputPath)) {
            // Need to plot
            Job rj = HDFPlot.plotHeatMap(inputPaths, outputPath, params);
            if (imageHeight == -1 || imageWidth == -1) {
                if (rj != null) {
                    imageHeight = rj.getConfiguration().getInt("height", 1000);
                    imageWidth = rj.getConfiguration().getInt("width", 1000);
                    mbr = (Rectangle) OperationsParams.getShape(rj.getConfiguration(), "mbr");
                } else {
                    imageHeight = params.getInt("height", 1000);
                    imageWidth = params.getInt("width", 1000);
                    mbr = (Rectangle) OperationsParams.getShape(params, "mbr");
                }
            }
            if (background && rj != null)
                jobs.add(rj);
        }
    }
    // Wait until all jobs are done
    while (!jobs.isEmpty()) {
        Job firstJob = jobs.firstElement();
        firstJob.waitForCompletion(false);
        if (!firstJob.isSuccessful()) {
            System.err.println("Error running job " + firstJob.getJobID());
            System.err.println("Killing all remaining jobs");
            for (int j = 1; j < jobs.size(); j++)
                jobs.get(j).killJob();
            throw new RuntimeException("Error running job " + firstJob.getJobID());
        }
        jobs.remove(0);
    }
    // Draw the scale in the output path if needed
    String scalerange = params.get("scalerange");
    if (scalerange != null) {
        String[] parts = scalerange.split("\\.\\.");
        double min = Double.parseDouble(parts[0]);
        double max = Double.parseDouble(parts[1]);
        String scale = params.get("scale", "none").toLowerCase();
        if (scale.equals("vertical")) {
            MultiHDFPlot.drawVerticalScale(new Path(output, "scale.png"), min, max, 64, imageHeight, params);
        } else if (scale.equals("horizontal")) {
            MultiHDFPlot.drawHorizontalScale(new Path(output, "scale.png"), min, max, imageWidth, 64, params);
        }
    }
    // Add the KML file
    createKML(outFs, output, mbr, params);
    return true;
}