Example usage for org.apache.hadoop.mapreduce Job getJobID

List of usage examples for org.apache.hadoop.mapreduce Job getJobID

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job getJobID.

Prototype

public JobID getJobID() 

Source Link

Document

Get the unique ID for the job.
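
For orientation before the full examples below, here is a minimal, hedged sketch of the typical call pattern: the job ID is only assigned once the job has been submitted, so getJobID() is called after submit() (or after waitForCompletion()). The identity map/reduce job, the job name, and the command-line input/output paths are assumptions for illustration, not taken from the projects listed on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class GetJobIdExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "get-job-id-example");
        job.setJarByClass(GetJobIdExample.class);
        // No mapper or reducer is set, so the identity Mapper and Reducer are used;
        // TextInputFormat produces LongWritable/Text pairs, which are passed through.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit first: the JobID is assigned during submission.
        job.submit();
        JobID jobId = job.getJobID();
        System.out.println("Submitted job " + jobId + ", tracking URL: " + job.getTrackingURL());

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}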

Usage

From source file: com.inmobi.conduit.distcp.DistcpBaseService.java

License: Apache License

protected Boolean executeDistCp(String serviceName, Map<String, FileStatus> fileListingMap, Path targetPath)
        throws Exception {
    // Add additional default arguments to the array below, which get merged
    // with the arguments sent in by the derived service
    Configuration conf = currentCluster.getHadoopConf();
    conf.set(ConduitConstants.AUDIT_ENABLED_KEY, System.getProperty(ConduitConstants.AUDIT_ENABLED_KEY));
    conf.set("mapred.job.name", serviceName);
    conf.set("tmpjars", auditUtilJarDestPath.toString());
    // The first argument 'sourceFileListing' to DistCpOptions is not needed now
    // since ConduitDistCp writes the listing file using fileListingMap instead of
    // relying on the sourceFileListing path. Passing a dummy value.
    DistCpOptions options = new DistCpOptions(new Path("/tmp"), targetPath);
    options.setOutPutDirectory(tmpCounterOutputPath);
    DistCp distCp = new ConduitDistCp(conf, options, fileListingMap);
    try {
        Job job = distCp.execute();
        long jobExecutionTimeInSecs = (distCp.getJobTimeInNanos() / NANO_SECONDS_IN_SECOND);
        LOG.info("Time taken to complete " + job.getJobID() + " job : " + jobExecutionTimeInSecs + "secs");
        updateJobTimeCounter(jobExecutionTimeInSecs);
    } catch (Exception e) {
        LOG.error("Exception encountered ", e);
        throw e;
    }
    return true;
}

From source file: com.inmobi.conduit.local.LocalStreamService.java

License: Apache License

@Override
protected void execute() throws Exception {
    lastProcessedFile.clear();
    List<AuditMessage> auditMsgList = new ArrayList<AuditMessage>();
    try {
        FileSystem fs = FileSystem.get(srcCluster.getHadoopConf());
        // Clean up tmpPath before every run to avoid
        // any old data from an aborted previous run being reused
        cleanUpTmp(fs);
        LOG.info("TmpPath is [" + tmpPath + "]");
        long commitTime = srcCluster.getCommitTime();
        publishMissingPaths(fs, srcCluster.getLocalFinalDestDirRoot(), commitTime, streamsToProcess);
        Map<FileStatus, String> fileListing = new TreeMap<FileStatus, String>();
        Set<FileStatus> trashSet = new HashSet<FileStatus>();
        /* The checkpointPaths table contains the stream name as row key,
        the source (collector) name as column key and the checkpoint value as value */
        Table<String, String, String> checkpointPaths = HashBasedTable.create();

        long totalSize = createMRInput(tmpJobInputPath, fileListing, trashSet, checkpointPaths);

        if (fileListing.size() == 0) {
            LOG.info("Nothing to do!");
            for (String eachStream : streamsToProcess) {
                if (lastProcessedFile.get(eachStream) != null) {
                    ConduitMetrics.updateAbsoluteGauge(getServiceType(), LAST_FILE_PROCESSED, eachStream,
                            lastProcessedFile.get(eachStream));
                }
            }
            return;
        }
        Job job = createJob(tmpJobInputPath, totalSize);
        long jobStartTime = System.nanoTime();
        job.waitForCompletion(true);
        long jobExecutionTimeInSecs = (System.nanoTime() - jobStartTime) / (NANO_SECONDS_IN_SECOND);
        LOG.info("Time taken to complete " + job.getJobID() + " job : " + jobExecutionTimeInSecs + "secs");
        updateJobTimeCounter(jobExecutionTimeInSecs);
        if (job.isSuccessful()) {
            commitTime = srcCluster.getCommitTime();
            LOG.info("Commiting mvPaths and ConsumerPaths");

            commit(prepareForCommit(commitTime), false, auditMsgList, commitTime);
            updatePathsTobeRegisteredWithLatestDir(commitTime);
            checkPoint(checkpointPaths);
            LOG.info("Commiting trashPaths");
            commit(populateTrashCommitPaths(trashSet), true, null, commitTime);
            LOG.info("Committed successfully at " + getLogDateString(commitTime));
            for (String eachStream : streamsToProcess) {
                if (lastProcessedFile.get(eachStream) != null) {
                    ConduitMetrics.updateAbsoluteGauge(getServiceType(), LAST_FILE_PROCESSED, eachStream,
                            lastProcessedFile.get(eachStream));
                }
            }
        } else {
            throw new IOException("LocaStreamService job failure: Job " + job.getJobID() + " has failed. ");
        }
    } catch (Exception e) {
        LOG.warn("Error in running LocalStreamService ", e);
        throw e;
    } finally {
        publishAuditMessages(auditMsgList);
        try {
            registerPartitions();
        } catch (Exception e) {
            LOG.warn("Got exception while registering partitions. ", e);
        }
    }
}

From source file: com.linkedin.mr_kluj.GenericClojureJob.java

License: Apache License

public void run() {
    info("Starting " + getClass().getSimpleName());

    /*** Get clojure source ***/
    final String cljSource;
    if (props.getProperty(LI_CLJ_SOURCE) == null) {
        final String resourceName = props.getProperty("li.clj.source.file");
        if (resourceName == null) {
            throw new RuntimeException(
                    "Must define either li.clj.source or li.clj.source.file on the Props object.");
        }

        URL resource = getClass().getClassLoader().getResource(resourceName);

        if (resource == null) { // Perhaps it's a URL for a Hadoop-understood file-system
            try {
                resource = getScriptFromPath(new Configuration(), resourceName).toURI().toURL();
            } catch (Exception e) {
                // perhaps it wasn't...
            }
        }

        if (resource == null) { // Maybe it's a file
            File theFile = new File(resourceName);
            if (theFile.exists()) {
                try {
                    resource = theFile.toURI().toURL();
                } catch (MalformedURLException e) {
                    throw new RuntimeException("WTF?", e);
                }
            }

        }

        if (resource == null) {
            throw new RuntimeException(
                    String.format("Resource[%s] does not exist on the classpath.", resourceName));
        }

        try {
            cljSource = new String(getBytes(resource.openStream()));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        props.setProperty(LI_CLJ_SOURCE, cljSource);
    } else {
        cljSource = props.getProperty(LI_CLJ_SOURCE);
    }

    final String theActualFunction = String.format(
            "(require '[com.linkedin.mr-kluj.job :as job])\n\n" + "%s\n" + "(map job/starter the-jobs)\n",
            cljSource);

    info("--- Source: ---");
    info(theActualFunction);
    info("       ---------       ");

    boolean jobCompleted;
    try {
        RT.var("clojure.core", "require").invoke(Symbol.intern("clojure.main"));

        Var.pushThreadBindings(RT.map(RT.var("clojure.core", "*warn-on-reflection*"), RT.T,
                RT.var("user", "*context*"), null, RT.var("user", "*props*"), props));

        Iterable<IFn> jobs = (Iterable<IFn>) clojure.lang.Compiler.load(new StringReader(theActualFunction),
                "start-job-input", "clj-job");

        int count = 0;
        for (IFn ifn : jobs) {
            Job job = (Job) ifn.invoke();
            job.getConfiguration().set(LI_CLJ_SOURCE, cljSource);
            job.getConfiguration().set(LI_CLJ_JOB_INDEX, String.valueOf(count));

            ByteArrayOutputStream baos = new ByteArrayOutputStream(1024 * 10);
            props.storeToXML(baos, null);

            job.getConfiguration().set(LI_CLJ_PROPERTIES, new String(baos.toByteArray()));

            info(String.format("Starting job %s[%s]", job.getJobID(), job.getJobName()));

            jobCompleted = job.waitForCompletion(true);
            ++count;

            if (!jobCompleted) {
                throw new RuntimeException(String.format("Job[%s] failed for some reason.", job.getJobID()));
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

}

From source file: com.metamx.druid.indexer.DeterminePartitionsJob.java

License: Open Source License

public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */

        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            final Job groupByJob = new Job(new Configuration(), String.format(
                    "%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()));

            injectSystemProperties(groupByJob);
            groupByJob.setInputFormatClass(TextInputFormat.class);
            groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
            groupByJob.setMapOutputKeyClass(BytesWritable.class);
            groupByJob.setMapOutputValueClass(NullWritable.class);
            groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setOutputKeyClass(BytesWritable.class);
            groupByJob.setOutputValueClass(NullWritable.class);
            groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
            groupByJob.setJarByClass(DeterminePartitionsJob.class);

            config.addInputPaths(groupByJob);
            config.intoConfiguration(groupByJob);
            FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

            groupByJob.submit();
            log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(),
                    groupByJob.getTrackingURL());

            if (!groupByJob.waitForCompletion(true)) {
                log.error("Job failed: %s", groupByJob.getJobID());
                return false;
            }
        } else {
            log.info("Skipping group-by job.");
        }

        /*
         * Read grouped data and determine appropriate partitions.
         */
        final Job dimSelectionJob = new Job(new Configuration(), String.format(
                "%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()));

        dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

        injectSystemProperties(dimSelectionJob);

        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            // Read grouped data from the groupByJob.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
            dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
            FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
        } else {
            // Directly read the source data, since we assume it's already grouped.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
            dimSelectionJob.setInputFormatClass(TextInputFormat.class);
            config.addInputPaths(dimSelectionJob);
        }

        SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob);
        dimSelectionJob.setMapOutputValueClass(Text.class);
        dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
        dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
        dimSelectionJob.setOutputKeyClass(BytesWritable.class);
        dimSelectionJob.setOutputValueClass(Text.class);
        dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
        dimSelectionJob.setJarByClass(DeterminePartitionsJob.class);

        config.intoConfiguration(dimSelectionJob);
        FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

        dimSelectionJob.submit();
        log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(),
                dimSelectionJob.getTrackingURL());

        if (!dimSelectionJob.waitForCompletion(true)) {
            log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
            return false;
        }

        /*
         * Load partitions determined by the previous job.
         */

        log.info("Job completed, loading up partitions for intervals[%s].",
                config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
            DateTime bucket = segmentGranularity.getStart();

            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0));
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
            }
            if (fileSystem.exists(partitionInfoPath)) {
                List<ShardSpec> specs = config.jsonMapper.readValue(
                        Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                        new TypeReference<List<ShardSpec>>() {
                        });

                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
                for (int i = 0; i < specs.size(); ++i) {
                    actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
                }

                shardSpecs.put(bucket, actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }
        config.setShardSpecs(shardSpecs);

        return true;
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}

From source file: com.moz.fiji.mapreduce.framework.JobHistoryFijiTable.java

License: Apache License

/**
 * Writes a job into the JobHistoryFijiTable.
 *
 * @param job The job to save.
 * @param startTime The time the job began, in milliseconds.
 * @param endTime The time the job ended, in milliseconds.
 * @throws IOException If there is an error writing to the table.
 */
public void recordJob(final Job job, final long startTime, final long endTime) throws IOException {
    recordJob(job.getJobID().toString(), job.getJobName(), startTime, endTime, job.isSuccessful(),
            job.getConfiguration(), getCounters(job), Collections.<String, String>emptyMap());
}

From source file: com.netflix.Aegisthus.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());

    job.setJarByClass(Aegisthus.class);
    CommandLine cl = getOptions(args);
    if (cl == null) {
        return 1;
    }
    job.setInputFormatClass(AegisthusInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(CassReducer.class);
    List<Path> paths = Lists.newArrayList();
    if (cl.hasOption(OPT_INPUT)) {
        for (String input : cl.getOptionValues(OPT_INPUT)) {
            paths.add(new Path(input));
        }
    }
    if (cl.hasOption(OPT_INPUTDIR)) {
        paths.addAll(getDataFiles(job.getConfiguration(), cl.getOptionValue(OPT_INPUTDIR)));
    }
    TextInputFormat.setInputPaths(job, paths.toArray(new Path[0]));
    TextOutputFormat.setOutputPath(job, new Path(cl.getOptionValue(OPT_OUTPUT)));

    job.submit();
    System.out.println(job.getJobID());
    System.out.println(job.getTrackingURL());
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file: com.ngdata.hbaseindexer.mr.HBaseMapReduceIndexerTool.java

License: Apache License

public int run(HBaseIndexingOptions hbaseIndexingOpts, JobProcessCallback callback) throws Exception {

    if (hbaseIndexingOpts.isDryRun) {
        return new IndexerDryRun(hbaseIndexingOpts, getConf(), System.out).run();
    }

    long programStartTime = System.currentTimeMillis();
    Configuration conf = getConf();

    IndexingSpecification indexingSpec = hbaseIndexingOpts.getIndexingSpecification();

    conf.set(HBaseIndexerMapper.INDEX_COMPONENT_FACTORY_KEY, indexingSpec.getIndexerComponentFactory());
    conf.set(HBaseIndexerMapper.INDEX_CONFIGURATION_CONF_KEY,
            new String(indexingSpec.getConfiguration(), Charsets.UTF_8));
    conf.set(HBaseIndexerMapper.INDEX_NAME_CONF_KEY, indexingSpec.getIndexerName());
    conf.set(HBaseIndexerMapper.TABLE_NAME_CONF_KEY, indexingSpec.getTableName());
    HBaseIndexerMapper.configureIndexConnectionParams(conf, indexingSpec.getIndexConnectionParams());

    IndexerComponentFactory factory = IndexerComponentFactoryUtil.getComponentFactory(
            indexingSpec.getIndexerComponentFactory(),
            new ByteArrayInputStream(indexingSpec.getConfiguration()), indexingSpec.getIndexConnectionParams());
    IndexerConf indexerConf = factory.createIndexerConf();

    Map<String, String> params = indexerConf.getGlobalParams();
    String morphlineFile = params.get(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM);
    if (hbaseIndexingOpts.morphlineFile != null) {
        morphlineFile = hbaseIndexingOpts.morphlineFile.getPath();
    }
    if (morphlineFile != null) {
        conf.set(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM, new File(morphlineFile).getName());
        ForkedMapReduceIndexerTool.addDistributedCacheFile(new File(morphlineFile), conf);
    }

    String morphlineId = params.get(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM);
    if (hbaseIndexingOpts.morphlineId != null) {
        morphlineId = hbaseIndexingOpts.morphlineId;
    }
    if (morphlineId != null) {
        conf.set(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM, morphlineId);
    }

    conf.setBoolean(HBaseIndexerMapper.INDEX_DIRECT_WRITE_CONF_KEY, hbaseIndexingOpts.isDirectWrite());

    if (hbaseIndexingOpts.fairSchedulerPool != null) {
        conf.set("mapred.fairscheduler.pool", hbaseIndexingOpts.fairSchedulerPool);
    }

    // switch off a false warning about allegedly not implementing Tool
    // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // also see https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);

    if (hbaseIndexingOpts.log4jConfigFile != null) {
        Utils.setLogConfigFile(hbaseIndexingOpts.log4jConfigFile, getConf());
        ForkedMapReduceIndexerTool.addDistributedCacheFile(hbaseIndexingOpts.log4jConfigFile, conf);
    }

    Job job = Job.getInstance(getConf());
    job.setJobName(getClass().getSimpleName() + "/" + HBaseIndexerMapper.class.getSimpleName());
    job.setJarByClass(HBaseIndexerMapper.class);
    //        job.setUserClassesTakesPrecedence(true);

    TableMapReduceUtil.initTableMapperJob(hbaseIndexingOpts.getScans(), HBaseIndexerMapper.class, Text.class,
            SolrInputDocumentWritable.class, job);

    // explicitly set the hbase configuration on the job because TableMapReduceUtil overwrites it with the hbase defaults
    // (see HBASE-4297 which is not really fixed in hbase 0.94.6 on all code paths)
    HBaseConfiguration.merge(job.getConfiguration(), getConf());

    int mappers = new JobClient(job.getConfiguration()).getClusterStatus().getMaxMapTasks(); // MR1
    //mappers = job.getCluster().getClusterStatus().getMapSlotCapacity(); // Yarn only
    LOG.info("Cluster reports {} mapper slots", mappers);

    LOG.info("Using these parameters: " + "reducers: {}, shards: {}, fanout: {}, maxSegments: {}",
            new Object[] { hbaseIndexingOpts.reducers, hbaseIndexingOpts.shards, hbaseIndexingOpts.fanout,
                    hbaseIndexingOpts.maxSegments });

    if (hbaseIndexingOpts.isDirectWrite()) {
        CloudSolrServer solrServer = new CloudSolrServer(hbaseIndexingOpts.zkHost);
        solrServer.setDefaultCollection(hbaseIndexingOpts.collection);

        if (hbaseIndexingOpts.clearIndex) {
            clearSolr(indexingSpec.getIndexConnectionParams());
        }

        // Run a mapper-only MR job that sends index documents directly to a live Solr instance.
        job.setOutputFormatClass(NullOutputFormat.class);
        job.setNumReduceTasks(0);
        job.submit();
        callback.jobStarted(job.getJobID().toString(), job.getTrackingURL());
        if (!ForkedMapReduceIndexerTool.waitForCompletion(job, hbaseIndexingOpts.isVerbose)) {
            return -1; // job failed
        }
        commitSolr(indexingSpec.getIndexConnectionParams());
        ForkedMapReduceIndexerTool.goodbye(job, programStartTime);
        return 0;
    } else {
        FileSystem fileSystem = FileSystem.get(getConf());

        if (fileSystem.exists(hbaseIndexingOpts.outputDir)) {
            if (hbaseIndexingOpts.overwriteOutputDir) {
                LOG.info("Removing existing output directory {}", hbaseIndexingOpts.outputDir);
                if (!fileSystem.delete(hbaseIndexingOpts.outputDir, true)) {
                    LOG.error("Deleting output directory '{}' failed", hbaseIndexingOpts.outputDir);
                    return -1;
                }
            } else {
                LOG.error("Output directory '{}' already exists. Run with --overwrite-output-dir to "
                        + "overwrite it, or remove it manually", hbaseIndexingOpts.outputDir);
                return -1;
            }
        }

        int exitCode = ForkedMapReduceIndexerTool.runIndexingPipeline(job, callback, getConf(),
                hbaseIndexingOpts.asOptions(), programStartTime, fileSystem, null, -1, // File-based parameters
                -1, // num mappers, only of importance for file-based indexing
                hbaseIndexingOpts.reducers);

        if (hbaseIndexingOpts.isGeneratedOutputDir()) {
            LOG.info("Deleting generated output directory " + hbaseIndexingOpts.outputDir);
            fileSystem.delete(hbaseIndexingOpts.outputDir, true);
        }
        return exitCode;
    }
}

From source file: com.sequenceiq.yarntest.mr.QuasiMonteCarlo.java

License: Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static JobID submitPiEstimationMRApp(String jobName, int numMaps, long numPoints, Path tmpDir,
        Configuration conf) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(conf);
    //setup job conf
    job.setJobName(jobName);
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);

    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    //setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        fs.delete(tmpDir, true);
        //      throw new IOException("Tmp directory " + fs.makeQualified(tmpDir)
        //          + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    //  try {
    //generate an input file for each map task
    for (int i = 0; i < numMaps; ++i) {
        final Path file = new Path(inDir, "part" + i);
        final LongWritable offset = new LongWritable(i * numPoints);
        final LongWritable size = new LongWritable(numPoints);
        final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class,
                LongWritable.class, CompressionType.NONE);
        try {
            writer.append(offset, size);
        } finally {
            writer.close();
        }
        System.out.println("Wrote input for Map #" + i);
    }

    //start a map/reduce job
    System.out.println("Starting Job");
    final long startTime = System.currentTimeMillis();
    job.submit();
    //      final double duration = (System.currentTimeMillis() - startTime)/1000.0;
    //      System.out.println("Job Finished in " + duration + " seconds");
    return job.getJobID();

    //    } finally {
    //      fs.delete(tmpDir, true);
    //    }
}

From source file: com.streamsets.pipeline.stage.destination.mapreduce.MapReduceExecutor.java

License: Apache License

@Override
public void write(Batch batch) throws StageException {
    EvalContext eval = new EvalContext(getContext());

    Iterator<Record> it = batch.getRecords();
    while (it.hasNext()) {
        final Record record = it.next();
        eval.setRecord(record);

        Job job = null;

        try {
            // The job Configuration object is a clone of the original one kept in the mapReduceConfig class
            final Configuration jobConfiguration = new Configuration(mapReduceConfig.getConfiguration());

            // Evaluate all dynamic properties and store them in the job configuration
            for (Map.Entry<String, String> entry : jobConfig.jobConfigs.entrySet()) {
                String key = eval.evaluateToString("jobConfigs", entry.getKey(), true);
                String value = eval.evaluateToString("jobConfigs", entry.getValue(), false);

                jobConfiguration.set(key, value);
            }

            // For built-in job creators, evaluate their properties and persist them in the MR config
            switch (jobConfig.jobType) {
            case AVRO_PARQUET:
                jobConfiguration.set(AvroConversionCommonConstants.INPUT_FILE, eval
                        .evaluateToString("inputFile", jobConfig.avroConversionCommonConfig.inputFile, true));
                jobConfiguration.set(AvroConversionCommonConstants.OUTPUT_DIR, eval.evaluateToString(
                        "outputDirectory", jobConfig.avroConversionCommonConfig.outputDirectory, true));
                jobConfiguration.setBoolean(AvroConversionCommonConstants.KEEP_INPUT_FILE,
                        jobConfig.avroConversionCommonConfig.keepInputFile);
                jobConfiguration.set(AvroParquetConstants.COMPRESSION_CODEC_NAME, eval.evaluateToString(
                        "compressionCodec", jobConfig.avroParquetConfig.compressionCodec, false));
                jobConfiguration.setInt(AvroParquetConstants.ROW_GROUP_SIZE,
                        jobConfig.avroParquetConfig.rowGroupSize);
                jobConfiguration.setInt(AvroParquetConstants.PAGE_SIZE, jobConfig.avroParquetConfig.pageSize);
                jobConfiguration.setInt(AvroParquetConstants.DICTIONARY_PAGE_SIZE,
                        jobConfig.avroParquetConfig.dictionaryPageSize);
                jobConfiguration.setInt(AvroParquetConstants.MAX_PADDING_SIZE,
                        jobConfig.avroParquetConfig.maxPaddingSize);
                jobConfiguration.setBoolean(AvroConversionCommonConstants.OVERWRITE_TMP_FILE,
                        jobConfig.avroConversionCommonConfig.overwriteTmpFile);
                break;
            case AVRO_ORC:
                jobConfiguration.set(AvroConversionCommonConstants.INPUT_FILE, eval
                        .evaluateToString("inputFile", jobConfig.avroConversionCommonConfig.inputFile, true));
                jobConfiguration.set(AvroConversionCommonConstants.OUTPUT_DIR, eval.evaluateToString(
                        "outputDirectory", jobConfig.avroConversionCommonConfig.outputDirectory, true));
                jobConfiguration.setBoolean(AvroConversionCommonConstants.KEEP_INPUT_FILE,
                        jobConfig.avroConversionCommonConfig.keepInputFile);
                jobConfiguration.setBoolean(AvroConversionCommonConstants.OVERWRITE_TMP_FILE,
                        jobConfig.avroConversionCommonConfig.overwriteTmpFile);
                jobConfiguration.setInt(AvroOrcConstants.ORC_BATCH_SIZE, jobConfig.avroOrcConfig.orcBatchSize);
                break;
            case CUSTOM:
                // Nothing to do: the custom job type is generic and has no special config properties
                break;
            default:
                throw new UnsupportedOperationException("Unsupported JobType: " + jobConfig.jobType);
            }

            job = createAndSubmitJob(jobConfiguration);
        } catch (IOException | InterruptedException | ELEvalException e) {
            LOG.error("Can't submit mapreduce job", e);
            errorRecordHandler.onError(
                    new OnRecordErrorException(record, MapReduceErrors.MAPREDUCE_0005, e.getMessage(), e));
        }

        if (job != null) {
            MapReduceExecutorEvents.JOB_CREATED.create(getContext()).with("tracking-url", job.getTrackingURL())
                    .with("job-id", job.getJobID().toString()).createAndSend();
        }
    }
}

From source file: edu.umn.cs.spatialHadoop.nasa.MultiHDFPlot.java

License: Open Source License

public static boolean multiplot(Path[] input, Path output, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException, ParseException {
    String timeRange = params.get("time");
    final Date dateFrom, dateTo;
    final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy.MM.dd");
    try {
        String[] parts = timeRange.split("\\.\\.");
        dateFrom = dateFormat.parse(parts[0]);
        dateTo = dateFormat.parse(parts[1]);
    } catch (ArrayIndexOutOfBoundsException e) {
        System.err.println("Use the seperator two periods '..' to seperate from and to dates");
        return false; // To avoid an error that causes dateFrom to be uninitialized
    } catch (ParseException e) {
        System.err.println("Illegal date format in " + timeRange);
        return false;
    }
    // Number of frames to combine in each image
    int combine = params.getInt("combine", 1);
    // Retrieve all matching input directories based on date range
    Vector<Path> matchingPathsV = new Vector<Path>();
    for (Path inputFile : input) {
        FileSystem inFs = inputFile.getFileSystem(params);
        FileStatus[] matchingDirs = inFs.listStatus(input, new PathFilter() {
            @Override
            public boolean accept(Path p) {
                String dirName = p.getName();
                try {
                    Date date = dateFormat.parse(dirName);
                    return date.compareTo(dateFrom) >= 0 && date.compareTo(dateTo) <= 0;
                } catch (ParseException e) {
                    LOG.warn("Cannot parse directory name: " + dirName);
                    return false;
                }
            }
        });
        for (FileStatus matchingDir : matchingDirs)
            matchingPathsV.add(new Path(matchingDir.getPath(), "*.hdf"));
    }
    if (matchingPathsV.isEmpty()) {
        LOG.warn("No matching directories to given input");
        return false;
    }

    Path[] matchingPaths = matchingPathsV.toArray(new Path[matchingPathsV.size()]);
    Arrays.sort(matchingPaths);

    // Clear all paths to ensure we set our own paths for each job
    params.clearAllPaths();

    // Create a water mask if we need to recover holes on write
    if (params.get("recover", "none").equals("write")) {
        // Recovering images on write requires a water mask image to be generated first
        OperationsParams wmParams = new OperationsParams(params);
        wmParams.setBoolean("background", false);
        Path wmImage = new Path(output, new Path("water_mask"));
        HDFPlot.generateWaterMask(wmImage, wmParams);
        params.set(HDFPlot.PREPROCESSED_WATERMARK, wmImage.toString());
    }
    // Start a job for each path
    int imageWidth = -1;
    int imageHeight = -1;
    boolean overwrite = params.getBoolean("overwrite", false);
    boolean pyramid = params.getBoolean("pyramid", false);
    FileSystem outFs = output.getFileSystem(params);
    Vector<Job> jobs = new Vector<Job>();
    boolean background = params.getBoolean("background", false);
    Rectangle mbr = new Rectangle(-180, -90, 180, 90);
    for (int i = 0; i < matchingPaths.length; i += combine) {
        Path[] inputPaths = new Path[Math.min(combine, matchingPaths.length - i)];
        System.arraycopy(matchingPaths, i, inputPaths, 0, inputPaths.length);
        Path outputPath = new Path(output, inputPaths[0].getParent().getName() + (pyramid ? "" : ".png"));
        if (overwrite || !outFs.exists(outputPath)) {
            // Need to plot
            Job rj = HDFPlot.plotHeatMap(inputPaths, outputPath, params);
            if (imageHeight == -1 || imageWidth == -1) {
                if (rj != null) {
                    imageHeight = rj.getConfiguration().getInt("height", 1000);
                    imageWidth = rj.getConfiguration().getInt("width", 1000);
                    mbr = (Rectangle) OperationsParams.getShape(rj.getConfiguration(), "mbr");
                } else {
                    imageHeight = params.getInt("height", 1000);
                    imageWidth = params.getInt("width", 1000);
                    mbr = (Rectangle) OperationsParams.getShape(params, "mbr");
                }
            }
            if (background && rj != null)
                jobs.add(rj);
        }
    }
    // Wait until all jobs are done
    while (!jobs.isEmpty()) {
        Job firstJob = jobs.firstElement();
        firstJob.waitForCompletion(false);
        if (!firstJob.isSuccessful()) {
            System.err.println("Error running job " + firstJob.getJobID());
            System.err.println("Killing all remaining jobs");
            for (int j = 1; j < jobs.size(); j++)
                jobs.get(j).killJob();
            throw new RuntimeException("Error running job " + firstJob.getJobID());
        }
        jobs.remove(0);
    }

    // Draw the scale in the output path if needed
    String scalerange = params.get("scalerange");
    if (scalerange != null) {
        String[] parts = scalerange.split("\\.\\.");
        double min = Double.parseDouble(parts[0]);
        double max = Double.parseDouble(parts[1]);
        String scale = params.get("scale", "none").toLowerCase();
        if (scale.equals("vertical")) {
            MultiHDFPlot.drawVerticalScale(new Path(output, "scale.png"), min, max, 64, imageHeight, params);
        } else if (scale.equals("horizontal")) {
            MultiHDFPlot.drawHorizontalScale(new Path(output, "scale.png"), min, max, imageWidth, 64, params);
        }
    }
    // Add the KML file
    createKML(outFs, output, mbr, params);
    return true;
}