Example usage for org.apache.hadoop.mapreduce Job getJobName

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job#getJobName().

Prototype

public String getJobName() 

Document

Returns the user-specified job name.
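
The snippet below is a minimal, self-contained sketch (the class name and the job name string are illustrative, not taken from any of the examples that follow): the name passed to Job.getInstance(Configuration, String) is what getJobName() later returns, which is handy for log messages written before the job has been submitted and assigned a JobID.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class JobNameExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // The second argument becomes the user-specified job name.
        Job job = Job.getInstance(conf, "word-count-example");

        // getJobName() returns the name set above, even before submission,
        // so it can be used in log output where no JobID exists yet.
        System.out.println("Configured job: " + job.getJobName());
    }
}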

Usage

From source file:com.asakusafw.runtime.stage.optimizer.ReducerSimplifierConfigurator.java

License:Apache License

@Override
public void configure(Job job) throws IOException, InterruptedException {
    int count = job.getNumReduceTasks();
    if (count <= TASKS_TINY) {
        return;
    }
    Configuration conf = job.getConfiguration();
    long limit = conf.getLong(KEY_TINY_LIMIT, -1L);
    if (limit < 0L) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("Reducer simplifier is disabled for tiny inputs: {0}", //$NON-NLS-1$
                    job.getJobName()));
        }
        return;
    }
    long estimated = StageInputDriver.estimateInputSize(job);
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("Reducer simplifier: job={0}, tiny-limit={1}, estimated={2}", //$NON-NLS-1$
                job.getJobName(), limit, estimated));
    }
    if (estimated < 0L || estimated > limit) {
        return;
    }

    LOG.info(MessageFormat.format("The number of reduce task ({0}) is configured: {1}->{2}", job.getJobName(),
            job.getNumReduceTasks(), TASKS_TINY));

    job.setNumReduceTasks(TASKS_TINY);
}

From source file:com.awcoleman.ExampleJobSummaryLogWithOutput.BinRecToAvroRecDriver.java

License:Apache License

private String createTempFileAppender(Job job) throws IOException {
    String sep = FileSystems.getDefault().getSeparator();

    //JobID may not exist yet, but JobName does since we call getInstance with name, so use JobName as prefix
    java.nio.file.Path temppath = Files.createTempDirectory(job.getJobName() + "_");

    String fapath = temppath + sep + "joblog.log";

    FileAppender fa = new FileAppender();
    fa.setName("TempFileAppender_" + job.getJobName());
    fa.setFile(fapath);
    fa.setLayout(new PatternLayout("%d{ISO8601} %p %c: %m%n"));
    fa.setThreshold(Level.INFO);
    fa.setAppend(true);
    fa.activateOptions();

    Logger.getRootLogger().addAppender(fa);

    //Add cleanup hooks, log file itself should be deleted by copyFromLocalFile after copy to HDFS
    temppath.toFile().deleteOnExit();

    return fapath;
}

From source file:com.awcoleman.ExampleJobSummaryLogWithOutput.BinRecToAvroRecDriver.java

License:Apache License

private boolean copyTempFileAppenderToHDFSOutpath(Job job, String fapath, String outpath) {
    String sep = FileSystems.getDefault().getSeparator();

    try {
        FileSystem hdfs = FileSystem.get(job.getConfiguration());

        Path localfile = new Path("file://" + fapath);
        Path hdfsfile = new Path(outpath + sep + "_log" + sep + "joblog.log");

        //About to move job summary log to HDFS, so remove from root logger and close
        FileAppender fa = (FileAppender) Logger.getRootLogger()
                .getAppender("TempFileAppender_" + job.getJobName());
        Logger.getRootLogger().removeAppender(fa);
        fa.close();

        hdfs.copyFromLocalFile(true, false, localfile, hdfsfile);

        return true;
    } catch (IOException ioe) {
        logger.warn("Unable to move job summary log to HDFS.", ioe);
        return false;
    }
}

From source file:com.baynote.kafka.hadoop.KafkaJobBuilderTest.java

License:Apache License

@Test
public void testConfigureWholeJob() throws Exception {
    // base configuration
    builder.setZkConnect("localhost:2181");
    builder.addQueueInput("queue_name", "group_name", MockMapper.class);
    builder.setTextFileOutputFormat("/a/hdfs/path");

    // extended configuration
    builder.setJobName("job_name");
    builder.setMapOutputKeyClass(Text.class);
    builder.setMapOutputValueClass(BytesWritable.class);
    builder.setReducerClass(MockReducer.class);
    builder.setTaskMemorySettings("-Xmx2048m");
    builder.setNumReduceTasks(100);
    builder.setParitioner(MockPartitioner.class);
    builder.setKafkaFetchSizeBytes(1024);

    Job job = builder.configureJob(conf);

    assertEquals("job_name", job.getJobName());
    assertEquals(Text.class, job.getMapOutputKeyClass());
    assertEquals(BytesWritable.class, job.getMapOutputValueClass());
    assertEquals(MockReducer.class, job.getReducerClass());
    assertEquals(MockMapper.class, job.getMapperClass());
    assertEquals("-Xmx2048m", job.getConfiguration().get("mapred.child.java.opts"));
    assertEquals(100, job.getNumReduceTasks());
    assertEquals(MockPartitioner.class, job.getPartitionerClass());
    assertEquals(1024, KafkaInputFormat.getKafkaFetchSizeBytes(job.getConfiguration()));
    assertEquals(TextOutputFormat.class, job.getOutputFormatClass());
    assertEquals(KafkaInputFormat.class, job.getInputFormatClass());
    assertEquals("file:/a/hdfs/path", TextOutputFormat.getOutputPath(job).toString());

    builder.setJobName(null);
    builder.setSequenceFileOutputFormat();
    builder.setUseLazyOutput();
    builder.addQueueInput("queue_name_2", "group_name_2", MockMapper.class);

    job = builder.configureJob(conf);
    assertEquals(LazyOutputFormat.class, job.getOutputFormatClass());
    assertEquals(MultipleKafkaInputFormat.class, job.getInputFormatClass());
    assertEquals(DelegatingMapper.class, job.getMapperClass());
    assertEquals(BytesWritable.class, job.getOutputKeyClass());
    assertEquals(BytesWritable.class, job.getOutputValueClass());
    assertNotNull(SequenceFileOutputFormat.getOutputPath(job));
    assertNotNull(job.getJobName());

    // use s3
    builder.useS3("my_aws_key", "s3cr3t", "my-bucket");
    builder.setTextFileOutputFormat("/a/hdfs/path");
    job = builder.configureJob(conf);

    assertEquals("my_aws_key", job.getConfiguration().get("fs.s3n.awsAccessKeyId"));
    assertEquals("s3cr3t", job.getConfiguration().get("fs.s3n.awsSecretAccessKey"));
    assertEquals("my_aws_key", job.getConfiguration().get("fs.s3.awsAccessKeyId"));
    assertEquals("s3cr3t", job.getConfiguration().get("fs.s3.awsSecretAccessKey"));
}

From source file:com.cloudera.accumulo.upgrade.compatibility.DataCompatibilityVerify.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    final String jobName = this.getClass().getName();
    options.parseArgs(jobName, args);
    try {
        final int totalMapSlots = getConf().getInt("mapred.map.tasks",
                DataCompatibilityTestCli.DEFAULT_NUM_ROWS);
        if (-1 == options.test.numRows) {
            options.test.numRows = totalMapSlots;
        }
        final TableOperations ops = options.connection.getConnector().tableOperations();
        final List<String> names = options.test.getTableNames(ops);
        int totalReduceSlots = getConf().getInt("mapred.reduce.tasks", 0);
        if (-1 != options.test.numReduceSlots) {
            totalReduceSlots = options.test.numReduceSlots;
        }
        if (0 == totalReduceSlots) {
            totalReduceSlots = names.size();
        }
        final int reducesPerJob = Math.max(1, totalReduceSlots / names.size());

        final List<Job> jobs = new ArrayList<Job>();
        for (String name : names) {
            final Job job = new Job(getConf(), jobName + " " + name);
            job.setJarByClass(this.getClass());
            options.input.useAccumuloInputFormat(job, name);
            job.setMapperClass(DataVerifyMapper.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);
            job.setReducerClass(LongSumReducer.class);
            job.setCombinerClass(LongSumReducer.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            TextOutputFormat.setOutputPath(job, new Path(options.test.output, name));
            job.setNumReduceTasks(reducesPerJob);
            job.submit();
            jobs.add(job);
        }

        boolean success = true;
        final long numCellsPerRow = options.test.qualifiers * DataCompatibilityLoad.FAMILIES.length;
        final long numCellsPerFamily = options.test.qualifiers * options.test.numRows;
        for (Job job : jobs) {
            success &= job.waitForCompletion(true);
            final CounterGroup group = job.getCounters().getGroup(DataVerifyMapper.class.getName());
            if (null == group) {
                log.error("Job '" + job.getJobName() + "' doesn't have counters for the verification mapper.");
                success = false;
            } else {
                final Counter badCounter = group.findCounter(BAD_COUNTER);
                if (null != badCounter && 0 < badCounter.getValue()) {
                    log.error("Job '" + job.getJobName() + "' has " + badCounter.getValue()
                            + " entries with bad checksums.");
                    success = false;
                }
                int numRows = 0;
                int numFamilies = 0;
                for (Counter counter : group) {
                    if (counter.getName().startsWith(ROW_COUNTER_PREFIX)) {
                        numRows++;
                        if (numCellsPerRow != counter.getValue()) {
                            log.error("Job '" + job.getJobName() + "', counter '" + counter.getName()
                                    + "' should have " + numCellsPerRow + " cells, but instead has "
                                    + counter.getValue());
                            success = false;
                        }
                    } else if (counter.getName().startsWith(FAMILY_COUNTER_PREFIX)) {
                        numFamilies++;
                        if (numCellsPerFamily != counter.getValue()) {
                            log.error("Job '" + job.getJobName() + "', counter '" + counter.getName()
                                    + "' should have " + numCellsPerFamily + " cells, but instead has "
                                    + counter.getValue());
                            success = false;
                        }
                    }
                }
                if (options.test.numRows != numRows) {
                    log.error("Job '" + job.getJobName() + "' is supposed to have " + options.test.numRows
                            + " rows, but has " + numRows);
                    success = false;
                }
                if (DataCompatibilityLoad.FAMILIES.length != numFamilies) {
                    log.error("Job '" + job.getJobName() + "' is supposed to have "
                            + DataCompatibilityLoad.FAMILIES.length + " families, but has " + numFamilies);
                    success = false;
                }
            }
        }
        if (success) {
            log.info("All internal checks passed.");
        } else {
            log.info("Some checks failed. see log.");
        }
        return success ? 0 : 1;
    } finally {
        options.input.close();
    }
}

From source file:com.cloudera.oryx.computation.common.DistributedGenerationRunner.java

License:Open Source License

private static Collection<String> find(String instanceDir) throws IOException, InterruptedException {
    Collection<String> result = Lists.newArrayList();
    // This is where we will see Hadoop config problems first, so log extra info
    Cluster cluster;
    try {
        cluster = new Cluster(OryxConfiguration.get());
    } catch (IOException ioe) {
        log.error("Unable to init the Hadoop cluster. Check that an MR2, not MR1, cluster is configured.");
        throw ioe;
    }
    try {
        JobStatus[] statuses = cluster.getAllJobStatuses();
        if (statuses != null) {
            for (JobStatus jobStatus : statuses) {
                JobStatus.State state = jobStatus.getState();
                if (state == JobStatus.State.RUNNING || state == JobStatus.State.PREP) {
                    Job job = cluster.getJob(jobStatus.getJobID());
                    if (job != null) {
                        String jobName = job.getJobName();
                        log.info("Found running job {}", jobName);
                        if (jobName.startsWith("Oryx-" + instanceDir + '-')) {
                            result.add(jobName);
                        }
                    }
                }
            }
        }
    } finally {
        cluster.close();
    }
    return result;
}

From source file:com.example.Driver.java

License:Open Source License

public int run(String[] args) throws Exception {

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Your job name");

    job.setJarByClass(Driver.class);

    logger.info("job " + job.getJobName() + " [" + job.getJar() + "] started with the following arguments: "
            + Arrays.toString(args));

    if (args.length < 2) {
        logger.warn("to run this jar are necessary at 2 parameters \"" + job.getJar()
                + " input_files output_directory");
        return 1;
    }

    job.setMapperClass(WordcountMapper.class);
    logger.info("mapper class is " + job.getMapperClass());

    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(IntWritable.class);
    logger.info("mapper output key class is " + job.getMapOutputKeyClass());
    logger.info("mapper output value class is " + job.getMapOutputValueClass());

    job.setReducerClass(WordcountReducer.class);
    logger.info("reducer class is " + job.getReducerClass());
    job.setCombinerClass(WordcountReducer.class);
    logger.info("combiner class is " + job.getCombinerClass());
    //When you are not running any Reducer
    //OR    job.setNumReduceTasks(0);
    //      logger.info("number of reduce tasks is " + job.getNumReduceTasks());

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    logger.info("output key class is " + job.getOutputKeyClass());
    logger.info("output value class is " + job.getOutputValueClass());

    job.setInputFormatClass(TextInputFormat.class);
    logger.info("input format class is " + job.getInputFormatClass());

    job.setOutputFormatClass(TextOutputFormat.class);
    logger.info("output format class is " + job.getOutputFormatClass());

    Path filePath = new Path(args[0]);
    logger.info("input path " + filePath);
    FileInputFormat.setInputPaths(job, filePath);

    Path outputPath = new Path(args[1]);
    logger.info("output path " + outputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.waitForCompletion(true);
    return 0;
}

From source file:com.ikanow.aleph2.analytics.hadoop.services.MockHadoopTechnologyService.java

License:Apache License

/** Utility to make local jobs check-able
 * @param job
 */
protected void handleLocalJob(Job job) {
    _jobs.put(job.getJobName(), job);
}

From source file:com.kylinolap.job.hadoop.AbstractHadoopJob.java

License:Apache License

protected int waitForCompletion(Job job) throws IOException, InterruptedException, ClassNotFoundException {
    int retVal = 0;
    long start = System.nanoTime();

    if (isAsync) {
        job.submit();
    } else {
        job.waitForCompletion(true);
        retVal = job.isSuccessful() ? 0 : 1;
    }

    log.debug("Job '" + job.getJobName() + "' finished "
            + (job.isSuccessful() ? "successfully in " : "with failures.  Time taken ")
            + StringUtils.formatTime((System.nanoTime() - start) / 1000000L));

    return retVal;
}

From source file:com.linkedin.mr_kluj.GenericClojureJob.java

License:Apache License

public void run() {
    info("Starting " + getClass().getSimpleName());

    /*** Get clojure source ***/
    final String cljSource;
    if (props.getProperty(LI_CLJ_SOURCE) == null) {
        final String resourceName = props.getProperty("li.clj.source.file");
        if (resourceName == null) {
            throw new RuntimeException(
                    "Must define either li.clj.source or li.clj.source.file on the Props object.");
        }

        URL resource = getClass().getClassLoader().getResource(resourceName);

        if (resource == null) { // Perhaps it's a URL for a Hadoop-understood file-system
            try {
                resource = getScriptFromPath(new Configuration(), resourceName).toURI().toURL();
            } catch (Exception e) {
                // perhaps it wasn't...
            }
        }

        if (resource == null) { // Maybe it's a file
            File theFile = new File(resourceName);
            if (theFile.exists()) {
                try {
                    resource = theFile.toURI().toURL();
                } catch (MalformedURLException e) {
                    throw new RuntimeException("WTF?", e);
                }
            }

        }

        if (resource == null) {
            throw new RuntimeException(
                    String.format("Resource[%s] does not exist on the classpath.", resourceName));
        }

        try {
            cljSource = new String(getBytes(resource.openStream()));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        props.setProperty(LI_CLJ_SOURCE, cljSource);
    } else {
        cljSource = props.getProperty(LI_CLJ_SOURCE);
    }

    final String theActualFunction = String.format(
            "(require '[com.linkedin.mr-kluj.job :as job])\n\n" + "%s\n" + "(map job/starter the-jobs)\n",
            cljSource);

    info("--- Source: ---");
    info(theActualFunction);
    info("       ---------       ");

    boolean jobCompleted;
    try {
        RT.var("clojure.core", "require").invoke(Symbol.intern("clojure.main"));

        Var.pushThreadBindings(RT.map(RT.var("clojure.core", "*warn-on-reflection*"), RT.T,
                RT.var("user", "*context*"), null, RT.var("user", "*props*"), props));

        Iterable<IFn> jobs = (Iterable<IFn>) clojure.lang.Compiler.load(new StringReader(theActualFunction),
                "start-job-input", "clj-job");

        int count = 0;
        for (IFn ifn : jobs) {
            Job job = (Job) ifn.invoke();
            job.getConfiguration().set(LI_CLJ_SOURCE, cljSource);
            job.getConfiguration().set(LI_CLJ_JOB_INDEX, String.valueOf(count));

            ByteArrayOutputStream baos = new ByteArrayOutputStream(1024 * 10);
            props.storeToXML(baos, null);

            job.getConfiguration().set(LI_CLJ_PROPERTIES, new String(baos.toByteArray()));

            info(String.format("Starting job %s[%s]", job.getJobID(), job.getJobName()));

            jobCompleted = job.waitForCompletion(true);
            ++count;

            if (!jobCompleted) {
                throw new RuntimeException(String.format("Job[%s] failed for some reason.", job.getJobID()));
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

}