Example usage for org.apache.hadoop.mapred JobConf getJobName

Introduction

This page collects example usages of org.apache.hadoop.mapred.JobConf#getJobName().

Prototype

public String getJobName() 

Document

Get the user-specified job name.
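
A minimal round-trip sketch of this accessor (the class name and job name below are illustrative, not taken from the sources that follow):

import org.apache.hadoop.mapred.JobConf;

public class JobNameDemo {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        conf.setJobName("demo-job"); // the user-specified name
        System.out.println(conf.getJobName()); // prints "demo-job"; getJobName() returns "" when no name was set
    }
}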

Usage

From source file: edu.ucsb.cs.partitioning.lsh.LshPartitionMain.java

License: Apache License

public static void run(JobConf job) throws IOException {

    String ret = stars() + "\n  Running job:  " + job.getJobName() + "\n  Input Path:   {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n                ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += "  Output Path:  " + FileOutputFormat.getOutputPath(job) + "\n" + "  Threshold:    "
            + job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE) + "\n  k:            "
            + job.getInt(K_PROPERTY, K_VALUE) + "\n  l:            " + job.getInt(L_PROPERTY, L_VALUE);
    System.out.println(ret);
    JobClient.runJob(job);
}

From source file: edu.ucsb.cs.utilities.JobSubmitter.java

License: Apache License

public static void run(JobConf job, String title, float threshold) {

    String ret = stars() + "\n [" + title + "]\n" + stars() + "\n  Running job:  " + job.getJobName()
            + "\n  Input Path:   {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n                ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += "  Output Path:  " + FileOutputFormat.getOutputPath(job) + "\n" + "  Num. of mappers: "
            + job.getNumMapTasks() + "\n" + "  Num. of reducers: " + job.getNumReduceTasks() + "\n";
    if (threshold != -1)
        ret += "  Threshold: " + threshold + "\n";
    System.out.println(ret);
    try {
        Date startTime = new Date();
        JobClient.runJob(job);
        Date endTime = new Date();
        System.err.println(
                "Job took " + (endTime.getTime() - startTime.getTime()) / 1000.0f + " seconds.");

    } catch (IOException e) {
        System.err.println("ERROR: While submitting the job :(");
        e.printStackTrace();
    }
}

From source file: edu.yale.cs.hadoopdb.exec.DBJobBase.java

License: Apache License

/**
 * Job config initialization (command-line params etc).  
 */
protected JobConf initConf(String[] args) throws Exception {

    List<String> other_args = new ArrayList<String>();

    Path configuration_file = null;
    boolean replication = false;

    for (int i = 0; i < args.length; ++i) {
        if (("-" + DBConst.DB_CONFIG_FILE).equals(args[i])) {
            configuration_file = new Path(args[++i]);
        } else if ("-replication".equals(args[i])) {
            replication = true;
        } else {
            other_args.add(args[i]);
        }
    }

    JobConf conf = configureJob(other_args.toArray(new String[0]));
    LOG.info(conf.getJobName());
    LOG.info(conf.get(DBConst.DB_SQL_QUERY));

    if (conf.get(DBConst.DB_RELATION_ID) == null || conf.get(DBConst.DB_SQL_QUERY) == null
            || conf.get(DBConst.DB_RECORD_READER) == null) {
        throw new Exception(
                "ERROR: DB Job requires a relation, an SQL Query and a Record Reader class to be configured.\n"
                        + "Please specify using: conf.set(\"" + DBConst.DB_RELATION_ID
                        + "\", <relation name>), conf.set(\"" + DBConst.DB_SQL_QUERY + "\", <SQL QUERY>)\n"
                        + "and code an appropriate Record Reader and specify conf.set(\""
                        + DBConst.DB_RECORD_READER + "\", <Record reader class name>)\n");
    }

    if (replication) {
        conf.setBoolean(DBConst.DB_REPLICATION, true);
    }

    if (configuration_file == null) {
        if (conf.get(DBConst.DB_CONFIG_FILE) == null) {
            throw new Exception("No HadoopDB config file!");
        }
    } else {
        conf.set(DBConst.DB_CONFIG_FILE, configuration_file.toString());
    }

    setInputFormat(conf);

    return conf;
}
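
The exception message in initConf doubles as a how-to. A minimal sketch of the configuration it asks for, using placeholder values (the relation name, query, and reader class below are illustrative, not from HadoopDB; DBConst is the constants class used throughout this file):

    JobConf conf = new JobConf();
    conf.set(DBConst.DB_RELATION_ID, "my_relation");
    conf.set(DBConst.DB_SQL_QUERY, "SELECT key, val FROM my_relation");
    conf.set(DBConst.DB_RECORD_READER, "com.example.MyRecordReader");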

From source file: edu.yale.cs.hadoopdb.exec.DBJobBase.java

License: Apache License

public int run(String[] args) throws Exception {
    long startTime = System.currentTimeMillis();
    JobConf conf = null;
    try {
        conf = initConf(args);
    } catch (Exception e) {
        System.err.print("ERROR: " + StringUtils.stringifyException(e));
        return printDbUsage();
    }
    JobClient.runJob(conf);

    long endTime = System.currentTimeMillis();
    LOG.info("\n" + conf.getJobName() + " JOB TIME : " + (endTime - startTime) + " ms.\n");

    return 0;
}

From source file: edu.yale.cs.hadoopdb.exec.HDFSJobBase.java

License: Apache License

public int run(String[] args) throws Exception {

    long startTime = System.currentTimeMillis();

    List<String> other_args = new ArrayList<String>();

    for (int i = 0; i < args.length; ++i) {
        other_args.add(args[i]);
    }

    JobConf conf = null;
    try {
        conf = configureJob(other_args.toArray(new String[0]));
    } catch (Exception e) {
        System.err.print("ERROR: " + StringUtils.stringifyException(e));
        return printHDFSUsage();
    }

    LOG.info(conf.getJobName());
    JobClient.runJob(conf);

    long endTime = System.currentTimeMillis();
    LOG.info("\n" + conf.getJobName() + " JOB TIME : " + (endTime - startTime) + " ms.\n");

    return 0;
}

From source file: eu.scape_project.tb.chutney.Chutney.java

License: Apache License

/**
 * Method run at the beginning of the map.  Note this is where we can recover settings
 * passed to us by the parent class like job type and job name.
 */
@Override
public void configure(JobConf pJob) {
    super.configure(pJob);

    //get the job name from the config
    gJobName = pJob.getJobName();
    //outputPath = job.get(Settings.OUTPUTPATH_CONF_SETTING);

    //get the type of job we are running from the config
    String jobType = pJob.get(Settings.JOBTYPE_CONF_SETTING);
    //could iterate over the enum instead?
    if (jobType.equals(JobType.CommandLineJob.toString())) {
        gJobType = JobType.CommandLineJob;
    } else if (jobType.equals(JobType.TavernaCommandLine.toString())) {
        gJobType = JobType.TavernaCommandLine;
    } else if (jobType.equals(JobType.TavernaServerJob.toString())) {
        gJobType = JobType.TavernaServerJob;
    } else if (jobType.equals(JobType.XMLWorkflowReport.toString())) {
        gJobType = JobType.XMLWorkflowReport;
    } else if (jobType.equals(JobType.XMLCommandLineJob.toString())) {
        gJobType = JobType.XMLCommandLineJob;
        gXmlCode = pJob.get(Settings.XMLCODE_CONF_SETTING);
    }

}
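
The "could iterate over the enum instead?" comment above suggests a shorter form. A sketch of that alternative, assuming each JobType constant's toString() equals its enum name (which the string comparisons above imply); the fallback value is an assumption:

    try {
        gJobType = JobType.valueOf(pJob.get(Settings.JOBTYPE_CONF_SETTING));
    } catch (IllegalArgumentException e) {
        gJobType = JobType.CommandLineJob; // assumed default
    }
    if (gJobType == JobType.XMLCommandLineJob) {
        gXmlCode = pJob.get(Settings.XMLCODE_CONF_SETTING);
    }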

From source file: eu.scape_project.tb.chutney.ChutneyDriver.java

License: Apache License

/**
 * This method sets up and runs the job on Hadoop
 * @param args The passed through command line arguments
 */
public int run(String[] args) {

    CommandLineParser parser = new PosixParser();
    Options options = new Options();
    options.addOption("n", "jobname", true, "name to assign to the hadoop job");
    options.addOption("i", "inputlist", true,
            "text file containing list of input files (ensure no trailing carriage returns)");
    options.addOption("t", "jobtype", true,
            "type of job; CLJ (command line job), TSJ (Taverna Server job), TCL (Taverna command line job), XML (XML defined command line job), XWR (XML workflow report)");
    options.addOption("x", "xmlcode", true, "xml definition of job to run for XML jobs");
    options.addOption("h", "help", false, "help text");

    JobConf conf = new JobConf(ChutneyDriver.class);

    String input = null;
    String xmlcode = null;

    CommandLine com;
    try {
        com = parser.parse(options, args);
        if (com.hasOption("help")) {
            throw (new ParseException(""));
        }

        String jobName = Settings.JOB_NAME + "default";
        if (com.hasOption("jobname")) {
            //set the job name to something better than the default
            jobName = Settings.JOB_NAME + com.getOptionValue("jobname");
        }
        conf.setJobName(jobName);

        JobType jobType = JobType.CommandLineJob;
        if (com.hasOption("jobtype")) {
            String value = com.getOptionValue("jobtype").toUpperCase();
            if (value.equals(CommandLineJob.getShortJobType())) {
                jobType = CommandLineJob.getJobType();
            } else if (value.equals(TavernaCommandLineJob.getShortJobType())) {
                jobType = TavernaCommandLineJob.getJobType();
            } else if (value.equals(TavernaServerJob.getShortJobType())) {
                jobType = TavernaServerJob.getJobType();
            } else if (value.equals(XMLCommandLineJob.getShortJobType())) {
                jobType = XMLCommandLineJob.getJobType();
            } else if (value.equals(XMLWorkflowReport.getShortJobType())) {
                jobType = XMLWorkflowReport.getJobType();
            }
        }
        System.out.println("JobType: " + jobType.toString());
        conf.set(Settings.JOBTYPE_CONF_SETTING, jobType.toString());

        if (com.hasOption("xmlcode")) {
            //jobType == JobType.XMLCommandLineJob
            xmlcode = com.getOptionValue("xmlcode");
            //if it is a local file get the full path
            if (new File(xmlcode).exists())
                xmlcode = new File(xmlcode).getAbsolutePath();
            conf.set(Settings.XMLCODE_CONF_SETTING, xmlcode);
        }
        if ((jobType == JobType.XMLCommandLineJob) && (xmlcode == null)) {
            //i.e. no code specified
            System.out.println("No XML code specified on the command line");
            return -1;
        }

        if (com.hasOption("inputlist")) {
            input = com.getOptionValue("inputlist");
        }
        if (input == null) {
            System.out.println("no input given");
            return -2;
        }

    } catch (ParseException e) {
        HelpFormatter help = new HelpFormatter();
        help.printHelp("hadoop jar TavernaHadoopWrapper.jar", options);
        return -1;
    }

    //Matchbox processing can take a while, so raise the task timeout from the
    //default (10 minutes) to six hours: 360 min * 60 s * 1000 ms.
    //QAJob runs of 9 tests on ANJO files can take ~4.5 hours or more.
    conf.set("mapred.task.timeout", Integer.toString(360 * 60 * 1000));

    FileInputFormat.setInputPaths(conf, new Path(input));
    FileOutputFormat.setOutputPath(conf, new Path(conf.getJobName()));

    //set the mapper to this class' mapper
    conf.setMapperClass(Chutney.class);
    //we don't want to reduce
    //conf.setReducerClass(Reducer.class);

    //this input format should split the input by one line per map by default.
    conf.setInputFormat(NLineInputFormat.class);
    conf.setInt("mapred.line.input.format.linespermap", 1);

    //sets how the output is written cf. OutputFormat
    //we can use nulloutputformat if we are writing our own output
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    //-1 lets the JVM be reused for an unlimited number of tasks in this job
    conf.set("mapred.job.reuse.jvm.num.tasks", "-1");

    //we only want one reduce task
    conf.setNumReduceTasks(1);

    try {
        JobClient.runJob(conf);
    } catch (IOException ioe) {
        ioe.printStackTrace();
        return -1;
    }

    return 0;
}

From source file: ivory.core.preprocess.BuildWeightedTermDocVectors.java

License: Apache License

@SuppressWarnings("deprecation")
public int runTool() throws Exception {
    sLogger.info("PowerTool: GetWeightedTermDocVectors");

    JobConf conf = new JobConf(BuildWeightedTermDocVectors.class);
    FileSystem fs = FileSystem.get(conf);

    String indexPath = getConf().get("Ivory.IndexPath");
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    String outputPath = env.getWeightedTermDocVectorsDirectory();
    int mapTasks = getConf().getInt("Ivory.NumMapTasks", 0);
    int minSplitSize = getConf().getInt("Ivory.MinSplitSize", 0);
    String collectionName = getConf().get("Ivory.CollectionName");

    String termsFilePath = env.getIndexTermsData();
    String termsIdsFilePath = env.getIndexTermIdsData();
    String termIdMappingFilePath = env.getIndexTermIdMappingData();
    String dfByTermFilePath = env.getDfByTermData();

    Path inputPath = new Path(env.getTermDocVectorsDirectory());
    Path weightedVectorsPath = new Path(outputPath);

    if (fs.exists(weightedVectorsPath)) {
        //fs.delete(weightedVectorsPath, true);
        sLogger.info("Output path already exists!");
        return 0;
    }

    /* add terms file to cache */
    if (!fs.exists(new Path(termsFilePath)) || !fs.exists(new Path(termsIdsFilePath))
            || !fs.exists(new Path(termIdMappingFilePath))) {
        throw new RuntimeException("Error, terms file " + termsFilePath + "/" + termsIdsFilePath + "/"
                + termIdMappingFilePath + "doesn't exist!");
    }
    DistributedCache.addCacheFile(new URI(termsFilePath), conf);
    DistributedCache.addCacheFile(new URI(termsIdsFilePath), conf);
    DistributedCache.addCacheFile(new URI(termIdMappingFilePath), conf);

    /* add df table to cache */
    if (!fs.exists(new Path(dfByTermFilePath))) {
        throw new RuntimeException("Error, df data file " + dfByTermFilePath + "doesn't exist!");
    }
    DistributedCache.addCacheFile(new URI(dfByTermFilePath), conf);

    /* add dl table to cache */
    Path docLengthFile = env.getDoclengthsData();
    if (!fs.exists(docLengthFile)) {
        throw new RuntimeException("Error, doc-length data file " + docLengthFile + "doesn't exist!");
    }
    DistributedCache.addCacheFile(docLengthFile.toUri(), conf);

    conf.setMapperClass(MyMapper.class);
    //conf.setInt("mapred.task.timeout",3600000);
    conf.setJobName("GetWeightedTermDocVectors:" + collectionName);
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(0);
    conf.setInt("mapred.min.split.size", minSplitSize);
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setInt("Ivory.MinNumTerms", getConf().getInt("Ivory.MinNumTerms", Integer.MAX_VALUE));
    conf.setBoolean("Ivory.Normalize", getConf().getBoolean("Ivory.Normalize", false));
    if (getConf().get("Ivory.ShortDocLengths") != null) {
        conf.set("Ivory.ShortDocLengths", getConf().get("Ivory.ShortDocLengths"));
    }
    conf.set("Ivory.ScoringModel", getConf().get("Ivory.ScoringModel"));

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, weightedVectorsPath);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(HMapSFW.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(HMapSFW.class);

    sLogger.info("Running job: " + conf.getJobName());

    long startTime = System.currentTimeMillis();
    RunningJob job = JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
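
runTool ships its side data through DistributedCache; on the task side the files come back via getLocalCacheFiles. A minimal mapper-side sketch of the retrieval half (hypothetical, not Ivory's MyMapper; assumes org.apache.hadoop.filecache.DistributedCache and java.io.IOException are imported):

    public void configure(JobConf conf) {
        try {
            Path[] cached = DistributedCache.getLocalCacheFiles(conf);
            for (Path p : cached) {
                // identify each file by p.getName(), then read it via
                // FileSystem.getLocal(conf).open(p)
            }
        } catch (IOException e) {
            throw new RuntimeException("Could not read distributed cache", e);
        }
    }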

From source file: org.apache.ignite.internal.processors.hadoop.GridHadoopUtils.java

License: Apache License

/**
 * Creates JobInfo from hadoop configuration.
 *
 * @param cfg Hadoop configuration.
 * @return Job info.
 * @throws IgniteCheckedException If failed.
 */
public static GridHadoopDefaultJobInfo createJobInfo(Configuration cfg) throws IgniteCheckedException {
    JobConf jobConf = new JobConf(cfg);

    boolean hasCombiner = jobConf.get("mapred.combiner.class") != null
            || jobConf.get(MRJobConfig.COMBINE_CLASS_ATTR) != null;

    int numReduces = jobConf.getNumReduceTasks();

    jobConf.setBooleanIfUnset("mapred.mapper.new-api", jobConf.get(OLD_MAP_CLASS_ATTR) == null);

    if (jobConf.getUseNewMapper()) {
        String mode = "new map API";

        ensureNotSet(jobConf, "mapred.input.format.class", mode);
        ensureNotSet(jobConf, OLD_MAP_CLASS_ATTR, mode);

        if (numReduces != 0)
            ensureNotSet(jobConf, "mapred.partitioner.class", mode);
        else
            ensureNotSet(jobConf, "mapred.output.format.class", mode);
    } else {
        String mode = "map compatibility";

        ensureNotSet(jobConf, MRJobConfig.INPUT_FORMAT_CLASS_ATTR, mode);
        ensureNotSet(jobConf, MRJobConfig.MAP_CLASS_ATTR, mode);

        if (numReduces != 0)
            ensureNotSet(jobConf, MRJobConfig.PARTITIONER_CLASS_ATTR, mode);
        else
            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
    }

    if (numReduces != 0) {
        jobConf.setBooleanIfUnset("mapred.reducer.new-api", jobConf.get(OLD_REDUCE_CLASS_ATTR) == null);

        if (jobConf.getUseNewReducer()) {
            String mode = "new reduce API";

            ensureNotSet(jobConf, "mapred.output.format.class", mode);
            ensureNotSet(jobConf, OLD_REDUCE_CLASS_ATTR, mode);
        } else {
            String mode = "reduce compatibility";

            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
            ensureNotSet(jobConf, MRJobConfig.REDUCE_CLASS_ATTR, mode);
        }
    }

    Map<String, String> props = new HashMap<>();

    for (Map.Entry<String, String> entry : jobConf)
        props.put(entry.getKey(), entry.getValue());

    return new GridHadoopDefaultJobInfo(jobConf.getJobName(), jobConf.getUser(), hasCombiner, numReduces,
            props);
}

From source file: org.apache.ignite.internal.processors.hadoop.HadoopUtils.java

License: Apache License

/**
 * Creates JobInfo from hadoop configuration.
 *
 * @param cfg Hadoop configuration.
 * @return Job info.
 * @throws IgniteCheckedException If failed.
 */
public static HadoopDefaultJobInfo createJobInfo(Configuration cfg) throws IgniteCheckedException {
    JobConf jobConf = new JobConf(cfg);

    boolean hasCombiner = jobConf.get("mapred.combiner.class") != null
            || jobConf.get(MRJobConfig.COMBINE_CLASS_ATTR) != null;

    int numReduces = jobConf.getNumReduceTasks();

    jobConf.setBooleanIfUnset("mapred.mapper.new-api", jobConf.get(OLD_MAP_CLASS_ATTR) == null);

    if (jobConf.getUseNewMapper()) {
        String mode = "new map API";

        ensureNotSet(jobConf, "mapred.input.format.class", mode);
        ensureNotSet(jobConf, OLD_MAP_CLASS_ATTR, mode);

        if (numReduces != 0)
            ensureNotSet(jobConf, "mapred.partitioner.class", mode);
        else
            ensureNotSet(jobConf, "mapred.output.format.class", mode);
    } else {
        String mode = "map compatibility";

        ensureNotSet(jobConf, MRJobConfig.INPUT_FORMAT_CLASS_ATTR, mode);
        ensureNotSet(jobConf, MRJobConfig.MAP_CLASS_ATTR, mode);

        if (numReduces != 0)
            ensureNotSet(jobConf, MRJobConfig.PARTITIONER_CLASS_ATTR, mode);
        else
            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
    }

    if (numReduces != 0) {
        jobConf.setBooleanIfUnset("mapred.reducer.new-api", jobConf.get(OLD_REDUCE_CLASS_ATTR) == null);

        if (jobConf.getUseNewReducer()) {
            String mode = "new reduce API";

            ensureNotSet(jobConf, "mapred.output.format.class", mode);
            ensureNotSet(jobConf, OLD_REDUCE_CLASS_ATTR, mode);
        } else {
            String mode = "reduce compatibility";

            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
            ensureNotSet(jobConf, MRJobConfig.REDUCE_CLASS_ATTR, mode);
        }
    }

    Map<String, String> props = new HashMap<>();

    for (Map.Entry<String, String> entry : jobConf)
        props.put(entry.getKey(), entry.getValue());

    return new HadoopDefaultJobInfo(jobConf.getJobName(), jobConf.getUser(), hasCombiner, numReduces, props);
}
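
A minimal call-site sketch for createJobInfo (the job name is illustrative; the caller must handle IgniteCheckedException):

    JobConf base = new JobConf();
    base.setJobName("demo"); // JobConf extends Configuration, so it can be passed directly
    HadoopDefaultJobInfo info = HadoopUtils.createJobInfo(base);
    // info now carries the job name, user, combiner flag, reducer count, and all properties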