List of usage examples for org.apache.hadoop.mapred JobConf getJobName
public String getJobName()
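For orientation before the full examples below, a minimal sketch of the getter itself: setJobName stores a user-visible name on the configuration and getJobName reads it back. The class and job name used here are made up for illustration only.

import org.apache.hadoop.mapred.JobConf;

public class JobNameExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf(JobNameExample.class);

        // Store a user-visible name on the configuration ...
        conf.setJobName("word-count-demo");

        // ... and read it back, e.g. for logging before submission.
        System.out.println("Running job: " + conf.getJobName());
    }
}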
From source file:edu.ucsb.cs.partitioning.lsh.LshPartitionMain.java
License:Apache License
public static void run(JobConf job) throws IOException {
    String ret = stars() + "\n Running job: " + job.getJobName() + "\n Input Path: {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += " Output Path: " + FileOutputFormat.getOutputPath(job) + "\n"
            + " Threshold: " + job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE)
            + "\n k: " + job.getInt(K_PROPERTY, K_VALUE)
            + "\n l: " + job.getInt(L_PROPERTY, L_VALUE);
    System.out.println(ret);
    JobClient.runJob(job);
}
From source file:edu.ucsb.cs.utilities.JobSubmitter.java
License:Apache License
public static void run(JobConf job, String title, float Threshold) {
    String ret = stars() + "\n [" + title + "]\n" + stars() + "\n Running job: " + job.getJobName()
            + "\n Input Path: {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += " Output Path: " + FileOutputFormat.getOutputPath(job) + "\n"
            + " Num. of mappers: " + job.getNumMapTasks() + "\n"
            + " Num. of reducers: " + job.getNumReduceTasks() + "\n";
    if (Threshold != -1)
        ret += " Threshold: " + Threshold + "\n";
    // for (int ctr = 0; ctr < Properties.requiredParameters.size(); ctr++) // check
    //     ret += Properties.requiredParameters.get(ctr) + "\n";
    System.out.println(ret);
    try {
        Date startTime = new Date();
        JobClient.runJob(job);
        Date end_time = new Date();
        System.err.println(
                "Job took " + (end_time.getTime() - startTime.getTime()) / (float) 1000.0 + " seconds.");
    } catch (IOException e) {
        System.err.println("ERROR: While submitting the job :(");
        e.printStackTrace();
    }
}
From source file:edu.yale.cs.hadoopdb.exec.DBJobBase.java
License:Apache License
/**
 * Job config initialization (command-line params etc).
 */
protected JobConf initConf(String[] args) throws Exception {
    List<String> other_args = new ArrayList<String>();
    Path configuration_file = null;
    boolean replication = false;

    for (int i = 0; i < args.length; ++i) {
        if (("-" + DBConst.DB_CONFIG_FILE).equals(args[i])) {
            configuration_file = new Path(args[++i]);
        } else if ("-replication".equals(args[i])) {
            replication = true;
        } else {
            other_args.add(args[i]);
        }
    }

    JobConf conf = null;
    conf = configureJob(other_args.toArray(new String[0]));
    LOG.info(conf.getJobName());
    LOG.info(conf.get(DBConst.DB_SQL_QUERY));

    if (conf.get(DBConst.DB_RELATION_ID) == null || conf.get(DBConst.DB_SQL_QUERY) == null
            || conf.get(DBConst.DB_RECORD_READER) == null) {
        throw new Exception(
                "ERROR: DB Job requires a relation, an SQL Query and a Record Reader class to be configured.\n"
                        + "Please specify using: conf.set(\"" + DBConst.DB_RELATION_ID
                        + "\", <relation name>), conf.set(\"" + DBConst.DB_SQL_QUERY + "\", <SQL QUERY>)\n"
                        + "and code an appropriate Record Reader and specify conf.set(\""
                        + DBConst.DB_RECORD_READER + "\", <Record reader class name>)\n");
    }

    if (replication) {
        conf.setBoolean(DBConst.DB_REPLICATION, true);
    }

    if (configuration_file == null) {
        if (conf.get(DBConst.DB_CONFIG_FILE) == null) {
            throw new Exception("No HadoopDB config file!");
        }
    } else {
        conf.set(DBConst.DB_CONFIG_FILE, configuration_file.toString());
    }

    setInputFormat(conf);

    return conf;
}
From source file:edu.yale.cs.hadoopdb.exec.DBJobBase.java
License:Apache License
public int run(String[] args) throws Exception {
    long startTime = System.currentTimeMillis();

    JobConf conf = null;
    try {
        conf = initConf(args);
    } catch (Exception e) {
        System.err.print("ERROR: " + StringUtils.stringifyException(e));
        return printDbUsage();
    }

    JobClient.runJob(conf);

    long endTime = System.currentTimeMillis();
    LOG.info("\n" + conf.getJobName() + " JOB TIME : " + (endTime - startTime) + " ms.\n");

    return 0;
}
From source file:edu.yale.cs.hadoopdb.exec.HDFSJobBase.java
License:Apache License
public int run(String[] args) throws Exception {
    long startTime = System.currentTimeMillis();

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        other_args.add(args[i]);
    }

    JobConf conf = null;
    try {
        conf = configureJob(other_args.toArray(new String[0]));
    } catch (Exception e) {
        System.err.print("ERROR: " + StringUtils.stringifyException(e));
        return printHDFSUsage();
    }

    LOG.info(conf.getJobName());

    JobClient.runJob(conf);

    long endTime = System.currentTimeMillis();
    LOG.info("\n" + conf.getJobName() + " JOB TIME : " + (endTime - startTime) + " ms.\n");

    return 0;
}
From source file:eu.scape_project.tb.chutney.Chutney.java
License:Apache License
/**
 * Method run at the beginning of the map. Note this is where we can recover settings
 * passed to us by the parent class like job type and job name.
 */
@Override
public void configure(JobConf pJob) {
    super.configure(pJob);

    // get the job name from the config
    gJobName = pJob.getJobName();
    // outputPath = job.get(Settings.OUTPUTPATH_CONF_SETTING);

    // get the type of job we are running from the config
    String jobType = pJob.get(Settings.JOBTYPE_CONF_SETTING);
    // could iterate over the enum instead?
    if (jobType.equals(JobType.CommandLineJob.toString())) {
        gJobType = JobType.CommandLineJob;
    } else if (jobType.equals(JobType.TavernaCommandLine.toString())) {
        gJobType = JobType.TavernaCommandLine;
    } else if (jobType.equals(JobType.TavernaServerJob.toString())) {
        gJobType = JobType.TavernaServerJob;
    } else if (jobType.equals(JobType.XMLWorkflowReport.toString())) {
        gJobType = JobType.XMLWorkflowReport;
    } else if (jobType.equals(JobType.XMLCommandLineJob.toString())) {
        gJobType = JobType.XMLCommandLineJob;
        gXmlCode = pJob.get(Settings.XMLCODE_CONF_SETTING);
    }
}
From source file:eu.scape_project.tb.chutney.ChutneyDriver.java
License:Apache License
/**
 * This method sets up and runs the job on Hadoop
 * @param args The passed through command line arguments
 */
public int run(String[] args) {
    CommandLineParser parser = new PosixParser();
    Options options = new Options();
    options.addOption("n", "jobname", true, "name to assign to the hadoop job");
    options.addOption("i", "inputlist", true,
            "text file containing list of input files (ensure no trailing carriage returns)");
    options.addOption("t", "jobtype", true,
            "type of job; CLJ (command line job), TSJ (Taverna Server job), TCL (Taverna command line job), "
                    + "XML (XML defined command line job), XWR (XML workflow report)");
    options.addOption("x", "xmlcode", true, "xml definition of job to run for XML jobs");
    options.addOption("h", "help", false, "help text");

    JobConf conf = new JobConf(ChutneyDriver.class);

    String input = null;
    String xmlcode = null;

    CommandLine com;
    try {
        com = parser.parse(options, args);
        if (com.hasOption("help")) {
            throw (new ParseException(""));
        }

        String jobName = Settings.JOB_NAME + "default";
        if (com.hasOption("jobname")) {
            // set the job name to something better than the default
            jobName = Settings.JOB_NAME + com.getOptionValue("jobname");
        }
        conf.setJobName(jobName);

        JobType jobType = JobType.CommandLineJob;
        if (com.hasOption("jobtype")) {
            String value = com.getOptionValue("jobtype").toUpperCase();
            if (value.equals(CommandLineJob.getShortJobType())) {
                jobType = CommandLineJob.getJobType();
            } else if (value.equals(TavernaCommandLineJob.getShortJobType())) {
                jobType = TavernaCommandLineJob.getJobType();
            } else if (value.equals(TavernaServerJob.getShortJobType())) {
                jobType = TavernaServerJob.getJobType();
            } else if (value.equals(XMLCommandLineJob.getShortJobType())) {
                jobType = XMLCommandLineJob.getJobType();
            } else if (value.equals(XMLWorkflowReport.getShortJobType())) {
                jobType = XMLWorkflowReport.getJobType();
            }
        }
        System.out.println("JobType: " + jobType.toString());
        conf.set(Settings.JOBTYPE_CONF_SETTING, jobType.toString());

        if (com.hasOption("xmlcode")) {
            // jobType == JobType.XMLCommandLineJob
            xmlcode = com.getOptionValue("xmlcode");
            // if it is a local file get the full path
            if (new File(xmlcode).exists())
                xmlcode = new File(xmlcode).getAbsolutePath();
            conf.set(Settings.XMLCODE_CONF_SETTING, xmlcode);
        }
        if ((jobType == JobType.XMLCommandLineJob) & (xmlcode == null)) {
            // i.e. no code specified
            System.out.println("No XML code specified on the command line");
            return -1;
        }

        if (com.hasOption("inputlist")) {
            input = com.getOptionValue("inputlist");
        }
        if (input.equals(null)) {
            System.out.println("no input given");
            return -2;
        }
    } catch (ParseException e) {
        HelpFormatter help = new HelpFormatter();
        help.printHelp("hadoop jar TavernaHadoopWrapper.jar", options);
        return -1;
    }

    // using matchbox it may take a while to process the jobs
    // set a longer timeout than the default (10 mins)
    // six hours should be more than enough :/  MMM*SS*MS
    // QAJob testing for 9 tests on ANJO files can take ~4.5hrs+
    conf.set("mapred.task.timeout", Integer.toString(360 * 60 * 1000));

    FileInputFormat.setInputPaths(conf, new Path(input));
    FileOutputFormat.setOutputPath(conf, new Path(conf.getJobName()));

    // set the mapper to this class' mapper
    conf.setMapperClass(Chutney.class);
    // we don't want to reduce
    // conf.setReducerClass(Reducer.class);

    // this input format should split the input by one line per map by default
    conf.setInputFormat(NLineInputFormat.class);
    conf.setInt("mapred.line.input.format.linespermap", 1);

    // sets how the output is written cf. OutputFormat
    // we can use NullOutputFormat if we are writing our own output
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    // this sets maximum jvm reuse
    conf.set("mapred.job.reuse.jvm.num.tasks", "-1");

    // we only want one reduce task
    conf.setNumReduceTasks(1);

    try {
        JobClient.runJob(conf);
    } catch (IOException ioe) {
        ioe.printStackTrace();
        return -1;
    }

    return 0;
}
From source file:ivory.core.preprocess.BuildWeightedTermDocVectors.java
License:Apache License
@SuppressWarnings("deprecation") public int runTool() throws Exception { sLogger.info("PowerTool: GetWeightedTermDocVectors"); JobConf conf = new JobConf(BuildWeightedTermDocVectors.class); FileSystem fs = FileSystem.get(conf); String indexPath = getConf().get("Ivory.IndexPath"); RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs); String outputPath = env.getWeightedTermDocVectorsDirectory(); int mapTasks = getConf().getInt("Ivory.NumMapTasks", 0); int minSplitSize = getConf().getInt("Ivory.MinSplitSize", 0); String collectionName = getConf().get("Ivory.CollectionName"); String termsFilePath = env.getIndexTermsData(); String termsIdsFilePath = env.getIndexTermIdsData(); String termIdMappingFilePath = env.getIndexTermIdMappingData(); String dfByTermFilePath = env.getDfByTermData(); Path inputPath = new Path(env.getTermDocVectorsDirectory()); Path weightedVectorsPath = new Path(outputPath); if (fs.exists(weightedVectorsPath)) { //fs.delete(weightedVectorsPath, true); sLogger.info("Output path already exists!"); return 0; }//from w w w. j ava 2 s . com /* add terms file to cache */ if (!fs.exists(new Path(termsFilePath)) || !fs.exists(new Path(termsIdsFilePath)) || !fs.exists(new Path(termIdMappingFilePath))) { throw new RuntimeException("Error, terms file " + termsFilePath + "/" + termsIdsFilePath + "/" + termIdMappingFilePath + "doesn't exist!"); } DistributedCache.addCacheFile(new URI(termsFilePath), conf); DistributedCache.addCacheFile(new URI(termsIdsFilePath), conf); DistributedCache.addCacheFile(new URI(termIdMappingFilePath), conf); /* add df table to cache */ if (!fs.exists(new Path(dfByTermFilePath))) { throw new RuntimeException("Error, df data file " + dfByTermFilePath + "doesn't exist!"); } DistributedCache.addCacheFile(new URI(dfByTermFilePath), conf); /* add dl table to cache */ Path docLengthFile = env.getDoclengthsData(); if (!fs.exists(docLengthFile)) { throw new RuntimeException("Error, doc-length data file " + docLengthFile + "doesn't exist!"); } DistributedCache.addCacheFile(docLengthFile.toUri(), conf); conf.setMapperClass(MyMapper.class); //conf.setInt("mapred.task.timeout",3600000); conf.setJobName("GetWeightedTermDocVectors:" + collectionName); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(0); conf.setInt("mapred.min.split.size", minSplitSize); conf.set("mapred.child.java.opts", "-Xmx2048m"); conf.setInt("Ivory.MinNumTerms", getConf().getInt("Ivory.MinNumTerms", Integer.MAX_VALUE)); conf.setBoolean("Ivory.Normalize", getConf().getBoolean("Ivory.Normalize", false)); if (getConf().get("Ivory.ShortDocLengths") != null) { conf.set("Ivory.ShortDocLengths", getConf().get("Ivory.ShortDocLengths")); } conf.set("Ivory.ScoringModel", getConf().get("Ivory.ScoringModel")); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, weightedVectorsPath); conf.setInputFormat(SequenceFileInputFormat.class); conf.setMapOutputKeyClass(IntWritable.class); conf.setMapOutputValueClass(HMapSFW.class); conf.setOutputFormat(SequenceFileOutputFormat.class); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(HMapSFW.class); sLogger.info("Running job: " + conf.getJobName()); long startTime = System.currentTimeMillis(); RunningJob job = JobClient.runJob(conf); sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:org.apache.ignite.internal.processors.hadoop.GridHadoopUtils.java
License:Apache License
/**
 * Creates JobInfo from hadoop configuration.
 *
 * @param cfg Hadoop configuration.
 * @return Job info.
 * @throws IgniteCheckedException If failed.
 */
public static GridHadoopDefaultJobInfo createJobInfo(Configuration cfg) throws IgniteCheckedException {
    JobConf jobConf = new JobConf(cfg);

    boolean hasCombiner = jobConf.get("mapred.combiner.class") != null
            || jobConf.get(MRJobConfig.COMBINE_CLASS_ATTR) != null;

    int numReduces = jobConf.getNumReduceTasks();

    jobConf.setBooleanIfUnset("mapred.mapper.new-api", jobConf.get(OLD_MAP_CLASS_ATTR) == null);

    if (jobConf.getUseNewMapper()) {
        String mode = "new map API";

        ensureNotSet(jobConf, "mapred.input.format.class", mode);
        ensureNotSet(jobConf, OLD_MAP_CLASS_ATTR, mode);

        if (numReduces != 0)
            ensureNotSet(jobConf, "mapred.partitioner.class", mode);
        else
            ensureNotSet(jobConf, "mapred.output.format.class", mode);
    } else {
        String mode = "map compatibility";

        ensureNotSet(jobConf, MRJobConfig.INPUT_FORMAT_CLASS_ATTR, mode);
        ensureNotSet(jobConf, MRJobConfig.MAP_CLASS_ATTR, mode);

        if (numReduces != 0)
            ensureNotSet(jobConf, MRJobConfig.PARTITIONER_CLASS_ATTR, mode);
        else
            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
    }

    if (numReduces != 0) {
        jobConf.setBooleanIfUnset("mapred.reducer.new-api", jobConf.get(OLD_REDUCE_CLASS_ATTR) == null);

        if (jobConf.getUseNewReducer()) {
            String mode = "new reduce API";

            ensureNotSet(jobConf, "mapred.output.format.class", mode);
            ensureNotSet(jobConf, OLD_REDUCE_CLASS_ATTR, mode);
        } else {
            String mode = "reduce compatibility";

            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
            ensureNotSet(jobConf, MRJobConfig.REDUCE_CLASS_ATTR, mode);
        }
    }

    Map<String, String> props = new HashMap<>();

    for (Map.Entry<String, String> entry : jobConf)
        props.put(entry.getKey(), entry.getValue());

    return new GridHadoopDefaultJobInfo(jobConf.getJobName(), jobConf.getUser(), hasCombiner, numReduces, props);
}
From source file:org.apache.ignite.internal.processors.hadoop.HadoopUtils.java
License:Apache License
/**
 * Creates JobInfo from hadoop configuration.
 *
 * @param cfg Hadoop configuration.
 * @return Job info.
 * @throws IgniteCheckedException If failed.
 */
public static HadoopDefaultJobInfo createJobInfo(Configuration cfg) throws IgniteCheckedException {
    JobConf jobConf = new JobConf(cfg);

    boolean hasCombiner = jobConf.get("mapred.combiner.class") != null
            || jobConf.get(MRJobConfig.COMBINE_CLASS_ATTR) != null;

    int numReduces = jobConf.getNumReduceTasks();

    jobConf.setBooleanIfUnset("mapred.mapper.new-api", jobConf.get(OLD_MAP_CLASS_ATTR) == null);

    if (jobConf.getUseNewMapper()) {
        String mode = "new map API";

        ensureNotSet(jobConf, "mapred.input.format.class", mode);
        ensureNotSet(jobConf, OLD_MAP_CLASS_ATTR, mode);

        if (numReduces != 0)
            ensureNotSet(jobConf, "mapred.partitioner.class", mode);
        else
            ensureNotSet(jobConf, "mapred.output.format.class", mode);
    } else {
        String mode = "map compatibility";

        ensureNotSet(jobConf, MRJobConfig.INPUT_FORMAT_CLASS_ATTR, mode);
        ensureNotSet(jobConf, MRJobConfig.MAP_CLASS_ATTR, mode);

        if (numReduces != 0)
            ensureNotSet(jobConf, MRJobConfig.PARTITIONER_CLASS_ATTR, mode);
        else
            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
    }

    if (numReduces != 0) {
        jobConf.setBooleanIfUnset("mapred.reducer.new-api", jobConf.get(OLD_REDUCE_CLASS_ATTR) == null);

        if (jobConf.getUseNewReducer()) {
            String mode = "new reduce API";

            ensureNotSet(jobConf, "mapred.output.format.class", mode);
            ensureNotSet(jobConf, OLD_REDUCE_CLASS_ATTR, mode);
        } else {
            String mode = "reduce compatibility";

            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
            ensureNotSet(jobConf, MRJobConfig.REDUCE_CLASS_ATTR, mode);
        }
    }

    Map<String, String> props = new HashMap<>();

    for (Map.Entry<String, String> entry : jobConf)
        props.put(entry.getKey(), entry.getValue());

    return new HadoopDefaultJobInfo(jobConf.getJobName(), jobConf.getUser(), hasCombiner, numReduces, props);
}