List of usage examples for org.apache.hadoop.mapred.JobConf.setInt
public void setInt(String name, int value)

Set the value of the name property to an int.
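The usual round trip is: the driver stores an int with JobConf.setInt, and each task reads it back with JobConf.getInt (both inherited from Configuration), typically in the mapper's configure() method. The sketch below is a minimal illustration of that pattern; the property name example.max.records, the class names, and the default value 1000 are assumptions made for this sketch and are not taken from any of the projects listed below.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class SetIntExample {

    // Hypothetical property name, used only for this illustration.
    private static final String MAX_RECORDS_PROP = "example.max.records";

    // Driver side: store an int in the job configuration.
    public static void configureJob(JobConf conf, int maxRecords) {
        conf.setInt(MAX_RECORDS_PROP, maxRecords);
    }

    // Task side: read the int back in configure(); the default value keeps the
    // mapper usable even if the driver never set the property.
    public static class ExampleMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, LongWritable> {

        private int maxRecords;

        @Override
        public void configure(JobConf job) {
            maxRecords = job.getInt(MAX_RECORDS_PROP, 1000);
        }

        @Override
        public void map(LongWritable key, Text value,
                OutputCollector<Text, LongWritable> output, Reporter reporter) throws IOException {
            // ... use maxRecords to bound whatever the mapper does ...
        }
    }
}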
From source file:eu.scape_project.tb.chutney.ChutneyDriver.java
License:Apache License
/**
 * This method sets up and runs the job on Hadoop
 * @param args The passed through command line arguments
 */
public int run(String[] args) {
    CommandLineParser parser = new PosixParser();
    Options options = new Options();
    options.addOption("n", "jobname", true, "name to assign to the hadoop job");
    options.addOption("i", "inputlist", true,
            "text file containing list of input files (ensure no trailing carriage returns)");
    options.addOption("t", "jobtype", true,
            "type of job; CLJ (command line job), TSJ (Taverna Server job), TCL (Taverna command line job), XML (XML defined command line job), XWR (XML workflow report)");
    options.addOption("x", "xmlcode", true, "xml definition of job to run for XML jobs");
    options.addOption("h", "help", false, "help text");

    JobConf conf = new JobConf(ChutneyDriver.class);

    String input = null;
    String xmlcode = null;

    CommandLine com;
    try {
        com = parser.parse(options, args);
        if (com.hasOption("help")) {
            throw (new ParseException(""));
        }

        String jobName = Settings.JOB_NAME + "default";
        if (com.hasOption("jobname")) {
            //set the job name to something better than the default
            jobName = Settings.JOB_NAME + com.getOptionValue("jobname");
        }
        conf.setJobName(jobName);

        JobType jobType = JobType.CommandLineJob;
        if (com.hasOption("jobtype")) {
            String value = com.getOptionValue("jobtype").toUpperCase();
            if (value.equals(CommandLineJob.getShortJobType())) {
                jobType = CommandLineJob.getJobType();
            } else if (value.equals(TavernaCommandLineJob.getShortJobType())) {
                jobType = TavernaCommandLineJob.getJobType();
            } else if (value.equals(TavernaServerJob.getShortJobType())) {
                jobType = TavernaServerJob.getJobType();
            } else if (value.equals(XMLCommandLineJob.getShortJobType())) {
                jobType = XMLCommandLineJob.getJobType();
            } else if (value.equals(XMLWorkflowReport.getShortJobType())) {
                jobType = XMLWorkflowReport.getJobType();
            }
        }
        System.out.println("JobType: " + jobType.toString());
        conf.set(Settings.JOBTYPE_CONF_SETTING, jobType.toString());

        if (com.hasOption("xmlcode")) {
            //jobType == JobType.XMLCommandLineJob
            xmlcode = com.getOptionValue("xmlcode");
            //if it is a local file get the full path
            if (new File(xmlcode).exists())
                xmlcode = new File(xmlcode).getAbsolutePath();
            conf.set(Settings.XMLCODE_CONF_SETTING, xmlcode);
        }
        if ((jobType == JobType.XMLCommandLineJob) && (xmlcode == null)) {
            //i.e. no code specified
            System.out.println("No XML code specified on the command line");
            return -1;
        }

        if (com.hasOption("inputlist")) {
            input = com.getOptionValue("inputlist");
        }
        if (input == null) {
            System.out.println("no input given");
            return -2;
        }
    } catch (ParseException e) {
        HelpFormatter help = new HelpFormatter();
        help.printHelp("hadoop jar TavernaHadoopWrapper.jar", options);
        return -1;
    }

    //using matchbox it may take a while to process the jobs
    //set a longer timeout than the default (10 mins)
    //six hours should be more than enough :/ minutes * seconds * milliseconds
    //QAJob testing for 9 tests on ANJO files can take ~4.5hrs+
    conf.set("mapred.task.timeout", Integer.toString(360 * 60 * 1000));

    FileInputFormat.setInputPaths(conf, new Path(input));
    FileOutputFormat.setOutputPath(conf, new Path(conf.getJobName()));

    //set the mapper to this class' mapper
    conf.setMapperClass(Chutney.class);
    //we don't want to reduce
    //conf.setReducerClass(Reducer.class);

    //this input format should split the input by one line per map by default.
    conf.setInputFormat(NLineInputFormat.class);
    conf.setInt("mapred.line.input.format.linespermap", 1);

    //sets how the output is written cf. OutputFormat
    //we can use nulloutputformat if we are writing our own output
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    //this sets maximum jvm reuse
    conf.set("mapred.job.reuse.jvm.num.tasks", "-1");

    //we only want one reduce task
    conf.setNumReduceTasks(1);

    try {
        JobClient.runJob(conf);
    } catch (IOException ioe) {
        ioe.printStackTrace();
        return -1;
    }

    return 0;
}
From source file:eu.stratosphere.myriad.driver.hadoop.MyriadInputFormat.java
License:Apache License
public static void setNodeCount(JobConf conf, short nodeCount) {
    conf.setInt("mapred.myriad.dgen.node.count", nodeCount);
}
From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java
License:LGPL
/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 */
private static void setup(final Configuration conf, final JobConf jobConf, final Arguments args)
        throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    // set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
            args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname,
            args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    long maxBytesPerMap = conf.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);

    FileSystem dstfs = args.dst.getFileSystem(conf);
    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (null == parent) {
                // If dst is '/' on S3, it might not exist yet, but dst.getParent()
                // will return null. In this case, use '/' as its own parent to
                // prevent NPE errors below.
                parent = args.dst;
            }
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist,
            LongWritable.class, FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist,
            Text.class, Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist,
            Text.class, FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        // skip file if the src and the dst files are the same.
                        skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
                        // skip file if it exceed file limit or size limit
                        skipfile |= fileCount == args.filelimit
                                || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            // if (LOG.isTraceEnabled()) {
                            //     LOG.trace("adding file " + child.getPath());
                            // }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > maxBytesPerMap) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        getLogger().info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create " + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());

    // Explicitly create the tmpDir to ensure that it can be cleaned
    // up by fullyDelete() later.
    tmpDir.getFileSystem(conf).mkdirs(tmpDir);

    getLogger().info("srcCount=" + srcCount);
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
}
From source file:gov.nih.ncgc.hadoop.BioIsostere.java
public int run(String[] args) throws Exception {
    JobConf jobConf = new JobConf(getConf(), BioIsostere.class);
    jobConf.setJobName(BioIsostere.class.getSimpleName());

    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(IntWritable.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(MoleculePairWritable.class);

    jobConf.setMapperClass(BioisostereMapper.class);
    jobConf.setReducerClass(MoleculePairReducer.class);

    // jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setInt("mapred.line.input.format.linespermap", 10);
    jobConf.setInputFormat(NLineInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);

    if (args.length != 3) {
        System.err.println("Usage: bisos <datafile> <out> <license file>");
        System.exit(2);
    }
    FileInputFormat.setInputPaths(jobConf, new Path(args[0]));
    FileOutputFormat.setOutputPath(jobConf, new Path(args[1]));

    // make the license file available via the distributed cache
    DistributedCache.addCacheFile(new Path(args[2]).toUri(), jobConf);

    long start = System.currentTimeMillis();
    JobClient.runJob(jobConf);
    double duration = (System.currentTimeMillis() - start) / 1000.0;
    System.out.println("Total runtime was " + duration + "s");
    return 0;
}
From source file:gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.java
public static void main(String[] args) {
    // paths and input handling
    Path inputRatingsPath = new Path(args[0]); // movieid, userid, rating (text files)
    Path job1OutputPath = new Path("/user/hduser/partialResults");
    Path partialDistancesPath = new Path("/user/hduser/partialResults/part-*"); // member_nonMember \t partialDistance (sequence files)
    Path candidateMoviesPath = new Path("/user/hduser/partialResults/candidateMovies-*"); // candidateMovieId, nonMemberUserId_rating (text files)
    Path userSimilaritiesPath = new Path("/user/hduser/userSimilarities"); // similarity of each group member to his friends (text files)
    Path finalScoresPath = new Path(args[1]); // movieId \t outputScore

    int numReduceTasks = 56; // default value
    if (args.length == 3) {
        numReduceTasks = Integer.parseInt(args[2]);
    }

    final float friendsSimThresh = 0.8f;

    String groupFilePath = "/user/hduser/group.txt"; // one-line csv file with user ids (text file)

    if (args.length < 2 || args.length > 3) {
        System.err.println(
                "Incorrect input. Example usage: hadoop jar ~/GroupRecs/MultiUser.jar inputPath outputPath [numReduceTasks]");
        return;
    }

    //JOB 1//
    JobClient client = new JobClient();
    JobConf conf = new JobConf(gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.class);

    try {
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(job1OutputPath)) {
            fs.delete(job1OutputPath, true);
        }
        if (fs.exists(userSimilaritiesPath)) {
            fs.delete(userSimilaritiesPath, true);
        }
        if (fs.exists(finalScoresPath)) {
            fs.delete(finalScoresPath, true);
        }
    } catch (IOException ex) {
        Logger.getLogger(MultiUserMain.class.getName()).log(Level.SEVERE, null, ex);
    }

    conf.setJobName("Multi-user approach - Job 1");
    System.out.println("Starting Job 1 (Multi-user approach)...");

    conf.setMapOutputKeyClass(VIntWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(ByteWritable.class);

    conf.setInputFormat(TextInputFormat.class);
    //conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);

    FileInputFormat.setInputPaths(conf, inputRatingsPath); // user ratings
    FileOutputFormat.setOutputPath(conf, job1OutputPath); // partial distances

    MultipleOutputs.addNamedOutput(conf, "candidateMovies", SequenceFileOutputFormat.class,
            VIntWritable.class, Text.class); // movieId, userId_rating

    conf.setMapperClass(gr.forth.ics.isl.grouprecsmr.job1.Job1Mapper.class);
    conf.setReducerClass(gr.forth.ics.isl.grouprecsmr.job1.Job1Reducer.class);

    conf.setNumReduceTasks(numReduceTasks);

    try {
        DistributedCache.addCacheFile(new URI(groupFilePath), conf); // group
    } catch (URISyntaxException e1) {
        System.err.println(e1.toString());
    }

    conf.setInt("mapred.task.timeout", 6000000);

    client.setConf(conf);
    RunningJob job;
    try {
        job = JobClient.runJob(conf);
        job.waitForCompletion();
    } catch (Exception e) {
        System.err.println(e);
    }

    //JOB 2//
    System.out.println("Starting Job 2 (Multi-user approach)...");
    JobClient client2 = new JobClient();
    JobConf conf2 = new JobConf(gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.class);
    conf2.setJobName("Multi-user approach - Job 2");

    conf2.setMapOutputKeyClass(Text.class); // user pair (member_nonMember), where nonMember is in friends
    conf2.setMapOutputValueClass(ByteWritable.class); // similarity part unsquared
    conf2.setOutputKeyClass(Text.class); // user pair (member_nonMember), where nonMember is in friends
    conf2.setOutputValueClass(DoubleWritable.class); // similarity

    conf2.setInputFormat(SequenceFileInputFormat.class);
    //conf2.setInputFormat(TextInputFormat.class);
    conf2.setOutputFormat(TextOutputFormat.class);
    //conf2.setOutputFormat(SequenceFileOutputFormat.class);
    //SequenceFileOutputFormat.setOutputCompressionType(conf2, SequenceFile.CompressionType.BLOCK);

    FileInputFormat.setInputPaths(conf2, partialDistancesPath); // Job 1 output
    FileOutputFormat.setOutputPath(conf2, userSimilaritiesPath); // Job 2 output (similarity of each group member to his friends)

    conf2.setMapperClass(IdentityMapper.class);
    conf2.setReducerClass(gr.forth.ics.isl.grouprecsmr.job2.Job2ReducerMulti.class);

    int numSimilaritiesPartitions = numReduceTasks;
    conf2.setNumReduceTasks(numSimilaritiesPartitions);

    conf2.setFloat("friendsSimThreshold", friendsSimThresh);

    conf2.setInt("mapred.task.timeout", 6000000);
    conf2.set("io.sort.mb", "500");

    client2.setConf(conf2);
    RunningJob job2;
    try {
        job2 = JobClient.runJob(conf2);
        job2.waitForCompletion();
    } catch (Exception e) {
        System.err.println(e);
    }

    //JOB 3//
    System.out.println("Starting Job 3 (Multi-user approach)...");
    JobClient client3 = new JobClient();
    JobConf conf3 = new JobConf(gr.forth.ics.isl.grouprecsmr.multiuser.MultiUserMain.class);
    conf3.setJobName("Multi-user approach - Job 3");

    conf3.setMapOutputKeyClass(VIntWritable.class);
    conf3.setMapOutputValueClass(Text.class);
    conf3.setOutputKeyClass(VIntWritable.class);
    conf3.setOutputValueClass(DoubleWritable.class);

    conf3.setInputFormat(SequenceFileInputFormat.class);
    //conf3.setInputFormat(TextInputFormat.class);
    conf3.setOutputFormat(TextOutputFormat.class);
    //conf3.setOutputFormat(SequenceFileOutputFormat.class);
    //SequenceFileOutputFormat.setOutputCompressionType(conf3, SequenceFile.CompressionType.BLOCK);

    try {
        DistributedCache.addCacheFile(new URI(groupFilePath), conf3);
    } catch (URISyntaxException ex) {
        System.err.println("Could not add group file to distributed cache. " + ex);
    }

    for (int i = 0; i < numSimilaritiesPartitions; i++) {
        String reduceId = String.format("%05d", i); // 5-digit int with leading zeros
        try {
            DistributedCache.addCacheFile(new URI(userSimilaritiesPath.toString() + "/part-" + reduceId),
                    conf3);
        } catch (URISyntaxException ex) {
            System.err.println("Could not add similarities files to distributed cache. " + ex);
        }
    }

    FileInputFormat.setInputPaths(conf3, candidateMoviesPath); // Job 1 output (candidate movies)
    FileOutputFormat.setOutputPath(conf3, finalScoresPath); // Job 3 output (movie \t outputScore)

    // conf3.setMapperClass(IdentityMapper.class);
    conf3.setMapperClass(gr.forth.ics.isl.grouprecsmr.job3.Job3MapperMulti.class); // filtering out ratings from non-Friends
    conf3.setReducerClass(gr.forth.ics.isl.grouprecsmr.job3.Job3ReducerMulti.class);

    conf3.setInt("mapred.task.timeout", 6000000);
    conf3.set("io.sort.mb", "500");

    conf3.setNumReduceTasks(numReduceTasks);

    client3.setConf(conf3);
    RunningJob job3;
    try {
        job3 = JobClient.runJob(conf3);
        job3.waitForCompletion();
    } catch (Exception e) {
        System.err.println(e);
    }
}
From source file:graphbuilding.GenomixDriver.java
License:Apache License
public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, String defaultConfPath)
        throws IOException {
    JobConf conf = new JobConf(GenomixDriver.class);
    conf.setInt("sizeKmer", sizeKmer);
    if (defaultConfPath != null) {
        conf.addResource(new Path(defaultConfPath));
    }

    conf.setJobName("Genomix Graph Building");
    conf.setMapperClass(GenomixMapper.class);
    conf.setReducerClass(GenomixReducer.class);
    conf.setCombinerClass(GenomixCombiner.class);

    conf.setMapOutputKeyClass(Kmer.class);
    conf.setMapOutputValueClass(KmerCountValue.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(Kmer.class);
    conf.setOutputValueClass(KmerCountValue.class);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    conf.setNumReduceTasks(numReducers);

    FileSystem dfs = FileSystem.get(conf);
    dfs.delete(new Path(outputPath), true);
    JobClient.runJob(conf);
}
From source file:hibench.HtmlConf.java
License:Apache License
public void setJobConf(JobConf job) { job.setInt("agents", agents); job.setLong("pages", pages); job.setLong("agentpages", agentpages); long vLinkElems = (null == linkZipf) ? -1 : linkZipf.velems; long vWordElems = (null == wordZipf) ? -1 : wordZipf.velems; job.setLong("vLinkElems", vLinkElems); job.setLong("vWordElems", vWordElems); }
From source file:hibench.VisitConf.java
License:Apache License
public void setJobConf(JobConf job) { job.setInt("agents", agents); job.setLong("visits", visits); job.setLong("agentvisits", agentvisits); job.setLong("pages", pages); }
From source file:hibench.ZipfRandom.java
License:Apache License
public void setJobConf(JobConf job) { job.setInt("agents", agents); job.setLong("elems", elems); job.setLong("agentelems", agentelems); job.setFloat("exponent", (float) exponent); job.setFloat("scale", (float) scale); }
From source file:io.fluo.stress.trie.Generate.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        log.error("Usage: " + this.getClass().getSimpleName()
                + " <numMappers> <numbersPerMapper> <max> <output dir>");
        System.exit(-1);
    }

    int numMappers = Integer.parseInt(args[0]);
    int numPerMapper = Integer.parseInt(args[1]);
    long max = Long.parseLong(args[2]);
    Path out = new Path(args[3]);

    Preconditions.checkArgument(numMappers > 0, "numMappers <= 0");
    Preconditions.checkArgument(numPerMapper > 0, "numPerMapper <= 0");
    Preconditions.checkArgument(max > 0, "max <= 0");

    JobConf job = new JobConf(getConf());

    job.setJobName(this.getClass().getName());
    job.setJarByClass(Generate.class);

    job.setInt(TRIE_GEN_NUM_PER_MAPPER_PROP, numPerMapper);
    job.setInt(TRIE_GEN_NUM_MAPPERS_PROP, numMappers);
    job.setLong(TRIE_GEN_MAX_PROP, max);

    job.setInputFormat(RandomLongInputFormat.class);

    job.setNumReduceTasks(0);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, out);

    RunningJob runningJob = JobClient.runJob(job);
    runningJob.waitForCompletion();
    return runningJob.isSuccessful() ? 0 : -1;
}