List of usage examples for org.apache.hadoop.mapred JobConf set
public void set(String name, String value)

Sets the value of the name property in the job configuration.
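The signature above is inherited from org.apache.hadoop.conf.Configuration. Before the project examples below, here is a minimal sketch of the call in isolation; the property key "my.custom.property" and its values are hypothetical and used only for illustration:

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Store a string-valued property in the job configuration.
        // "my.custom.property" is a hypothetical key used only for illustration.
        conf.set("my.custom.property", "some-value");
        // Any component holding the configuration can later read the value back;
        // the second argument to get() is the default returned when the key is absent.
        String value = conf.get("my.custom.property", "default-value");
        System.out.println("my.custom.property = " + value);
    }
}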
From source file:edu.iu.kmeans.sgxsimu.KMeansLauncher.java
License:Apache License
/**
 * Launches all the tasks in order.
 */
@Override
public int run(String[] args) throws Exception {
    /* Put shared libraries into the distributed cache */
    Configuration conf = this.getConf();
    Initialize init = new Initialize(conf, args);

    // load args
    init.loadSysArgs();
    init.loadDistributedLibs();

    // load app args
    conf.setInt(HarpDAALConstants.FILE_DIM, Integer.parseInt(args[init.getSysArgNum()]));
    conf.setInt(HarpDAALConstants.FEATURE_DIM, Integer.parseInt(args[init.getSysArgNum() + 1]));
    conf.setInt(HarpDAALConstants.NUM_CENTROIDS, Integer.parseInt(args[init.getSysArgNum() + 2]));
    conf.setInt(Constants.ENCLAVE_TOTAL, Integer.parseInt(args[init.getSysArgNum() + 3]));
    conf.setInt(Constants.ENCLAVE_PER_THD, Integer.parseInt(args[init.getSysArgNum() + 4]));
    conf.setInt(Constants.ENCLAVE_TASK, Integer.parseInt(args[init.getSysArgNum() + 5]));
    conf.setBoolean(Constants.ENABLE_SIMU, Boolean.parseBoolean(args[init.getSysArgNum() + 6]));

    // config job
    System.out.println("Starting Job");
    long perJobSubmitTime = System.currentTimeMillis();
    System.out.println("Start Job#" + " "
            + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
    Job kmeansJob = init.createJob("kmeansJob", KMeansLauncher.class, KMeansCollectiveMapper.class);

    // initialize centroids data
    JobConf thisjobConf = (JobConf) kmeansJob.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    int nFeatures = Integer.parseInt(args[init.getSysArgNum() + 1]);
    int numCentroids = Integer.parseInt(args[init.getSysArgNum() + 2]);
    Path workPath = init.getWorkPath();
    Path cenDir = new Path(workPath, "centroids");
    fs.mkdirs(cenDir);
    if (fs.exists(cenDir)) {
        fs.delete(cenDir, true);
    }
    Path initCenDir = new Path(cenDir, "init_centroids");
    DataGenerator.generateDenseDataSingle(numCentroids, nFeatures, 1000, 0, " ", initCenDir, fs);
    thisjobConf.set(HarpDAALConstants.CEN_DIR, cenDir.toString());
    thisjobConf.set(HarpDAALConstants.CENTROID_FILE_NAME, "init_centroids");

    // generate data if required
    boolean generateData = Boolean.parseBoolean(args[init.getSysArgNum() + 7]);
    if (generateData) {
        Path inputPath = init.getInputPath();
        int total_points = Integer.parseInt(args[init.getSysArgNum() + 8]);
        int total_files = Integer.parseInt(args[init.getSysArgNum() + 9]);
        String tmpDirPathName = args[init.getSysArgNum() + 10];
        DataGenerator.generateDenseDataMulti(total_points, nFeatures, total_files, 2, 1, ",", inputPath,
                tmpDirPathName, fs);
    }

    // finish job
    boolean jobSuccess = kmeansJob.waitForCompletion(true);
    System.out.println("End Job#" + " "
            + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
    System.out.println("| Job#" + " Finished in " + (System.currentTimeMillis() - perJobSubmitTime)
            + " miliseconds |");
    if (!jobSuccess) {
        kmeansJob.killJob();
        System.out.println("kmeansJob failed");
    }
    return 0;
}
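Most of the Harp launchers in this list repeat the same idiom: obtain the new-API Job's Configuration, cast it to the old-API org.apache.hadoop.mapred.JobConf, and call set/setInt/setNumMapTasks on it. A stripped-down sketch of just that idiom, assuming a Harp-style map-collective runtime (the job name here is arbitrary):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;

public class MapCollectiveConfigSketch {
    public static Job configure(Configuration configuration, int numMapTasks) throws Exception {
        Job job = Job.getInstance(configuration, "example-map-collective-job");
        // The Configuration backing a Job is a JobConf, so the cast below exposes
        // old-API setters such as setNumMapTasks, exactly as the examples here do.
        JobConf jobConf = (JobConf) job.getConfiguration();
        jobConf.set("mapreduce.framework.name", "map-collective");
        jobConf.setNumMapTasks(numMapTasks);
        job.setNumReduceTasks(0);
        return job;
    }
}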
From source file:edu.iu.lda.LDALauncher.java
License:Apache License
private Job configureLDAJob(Path docDir, int numTopics, double alpha, double beta, int numIterations,
        int minBound, int maxBound, int numMapTasks, int numThreadsPerWorker, double scheduleRatio, int mem,
        boolean printModel, Path modelDir, Path outputDir, Configuration configuration, int jobID)
        throws IOException, URISyntaxException {
    configuration.setInt(Constants.NUM_TOPICS, numTopics);
    configuration.setDouble(Constants.ALPHA, alpha);
    configuration.setDouble(Constants.BETA, beta);
    configuration.setInt(Constants.NUM_ITERATIONS, numIterations);
    configuration.setInt(Constants.MIN_BOUND, minBound);
    configuration.setInt(Constants.MAX_BOUND, maxBound);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    configuration.setDouble(Constants.SCHEDULE_RATIO, scheduleRatio);
    System.out.println("Model Dir Path: " + modelDir.toString());
    configuration.set(Constants.MODEL_DIR, modelDir.toString());
    configuration.setBoolean(Constants.PRINT_MODEL, printModel);

    Job job = Job.getInstance(configuration, "lda_job_" + jobID);
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    // mapreduce.map.collective.memory.mb
    // 125000
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    // mapreduce.map.collective.java.opts
    // -Xmx120000m -Xms120000m
    int xmx = (mem - 5000) > (mem * 0.9) ? (mem - 5000) : (int) Math.ceil(mem * 0.9);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");
    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);

    FileInputFormat.setInputPaths(job, docDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(LDALauncher.class);
    job.setMapperClass(LDAMPCollectiveMapper.class);
    job.setNumReduceTasks(0);
    return job;
}
From source file:edu.iu.mds.MDSMapReduce.java
License:Apache License
private Job prepareBCCalcJob(String inputDir, String xFile, String outputDirPath, int iterationCount,
        int jobCount, Configuration configuration, int numPoints, int xWidth, int numMapTasks,
        int partitionPerWorker)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Job job = new Job(configuration, "map-collective-mds-bc" + jobCount);
    Configuration jobConfig = job.getConfiguration();
    Path outputDir = new Path(outputDirPath);
    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    jobConfig.setInt(MDSConstants.ITERATION, iterationCount);
    jobConfig.setInt(MDSConstants.NUMPOINTS, numPoints);
    jobConfig.setInt(MDSConstants.XWIDTH, xWidth);
    jobConfig.set(MDSConstants.XFILE, xFile);
    jobConfig.setInt(MDSConstants.NUM_MAPS, numMapTasks);
    jobConfig.setInt(MDSConstants.PARTITION_PER_WORKER, partitionPerWorker);
    // input class to file-based class
    job.setInputFormatClass(MultiFileInputFormat.class);
    // job.setInputFormatClass(DataFileInputFormat.class);
    // job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setJarByClass(MDSMapReduce.class);
    job.setMapperClass(MDSAllgatherMultiThreadMapper.class);
    // When use MultiFileInputFormat, remember to set the number of map tasks
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMapTasks);
    job.setNumReduceTasks(0);
    return job;
}
From source file:edu.iu.pagerank.PRJobLauncher.java
License:Apache License
private Job prepareJob(String inputDir, int totalVtx, int iterationCount, int jobCount, int numMapTasks,
        int partiitonPerWorker, String outputDirPath, Configuration configuration)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Job job = new Job(configuration, "harp-pagerank-" + jobCount);
    Configuration jobConfig = job.getConfiguration();
    Path outputDir = new Path(outputDirPath);
    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    jobConfig.setInt(PRConstants.ITERATION, iterationCount);
    jobConfig.setInt(PRConstants.TOTAL_VTX, totalVtx);
    jobConfig.setInt(PRConstants.NUM_MAPS, numMapTasks);
    jobConfig.setInt(PRConstants.PARTITION_PER_WORKER, partiitonPerWorker);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(PRJobLauncher.class);
    job.setMapperClass(PRMultiThreadMapper.class);
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMapTasks);
    job.setNumReduceTasks(0);
    return job;
}
From source file:edu.iu.sgd.SGDLauncher.java
License:Apache License
private Job configureSGDJob(Path inputDir, int r, double lambda, double epsilon, int numIterations,
        int trainRatio, int numMapTasks, int numThreadsPerWorker, double scheduleRatio, int mem, Path modelDir,
        Path outputDir, String testFilePath, Configuration configuration, int jobID)
        throws IOException, URISyntaxException {
    configuration.setInt(Constants.R, r);
    configuration.setDouble(Constants.LAMBDA, lambda);
    configuration.setDouble(Constants.EPSILON, epsilon);
    configuration.setInt(Constants.NUM_ITERATIONS, numIterations);
    configuration.setInt(Constants.TRAIN_RATIO, trainRatio);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    configuration.setDouble(Constants.SCHEDULE_RATIO, scheduleRatio);
    configuration.set(Constants.MODEL_DIR, modelDir.toString());
    configuration.set(Constants.TEST_FILE_PATH, testFilePath);

    Job job = Job.getInstance(configuration, "sgd_job_" + jobID);
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    // mapreduce.map.collective.memory.mb
    // 125000
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    // mapreduce.map.collective.java.opts
    // -Xmx120000m -Xms120000m
    int xmx = (mem - 5000) > (mem * 0.9) ? (mem - 5000) : (int) Math.ceil(mem * 0.9);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");
    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);

    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(SGDLauncher.class);
    job.setMapperClass(SGDCollectiveMapper.class);
    job.setNumReduceTasks(0);
    return job;
}
From source file:edu.iu.wdamds.MDSLauncher.java
License:Apache License
private Job prepareWDAMDSJob(int numMapTasks, Path dataDirPath, Path xFilePath, Path xOutFilePath,
        Path outDirPath, String idsFile, String labelsFile, double threshold, int d, double alpha, int n,
        int cgIter, int numThreads)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf(), "map-collective-wdamds");
    Configuration jobConfig = job.getConfiguration();
    FileInputFormat.setInputPaths(job, dataDirPath);
    FileOutputFormat.setOutputPath(job, outDirPath);
    jobConfig.setInt(MDSConstants.NUM_MAPS, numMapTasks);
    // Load from HDFS
    // Now we ignore and don't read x file from HDFS
    jobConfig.set(MDSConstants.X_FILE_PATH, xFilePath.toString());
    // Output to HDFS
    jobConfig.set(MDSConstants.X_OUT_FILE_PATH, xOutFilePath.toString());
    // Load from shared file system
    jobConfig.set(MDSConstants.IDS_FILE, idsFile);
    // Load from shared file system
    jobConfig.set(MDSConstants.LABELS_FILE, labelsFile);
    jobConfig.setDouble(MDSConstants.THRESHOLD, threshold);
    jobConfig.setInt(MDSConstants.D, d);
    jobConfig.setDouble(MDSConstants.ALPHA, alpha);
    jobConfig.setInt(MDSConstants.N, n);
    jobConfig.setInt(MDSConstants.CG_ITER, cgIter);
    jobConfig.setInt(MDSConstants.NUM_THREADS, numThreads);
    // input class to file-based class
    job.setInputFormatClass(SingleFileInputFormat.class);
    job.setJarByClass(MDSLauncher.class);
    job.setMapperClass(WDAMDSMapper.class);
    // When use MultiFileInputFormat, remember to set the number of map tasks
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMapTasks);
    job.setNumReduceTasks(0);
    return job;
}
From source file:edu.iu.wordcount.CollectiveWordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "collective word count");
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumReduceTasks(0);
    job.setJarByClass(CollectiveWordCount.class);
    job.setMapperClass(WordCountMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:edu.ohsu.sonmezsysbio.cloudbreak.command.CommandNovoalignSingleEnds.java
public void runHadoopJob(Configuration configuration) throws IOException, URISyntaxException {
    JobConf conf = new JobConf(configuration);
    conf.setJobName("Single End Alignment");
    conf.setJarByClass(Cloudbreak.class);
    FileInputFormat.addInputPath(conf, new Path(hdfsDataDir));
    Path outputDir = new Path(hdfsAlignmentsDir);
    FileSystem.get(conf).delete(outputDir);
    FileOutputFormat.setOutputPath(conf, outputDir);
    addDistributedCacheFile(conf, reference, "novoalign.reference");
    addDistributedCacheFile(conf, pathToNovoalign, "novoalign.executable");
    if (pathToNovoalignLicense != null) {
        addDistributedCacheFile(conf, pathToNovoalignLicense, "novoalign.license");
    }
    DistributedCache.createSymlink(conf);
    conf.set("mapred.task.timeout", "3600000");
    conf.set("novoalign.threshold", threshold);
    conf.set("novoalign.quality.format", qualityFormat);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(NovoalignSingleEndMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setCompressMapOutput(true);
    conf.setReducerClass(SingleEndAlignmentsToPairsReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.set("mapred.output.compress", "true");
    conf.set("mapred.output.compression", "org.apache.hadoop.io.compress.SnappyCodec");
    JobClient.runJob(conf);
}
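The Cloudbreak example above, and the two that follow, use the old mapred API directly: a JobConf is constructed up front, string-valued properties go in through set(), and the job is submitted with JobClient. A condensed sketch of that pattern, with placeholder identity mapper/reducer and argument paths that are not part of any project listed here:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class OldApiJobConfSketch {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(OldApiJobConfSketch.class);
        conf.setJobName("old-api-sketch"); // arbitrary job name
        // String-valued properties are stored with set(); these keys mirror the example above.
        conf.set("mapred.task.timeout", "3600000");
        conf.set("mapred.output.compress", "true");
        conf.setInputFormat(KeyValueTextInputFormat.class); // tab-separated Text key/value lines
        conf.setOutputFormat(TextOutputFormat.class);
        conf.setMapperClass(IdentityMapper.class);          // pass-through mapper (placeholder)
        conf.setReducerClass(IdentityReducer.class);        // pass-through reducer (placeholder)
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        JobClient.runJob(conf); // blocks until the job finishes
    }
}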
From source file:edu.stolaf.cs.wmrserver.HadoopEngine.java
License:Apache License
public void submit(JobRequest request, long submissionID, File mapperFile, File reducerFile, File packageDir,
        Path inputPath) throws ValidationException, NotFoundException, CompilationException, InternalException {
    // Generate job output path
    Path outputDir = new Path(_homeDir, "out");
    Path outputPath;
    try {
        FileSystem fs = outputDir.getFileSystem(new Configuration());
        outputPath = JobServiceHandler.getNonexistantPath(outputDir, request.getName(), fs);
    } catch (IOException ex) {
        throw JobServiceHandler.wrapException("Could not construct output path.", ex);
    }

    JobConf conf = new JobConf();
    conf.setJobName(request.getName());

    // Set mapper and number of tasks if specified
    StreamJob.setStreamMapper(conf, mapperFile.toString());
    if (request.isSetMapTasks())
        conf.setNumMapTasks(request.getMapTasks());

    // Set reducer and number of tasks if specified
    StreamJob.setStreamReducer(conf, reducerFile.toString());
    if (request.isSetReduceTasks())
        conf.setNumReduceTasks(request.getReduceTasks());

    // Create and set job JAR, including necessary files
    ArrayList<String> jarFiles = new ArrayList<String>();
    jarFiles.add(packageDir.toString());
    String jarPath;
    try {
        jarPath = StreamJob.createJobJar(conf, jarFiles, _tempDir);
    } catch (IOException ex) {
        throw JobServiceHandler.wrapException("Could not create job jar.", ex);
    }
    if (jarPath != null)
        conf.setJar(jarPath);

    // TODO: This is a hack. Rewrite streaming to use DistributedCache.
    //conf.setPattern("mapreduce.job.jar.unpack.pattern",
    //        Pattern.compile(".*"));

    // Set I/O formats and paths
    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // Use numeric sort if appropriate
    conf.setBoolean(CONF_NUMERIC, request.isNumericSort());
    if (request.isNumericSort()) {
        conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
        conf.setPartitionerClass(KeyFieldBasedPartitioner.class);
        conf.setKeyFieldComparatorOptions("-n");
        conf.setKeyFieldPartitionerOptions("-n");
    }

    // Set other job information
    conf.set(CONF_USER, request.getUser());
    conf.set(CONF_LANGUAGE, request.getLanguage());
    conf.set(CONF_MAPPER, request.getMapper());
    conf.set(CONF_REDUCER, request.getReducer());

    // Attempt to submit the job
    RunningJob job;
    try {
        JobClient client = new JobClient(new JobConf());
        job = client.submitJob(conf);
    } catch (IOException ex) {
        throw JobServiceHandler.wrapException("There was a serious error while attempting to submit the job.", ex);
    }

    try {
        SubmissionDatabase.setSubmitted(submissionID);
        SubmissionDatabase.setHadoopID(submissionID, job.getID().toString());
    } catch (SQLException ex) {
        throw JobServiceHandler.wrapException("Could not update submission in database.", ex);
    }
}
From source file:edu.stolaf.cs.wmrserver.streaming.StreamJob.java
License:Apache License
public static void setStreamMapper(JobConf conf, String mapCommand) {
    conf.setMapperClass(PipeMapper.class);
    conf.setMapRunnerClass(PipeMapRunner.class);
    try {
        conf.set("stream.map.streamprocessor", URLEncoder.encode(mapCommand, "UTF-8"));
    } catch (UnsupportedEncodingException ex) {
        // This is VERY likely to happen. Especially since the ENTIRE FREAKING
        // STRING IMPLEMENTATION is based on UTF-8. Thanks, Java.
        throw new RuntimeException("The sky is falling! Java doesn't support UTF-8.");
    }
}