List of usage examples for org.apache.hadoop.mapred.JobConf.set
public void set(String name, String value)
Sets the value of the name property.
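JobConf inherits this method from Configuration: it stores a plain string key/value pair in the job configuration, which tasks can later read back with get. Before the project-specific examples below, here is a minimal sketch of the call pattern; the class name JobConfSetSketch and the property key "my.app.greeting" are made up for illustration:

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf(JobConfSetSketch.class);
        job.setJobName("jobconf-set-sketch");
        // Store an arbitrary key/value pair; tasks can read it back with job.get(...)
        job.set("my.app.greeting", "hello");
        // Framework properties are set the same way, e.g. the child JVM heap size
        job.set("mapred.child.java.opts", "-Xmx1024m");
        System.out.println(job.get("my.app.greeting")); // prints "hello"
    }
}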
From source file:de.tudarmstadt.lt.n2n.hadoop.pipetests.GoogleSyntacticsJob5.java
License:Apache License
@Override
public void configure(JobConf job) {
    String extractorConfigurationFiles = job.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
    if (extractorConfigurationFiles == null) {
        extractorConfigurationFiles = StringUtils.join(SHARED_CONSTANTS.DEFAULT_EXTRACTOR_CONFIGURATIONS, ',');
        System.out.format("Extractorconfigurationfile parameter not set. Assuming -D%s=%s %n",
                SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
        job.set(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
    }

    try {
        String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
        for (int i = 0; i < extractorConfigurationFilesArr.length; i++)
            DistributedCache.addFileToClassPath(new Path(extractorConfigurationFilesArr[i]), job);
    } catch (IOException e) {
        e.printStackTrace();
    }

    Text2CASInputFormat.setDocumentTextExtractorClass(job, KeyPlusValueAsDocumentExtractor.class);

    job.setMapperClass(JoBimMapper.class);
    job.setReducerClass(JoBimReducer.class);

    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    job.setMemoryForMapTask(4096);
    job.setMemoryForReduceTask(4096);
    job.set("mapred.child.java.opts", "-Xmx4096m");

    job.setNumReduceTasks(1); // reset to default
}
From source file:de.tudarmstadt.lt.n2n.hadoop.PreparsedJob.java
License:Apache License
@Override
public void configure(JobConf job) {
    String extractorConfigurationFiles = job.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
    if (extractorConfigurationFiles == null) {
        extractorConfigurationFiles = StringUtils.join(SHARED_CONSTANTS.DEFAULT_EXTRACTOR_CONFIGURATIONS, ',');
        System.out.format("Extractorconfigurationfile parameter not set. Assuming -D%s=%s %n",
                SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
        job.set(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
    }

    try {
        String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
        for (int i = 0; i < extractorConfigurationFilesArr.length; i++)
            DistributedCache.addFileToClassPath(new Path(extractorConfigurationFilesArr[i]), job);
    } catch (IOException e1) {
        e1.printStackTrace();
    }

    Text2CASInputFormat.setDocumentTextExtractorClass(job, KeyPlusValueAsDocumentExtractor.class);

    job.setOutputFormat(NullOutputFormat.class); // ignore the serialized cas and use only the output from the CasConsumer
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.examples.CasConsumerExample.java
License:Apache License
@Override
public void configure(JobConf job) {
    job.set("mapreduce.job.queuename", "smalljob");
    job.setInputFormat(Text2CASInputFormat.class);
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.examples.Text2CASExample.java
License:Apache License
@Override
public void configure(JobConf job) {
    job.set("mapreduce.job.queuename", "smalljob");
    /*
     * Use Text2Cas InputFormat, read texts directly from hdfs
     */
    job.setInputFormat(Text2CASInputFormat.class);
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.io.hadoop.XCASSequenceFileWriter.java
License:Apache License
@SuppressWarnings("deprecation") @Override/*from w w w.j a v a 2s . c o m*/ public void initialize(org.apache.uima.UimaContext context) throws org.apache.uima.resource.ResourceInitializationException { super.initialize(context); final JobConf conf = new JobConf(); this.path = new File((String) context.getConfigParameterValue(PARAM_PATH)); conf.set("fs.default.name", this.fileSystemName); // Compress Map output if (this.compress) { System.out.println("compressing"); conf.set("mapred.compress.map.output", "true"); conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec"); } // Compress MapReduce output conf.set("mapred.output.compress", "true"); conf.set("mapred.output.compression", "org.apache.hadoop.io.compress.SnappyCodec"); FileOutputFormat.setCompressOutput(conf, true); // FileOutputFormat.setOutputCompressorClass(conf, SnappyCodec.class); final String filename = this.path + "/" + "part-00000"; try { final FileSystem fs = FileSystem.get(URI.create(filename), conf); final Path path = new Path(URI.create(filename).toString()); this.writer = SequenceFile.createWriter(fs, conf, path, Text.class, Text.class); } catch (final IOException e) { throw new ResourceInitializationException(); } }
From source file:edu.brown.cs.mapreduce.BenchmarkBase.java
License:Open Source License
public JobConf getJobConf() {
    JobConf jobConf = new JobConf(this.conf, this.benchmarkClass);

    // Options
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; i++) {
        try {
            // Print property and exit
            if ("-property".equals(args[i])) {
                String prop = jobConf.get(args[i + 1]);
                System.out.println(prop);
                System.exit(0);
            // # of Maps
            } else if ("-m".equals(args[i])) {
                this.num_of_maps = Integer.parseInt(args[++i]);
            // # of Reduces
            } else if ("-r".equals(args[i])) {
                this.num_of_reduces = Integer.parseInt(args[++i]);
            // Enable debug
            } else if ("-debug".equals(args[i])) {
                this.debug = true;
            // Enable single output file for results
            } else if ("-combine".equals(args[i])) {
                this.combine = true;
            // Tell jobs to compress their intermediate output files
            } else if ("-compress".equals(args[i])) {
                this.compress = true;
            // We're using TupleWritable (which has to be in a SequenceFile)
            } else if ("-tuple".equals(args[i])) {
                this.tuple_data = true;
                this.sequence_file = true;
            // Use SequenceFiles for initial input
            } else if ("-sequence".equals(args[i])) {
                this.sequence_file = true;
            // Recursively load directories
            } else if ("-recursive-dirs".equals(args[i])) {
                this.load_directories = true;
            // Job Basename
            } else if ("-basename".equals(args[i])) {
                this.job_name = args[++i];
            // Misc. Properties
            } else if ("-D".equals(args[i].substring(0, 2))) {
                String arg = args[i].substring(2);
                int pos = arg.indexOf('=');
                if (pos == -1) {
                    System.err.println("ERROR: Invalid properties option '" + arg + "'");
                    System.exit(1);
                }
                this.options.put(arg.substring(0, pos), arg.substring(pos + 1));
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.err.println("ERROR: Integer expected instead of " + args[i]);
            System.exit(1);
        } catch (ArrayIndexOutOfBoundsException except) {
            System.err.println("ERROR: Required parameter missing from " + args[i - 1]);
            System.exit(1);
        }
    } // FOR

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() < 2) {
        System.err.println("ERROR: Wrong number of parameters: " + otherArgs.size());
        System.exit(1);
    }

    // Set these flags so the jobs know about them
    if (this.getSequenceFile()) this.options.put(PROPERTY_SEQUENCEFILE, "true");
    if (this.getTupleData()) this.options.put(PROPERTY_TUPLEDATA, "true");
    if (this.getDebug()) this.options.put(PROPERTY_DEBUG, "true");

    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
    } catch (Exception ex) {
        ex.printStackTrace();
        System.exit(-1);
    }

    // Input Paths
    int cnt = otherArgs.size() - 1;
    this.input_paths = new ArrayList<Path>();
    for (int ctr = 0; ctr < cnt; ctr++) {
        Path new_path = new Path(otherArgs.get(ctr));
        try {
            if (this.load_directories && fs.getFileStatus(new_path).isDir()) {
                //int limit = 10;
                FileStatus paths[] = fs.listStatus(new_path);
                for (FileStatus p : paths) {
                    this.input_paths.add(p.getPath());
                    FileInputFormat.addInputPath(jobConf, p.getPath());
                    //if (limit-- <= 0) break;
                } // FOR
            } else {
                this.input_paths.add(new_path);
                FileInputFormat.addInputPath(jobConf, new_path);
            }
        } catch (Exception ex) {
            ex.printStackTrace();
            System.exit(-1);
        }
    } // FOR
    if (this.input_paths.isEmpty()) {
        System.err.println("ERROR: No input paths were defined for '" + this.benchmarkClass.getSimpleName() + "'");
        System.exit(-1);
    }

    // Output Paths
    this.output_path = new Path(otherArgs.get(otherArgs.size() - 1));
    FileOutputFormat.setOutputPath(jobConf, this.output_path);

    jobConf.setJobName(this.job_name != null ? this.job_name : this.benchmarkClass.getSimpleName());
    if (this.num_of_maps >= 0) jobConf.setNumMapTasks(this.num_of_maps);
    if (this.num_of_reduces >= 0) jobConf.setNumReduceTasks(this.num_of_reduces);

    // Set all properties
    for (String key : this.options.keySet()) {
        jobConf.set(key, this.options.get(key));
    }

    return (jobConf);
}
From source file:edu.iu.benchmark.JobLauncher.java
License:Apache License
private Job configureBenchmarkJob(String cmd, int bytesPerPartition, int numPartitions, int numMappers,
        int numIterations, Path inputDirPath, Path outputDirPath) throws IOException, URISyntaxException {
    Job job = Job.getInstance(getConf(), "benchmark_job");
    FileInputFormat.setInputPaths(job, inputDirPath);
    FileOutputFormat.setOutputPath(job, outputDirPath);
    job.setInputFormatClass(SingleFileInputFormat.class);
    job.setJarByClass(JobLauncher.class);
    job.setMapperClass(BenchmarkMapper.class);
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMappers);
    job.setNumReduceTasks(0);
    jobConf.set(Constants.BENCHMARK_CMD, cmd);
    jobConf.setInt(Constants.BYTES_PER_PARTITION, bytesPerPartition);
    jobConf.setInt(Constants.NUM_PARTITIONS, numPartitions);
    jobConf.setInt(Constants.NUM_MAPPERS, numMappers);
    jobConf.setInt(Constants.NUM_ITERATIONS, numIterations);
    return job;
}
From source file:edu.iu.ccd.CCDLauncher.java
License:Apache License
private Job configureCCDJob(Path inputDir, int r, double lambda, int numIterations, int numMapTasks,
        int numThreadsPerWorker, int numModelSlices, Path modelDir, Path outputDir, String testFilePath,
        Configuration configuration, int jobID) throws IOException, URISyntaxException {
    configuration.setInt(Constants.R, r);
    configuration.setDouble(Constants.LAMBDA, lambda);
    configuration.setInt(Constants.NUM_ITERATIONS, numIterations);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    System.out.println("Model Dir Path: " + modelDir.toString());
    configuration.set(Constants.MODEL_DIR, modelDir.toString());
    configuration.setInt(Constants.NUM_MODEL_SLICES, numModelSlices);
    configuration.set(Constants.TEST_FILE_PATH, testFilePath);
    Job job = Job.getInstance(configuration, "ccd_job_" + jobID);
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(CCDLauncher.class);
    job.setMapperClass(CCDMPCollectiveMapper.class);
    job.setNumReduceTasks(0);
    return job;
}
From source file:edu.iu.daal_cov.COVDaalLauncher.java
License:Apache License
private Job configureCOVJob(Path inputDir, int mem, int numMapTasks, int numThreadsPerWorker, Path modelDir,
        Path outputDir, Configuration configuration) throws IOException, URISyntaxException {
    // configuration.set(Constants.TEST_FILE_PATH, testDirPath);
    // configuration.set(Constants.TEST_TRUTH_PATH, testGroundTruthDirPath);
    configuration.setInt(Constants.NUM_MAPPERS, numMapTasks);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    // configuration.setInt(Constants.BATCH_SIZE, batchSize);

    Job job = Job.getInstance(configuration, "cov_job");
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    int xmx = (int) Math.ceil((mem - 2000) * 0.5);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");
    jobConf.setNumMapTasks(numMapTasks);

    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(COVDaalLauncher.class);
    job.setMapperClass(COVDaalCollectiveMapper.class);
    job.setNumReduceTasks(0);
    System.out.println("Launcher launched");
    return job;
}
From source file:edu.iu.daal_kmeans.regroupallgather.KMeansDaalLauncher.java
License:Apache License
/**
 * Launches all the tasks in order.
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    Initialize init = new Initialize(conf, args);

    /* Put shared libraries into the distributed cache */
    init.loadDistributedLibs();

    // load args
    init.loadSysArgs();

    // load app args
    conf.setInt(HarpDAALConstants.FILE_DIM, Integer.parseInt(args[init.getSysArgNum()]));
    conf.setInt(HarpDAALConstants.FEATURE_DIM, Integer.parseInt(args[init.getSysArgNum() + 1]));
    conf.setInt(HarpDAALConstants.NUM_CENTROIDS, Integer.parseInt(args[init.getSysArgNum() + 2]));

    // config job
    System.out.println("Starting Job");
    long perJobSubmitTime = System.currentTimeMillis();
    System.out.println("Start Job#" + " "
            + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
    Job kmeansJob = init.createJob("kmeansJob", KMeansDaalLauncher.class, KMeansDaalCollectiveMapper.class);

    // initialize centroids data
    JobConf thisjobConf = (JobConf) kmeansJob.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    int nFeatures = Integer.parseInt(args[init.getSysArgNum() + 1]);
    int numCentroids = Integer.parseInt(args[init.getSysArgNum() + 2]);
    Path workPath = init.getWorkPath();
    Path cenDir = new Path(workPath, "centroids");
    fs.mkdirs(cenDir);
    if (fs.exists(cenDir)) {
        fs.delete(cenDir, true);
    }
    Path initCenDir = new Path(cenDir, "init_centroids");
    DataGenerator.generateDenseDataSingle(numCentroids, nFeatures, 1000, 0, " ", initCenDir, fs);
    thisjobConf.set(HarpDAALConstants.CEN_DIR, cenDir.toString());
    thisjobConf.set(HarpDAALConstants.CENTROID_FILE_NAME, "init_centroids");

    // generate Data if required
    boolean generateData = Boolean.parseBoolean(args[init.getSysArgNum() + 3]);
    if (generateData) {
        Path inputPath = init.getInputPath();
        int total_points = Integer.parseInt(args[init.getSysArgNum() + 4]);
        int total_files = Integer.parseInt(args[init.getSysArgNum() + 5]);
        String tmpDirPathName = args[init.getSysArgNum() + 6];
        DataGenerator.generateDenseDataMulti(total_points, nFeatures, total_files, 2, 1, ",", inputPath,
                tmpDirPathName, fs);
    }

    // finish job
    boolean jobSuccess = kmeansJob.waitForCompletion(true);
    System.out.println("End Job#" + " "
            + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
    System.out.println("| Job#" + " Finished in " + (System.currentTimeMillis() - perJobSubmitTime)
            + " miliseconds |");
    if (!jobSuccess) {
        kmeansJob.killJob();
        System.out.println("kmeansJob failed");
    }
    return 0;
}