List of usage examples for org.apache.hadoop.mapred.JobConf#setKeyFieldComparatorOptions
public void setKeyFieldComparatorOptions(String keySpec)
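The method takes a Unix sort-style key specification (e.g. -k pos1[,pos2] with n for numeric and r for reverse) that KeyFieldBasedComparator uses to order map output keys. The following minimal sketch shows a typical pairing with KeyFieldBasedPartitioner; the field spec "-k2,2nr", the partitioner spec "-k1,1", and the class name KeyFieldSortExample are illustrative assumptions, not taken from the examples below.

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.KeyFieldBasedComparator;
import org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner;

public class KeyFieldSortExample {
    public static JobConf configure() {
        JobConf conf = new JobConf();
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        // Sort reducer input on the second key field, numerically, in reverse order.
        // ("-k2,2nr" is a hypothetical spec chosen for illustration.)
        conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
        conf.setKeyFieldComparatorOptions("-k2,2nr");

        // Partition on the first key field so related keys reach the same reducer.
        conf.setPartitionerClass(KeyFieldBasedPartitioner.class);
        conf.setKeyFieldPartitionerOptions("-k1,1");

        return conf;
    }
}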
From source file:crunch.MaxTemperature.java
License:Apache License
private void check(String options, Text l1, Text l2, int c) throws IOException {
    // Configure a comparator with the given key-field options and verify that
    // comparing the two serialized Text keys yields the expected result.
    JobConf conf = new JobConf();
    conf.setKeyFieldComparatorOptions(options);
    KeyFieldBasedComparator comp = new KeyFieldBasedComparator();
    comp.configure(conf);
    DataOutputBuffer out1 = serialize(l1);
    DataOutputBuffer out2 = serialize(l2);
    assertThat(options,
            comp.compare(out1.getData(), 0, out1.getLength(), out2.getData(), 0, out2.getLength()),
            is(c));
}
From source file:edu.stolaf.cs.wmrserver.HadoopEngine.java
License:Apache License
public void submit(JobRequest request, long submissionID, File mapperFile, File reducerFile, File packageDir,
        Path inputPath)
        throws ValidationException, NotFoundException, CompilationException, InternalException {
    // Generate job output path
    Path outputDir = new Path(_homeDir, "out");
    Path outputPath;
    try {
        FileSystem fs = outputDir.getFileSystem(new Configuration());
        outputPath = JobServiceHandler.getNonexistantPath(outputDir, request.getName(), fs);
    } catch (IOException ex) {
        throw JobServiceHandler.wrapException("Could not construct output path.", ex);
    }

    JobConf conf = new JobConf();
    conf.setJobName(request.getName());

    // Set mapper and number of tasks if specified
    StreamJob.setStreamMapper(conf, mapperFile.toString());
    if (request.isSetMapTasks())
        conf.setNumMapTasks(request.getMapTasks());

    // Set reducer and number of tasks if specified
    StreamJob.setStreamReducer(conf, reducerFile.toString());
    if (request.isSetReduceTasks())
        conf.setNumReduceTasks(request.getReduceTasks());

    // Create and set job JAR, including necessary files
    ArrayList<String> jarFiles = new ArrayList<String>();
    jarFiles.add(packageDir.toString());
    String jarPath;
    try {
        jarPath = StreamJob.createJobJar(conf, jarFiles, _tempDir);
    } catch (IOException ex) {
        throw JobServiceHandler.wrapException("Could not create job jar.", ex);
    }
    if (jarPath != null)
        conf.setJar(jarPath);

    // TODO: This is a hack. Rewrite streaming to use DistributedCache.
    //conf.setPattern("mapreduce.job.jar.unpack.pattern",
    //        Pattern.compile(".*"));

    // Set I/O formats and paths
    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // Use numeric sort if appropriate
    conf.setBoolean(CONF_NUMERIC, request.isNumericSort());
    if (request.isNumericSort()) {
        conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
        conf.setPartitionerClass(KeyFieldBasedPartitioner.class);
        conf.setKeyFieldComparatorOptions("-n");
        conf.setKeyFieldPartitionerOptions("-n");
    }

    // Set other job information
    conf.set(CONF_USER, request.getUser());
    conf.set(CONF_LANGUAGE, request.getLanguage());
    conf.set(CONF_MAPPER, request.getMapper());
    conf.set(CONF_REDUCER, request.getReducer());

    // Attempt to submit the job
    RunningJob job;
    try {
        JobClient client = new JobClient(new JobConf());
        job = client.submitJob(conf);
    } catch (IOException ex) {
        throw JobServiceHandler.wrapException("There was a serious error while attempting to submit the job.",
                ex);
    }

    try {
        SubmissionDatabase.setSubmitted(submissionID);
        SubmissionDatabase.setHadoopID(submissionID, job.getID().toString());
    } catch (SQLException ex) {
        throw JobServiceHandler.wrapException("Could not update submission in database.", ex);
    }
}