List of usage examples for org.apache.hadoop.mapred JobConf setInputFormat
public void setInputFormat(Class<? extends InputFormat> theClass)
From source file:edu.stolaf.cs.wmrserver.HadoopEngine.java
License:Apache License
public void submit(JobRequest request, long submissionID, File mapperFile, File reducerFile, File packageDir, Path inputPath) throws ValidationException, NotFoundException, CompilationException, InternalException { // Generate job output path Path outputDir = new Path(_homeDir, "out"); Path outputPath;//from w w w. jav a 2 s . co m try { FileSystem fs = outputDir.getFileSystem(new Configuration()); outputPath = JobServiceHandler.getNonexistantPath(outputDir, request.getName(), fs); } catch (IOException ex) { throw JobServiceHandler.wrapException("Could not construct output path.", ex); } JobConf conf = new JobConf(); conf.setJobName(request.getName()); // Set mapper and number of tasks if specified StreamJob.setStreamMapper(conf, mapperFile.toString()); if (request.isSetMapTasks()) conf.setNumMapTasks(request.getMapTasks()); // Set reducer and number of tasks if specified StreamJob.setStreamReducer(conf, reducerFile.toString()); if (request.isSetReduceTasks()) conf.setNumReduceTasks(request.getReduceTasks()); // Create and set job JAR, including necessary files ArrayList<String> jarFiles = new ArrayList<String>(); jarFiles.add(packageDir.toString()); String jarPath; try { jarPath = StreamJob.createJobJar(conf, jarFiles, _tempDir); } catch (IOException ex) { throw JobServiceHandler.wrapException("Could not create job jar.", ex); } if (jarPath != null) conf.setJar(jarPath); // TODO: This is a hack. Rewrite streaming to use DistributedCache. //conf.setPattern("mapreduce.job.jar.unpack.pattern", // Pattern.compile(".*")); // Set I/O formats and paths conf.setInputFormat(KeyValueTextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); FileInputFormat.addInputPath(conf, inputPath); FileOutputFormat.setOutputPath(conf, outputPath); // Use numeric sort if appropriate conf.setBoolean(CONF_NUMERIC, request.isNumericSort()); if (request.isNumericSort()) { conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class); conf.setPartitionerClass(KeyFieldBasedPartitioner.class); conf.setKeyFieldComparatorOptions("-n"); conf.setKeyFieldPartitionerOptions("-n"); } // Set other job information conf.set(CONF_USER, request.getUser()); conf.set(CONF_LANGUAGE, request.getLanguage()); conf.set(CONF_MAPPER, request.getMapper()); conf.set(CONF_REDUCER, request.getReducer()); // Attempt to submit the job RunningJob job; try { JobClient client = new JobClient(new JobConf()); job = client.submitJob(conf); } catch (IOException ex) { throw JobServiceHandler.wrapException("There was a serious error while attempting to submit the job.", ex); } try { SubmissionDatabase.setSubmitted(submissionID); SubmissionDatabase.setHadoopID(submissionID, job.getID().toString()); } catch (SQLException ex) { throw JobServiceHandler.wrapException("Could not update submission in database.", ex); } }
From source file:edu.ub.ahstfg.indexer.Indexer.java
License:Open Source License
@Override public int run(String[] arg0) throws Exception { LOG.info("Creating Hadoop job for Indexer."); JobConf job = new JobConf(getConf()); job.setJarByClass(Indexer.class); LOG.info("Setting input path to '" + INPUT_PATH + "'"); FileInputFormat.setInputPaths(job, new Path(INPUT_PATH)); // Set filters if it's necessary. LOG.info("Clearing the output path at '" + OUTPUT_PATH + "'"); // Change URI to Path if it's necessary. FileSystem fs = FileSystem.get(new URI(OUTPUT_PATH), job); if (fs.exists(new Path(OUTPUT_PATH))) { fs.delete(new Path(OUTPUT_PATH), true); }//from w w w . ja v a 2 s . c om LOG.info("Setting output path to '" + OUTPUT_PATH + "'"); FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH)); FileOutputFormat.setCompressOutput(job, false); LOG.info("Setting input format."); job.setInputFormat(ArcInputFormat.class); LOG.info("Setting output format."); job.setOutputFormat(IndexOutputFormat.class); LOG.info("Setting output data types."); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IndexRecord.class); LOG.info("Setting mapper and reducer."); job.setMapperClass(IndexerMapper.class); job.setMapOutputValueClass(ParsedDocument.class); job.setReducerClass(IndexerReducer.class); if (JobClient.runJob(job).isSuccessful()) { return 0; } else { return 1; } }
From source file:edu.ub.ahstfg.indexer.wordcount.WordCount.java
License:Open Source License
@Override public int run(String[] args) throws Exception { LOG.info("Creating Hadoop job for ARC input files word count."); JobConf job = new JobConf(getConf()); job.setJarByClass(WordCount.class); LOG.info("Setting input path to '" + inputPath + "'"); FileInputFormat.setInputPaths(job, new Path(inputPath)); // Set filters if it's necessary. LOG.info("Clearing the output path at '" + outputPath + "'"); // Change URI to Path if it's necessary. FileSystem fs = FileSystem.get(new URI(outputPath), job); if (fs.exists(new Path(outputPath))) { fs.delete(new Path(outputPath), true); }/* w w w . ja v a 2s . c o m*/ LOG.info("Setting output path to '" + outputPath + "'"); FileOutputFormat.setOutputPath(job, new Path(outputPath)); FileOutputFormat.setCompressOutput(job, false); LOG.info("Setting input format."); // job.setInputFormat(TextInputFormat.class); job.setInputFormat(ArcInputFormat.class); LOG.info("Setting output format."); job.setOutputFormat(TextOutputFormat.class); LOG.info("Setting output data types."); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); LOG.info("Setting mapper and reducer."); // job.setMapperClass(WordCountTextInputMapper.class); job.setMapperClass(WordCountArcInputMapper.class); job.setReducerClass(LongSumReducer.class); if (JobClient.runJob(job).isSuccessful()) { return 0; } else { return 1; } }
From source file:edu.ubc.mirrors.holographs.mapreduce.Driver.java
License:Open Source License
public int run(String[] args) throws Exception { JobConf job = new JobConf(getConf()); job.setClassLoader(Driver.class.getClassLoader()); job.setInputFormat(SnapshotObjectsOfTypeInputFormat.class); job.setMapperClass(InvokeMethodMapper.class); job.setCombinerClass(TextCountSumReducer.class); job.setReducerClass(TextCountSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.set("snapshotPath", args[0]); job.set("targetClassName", "org.eclipse.cdt.internal.core.dom.parser.cpp.CPPASTName"); job.setInt("splitSize", 10000); job.setInt("maxNumObjects", 100000); FileInputFormat.addInputPath(job, new Path(args[0])); String outputPath = args[1];// www.j av a2s . c o m int suffix = 2; while (new File(outputPath).exists()) { outputPath = args[1] + suffix++; } FileOutputFormat.setOutputPath(job, new Path(outputPath)); JobClient.runJob(job); return 0; }
From source file:edu.uci.ics.hyracks.imru.dataflow.Hdtest.java
License:Apache License
public static JobSpecification createJob() throws Exception { JobSpecification spec = new JobSpecification(); spec.setFrameSize(4096);//from ww w .jav a2s . c o m String PATH_TO_HADOOP_CONF = "/home/wangrui/a/imru/hadoop-0.20.2/conf"; String HDFS_INPUT_PATH = "/customer/customer.tbl,/customer_result/part-0"; JobConf conf = new JobConf(); conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml")); conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml")); conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml")); FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH); conf.setInputFormat(TextInputFormat.class); RecordDescriptor recordDesc = new RecordDescriptor( new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() }); InputSplit[] splits = conf.getInputFormat().getSplits(conf, 1); HDFSReadOperatorDescriptor readOperator = new HDFSReadOperatorDescriptor(spec, recordDesc, conf, splits, new String[] { "NC0", "NC1" }, new IKeyValueParserFactory<LongWritable, Text>() { @Override public IKeyValueParser<LongWritable, Text> createKeyValueParser(final IHyracksTaskContext ctx) { return new IKeyValueParser<LongWritable, Text>() { TupleWriter tupleWriter; @Override public void open(IFrameWriter writer) throws HyracksDataException { tupleWriter = new TupleWriter(ctx, writer, 1); } @Override public void parse(LongWritable key, Text value, IFrameWriter writer, String fileString) throws HyracksDataException { try { tupleWriter.write(value.getBytes(), 0, value.getLength()); tupleWriter.finishField(); tupleWriter.finishTuple(); } catch (IOException e) { throw new HyracksDataException(e); } } @Override public void close(IFrameWriter writer) throws HyracksDataException { tupleWriter.close(); } }; } }); // createPartitionConstraint(spec, readOperator, new String[] {"NC0"}); PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, readOperator, new String[] { "NC0", "NC1" }); IOperatorDescriptor writer = new HDFSOD(spec, null, null, null); // createPartitionConstraint(spec, writer, outSplits); spec.connect(new OneToOneConnectorDescriptor(spec), readOperator, 0, writer, 0); spec.addRoot(writer); return spec; }
From source file:edu.uci.ics.hyracks.imru.jobgen.IMRUJobFactory.java
License:Apache License
public InputSplit[] getInputSplits() throws IOException { JobConf conf = getConf(); FileInputFormat.setInputPaths(conf, inputPaths); conf.setInputFormat(HDFSBlockFormat.class); return conf.getInputFormat().getSplits(conf, 1); }
From source file:edu.uci.ics.hyracks.imru.util.DataBalancer.java
License:Apache License
public static void main(String[] args) throws IOException { JobConf job = new JobConf(DataBalancer.class); job.setJobName(DataBalancer.class.getSimpleName()); job.setMapperClass(MapRecordOnly.class); job.setReducerClass(ReduceRecordOnly.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(Text.class); job.setInputFormat(TextInputFormat.class); FileInputFormat.setInputPaths(job, args[0]); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setNumReduceTasks(Integer.parseInt(args[2])); if (args.length > 3) { if (args[3].startsWith("bzip")) FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class); if (args[3].startsWith("gz")) FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); }// w w w .j av a 2 s . c o m JobClient.runJob(job); }
From source file:edu.uci.ics.pregelix.core.util.DataGenerator.java
License:Apache License
public static void main(String[] args) throws IOException { JobConf job = new JobConf(DataGenerator.class); FileSystem dfs = FileSystem.get(job); String maxFile = "/maxtemp"; dfs.delete(new Path(maxFile), true); job.setJobName(DataGenerator.class.getSimpleName() + "max ID"); job.setMapperClass(MapMaxId.class); job.setCombinerClass(CombineMaxId.class); job.setReducerClass(ReduceMaxId.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(VLongWritable.class); job.setInputFormat(TextInputFormat.class); FileInputFormat.setInputPaths(job, args[0]); FileOutputFormat.setOutputPath(job, new Path(maxFile)); job.setNumReduceTasks(1);// w ww .j ava2 s .c om JobClient.runJob(job); job = new JobConf(DataGenerator.class); job.set("hyracks.maxid.file", maxFile); job.setInt("hyracks.x", Integer.parseInt(args[2])); dfs.delete(new Path(args[1]), true); job.setJobName(DataGenerator.class.getSimpleName()); job.setMapperClass(MapRecordGen.class); job.setReducerClass(ReduceRecordGen.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(Text.class); job.setInputFormat(TextInputFormat.class); FileInputFormat.setInputPaths(job, args[0]); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setNumReduceTasks(Integer.parseInt(args[3])); if (args.length > 4) { if (args[4].startsWith("bzip")) FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class); if (args[4].startsWith("gz")) FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); } JobClient.runJob(job); }
From source file:edu.uci.ics.pregelix.example.utils.CommonSource.java
License:Apache License
public static void main(String[] args) throws IOException { JobConf job = new JobConf(GraphPreProcessor.class); job.setJobName(GraphPreProcessor.class.getSimpleName()); job.setMapperClass(MapRecordOnly.class); job.setReducerClass(ReduceRecordOnly.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(NullWritable.class); job.setInputFormat(TextInputFormat.class); for (int i = 0; i < args.length - 2; i++) { FileInputFormat.addInputPath(job, new Path(args[i])); }//from w w w .jav a 2 s . c o m FileOutputFormat.setOutputPath(job, new Path(args[args.length - 2])); job.setNumReduceTasks(Integer.parseInt(args[args.length - 1])); JobClient.runJob(job); }
From source file:edu.uci.ics.pregelix.example.utils.DuplicateGraph.java
License:Apache License
public static void main(String[] args) throws IOException { JobConf job = new JobConf(DuplicateGraph.class); job.setJobName(DuplicateGraph.class.getSimpleName()); job.setMapperClass(MapRecordOnly.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setInputFormat(TextInputFormat.class); job.setOutputFormat(TextOutputFormat.class); job.setInputFormat(TextInputFormat.class); FileInputFormat.setInputPaths(job, args[0]); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setNumReduceTasks(0);// w w w.j a v a 2 s. co m JobClient.runJob(job); }