List of usage examples for org.apache.hadoop.mapred JobConf setOutputFormat
public void setOutputFormat(Class<? extends OutputFormat> theClass)
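Each entry below pairs a source file with the code that calls setOutputFormat. For orientation, here is a minimal, hypothetical sketch of the usual wiring in an old-API (org.apache.hadoop.mapred) job driver; the job name and the /tmp/in and /tmp/out paths are placeholders and are not taken from any of the listed sources.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class SetOutputFormatExample {
    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(SetOutputFormatExample.class);
        conf.setJobName("setOutputFormat-example"); // placeholder name

        // TextInputFormat yields LongWritable offsets and Text lines;
        // the identity mapper/reducer pass them through unchanged.
        conf.setInputFormat(TextInputFormat.class);
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);

        // setOutputFormat chooses how job output is written; TextOutputFormat
        // writes key<TAB>value lines. Key/value classes must match what it receives.
        conf.setOutputFormat(TextOutputFormat.class);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        // Placeholder paths; replace with real HDFS locations.
        FileInputFormat.setInputPaths(conf, new Path("/tmp/in"));
        FileOutputFormat.setOutputPath(conf, new Path("/tmp/out"));

        JobClient.runJob(conf);
    }
}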
From source file: org.acacia.partitioner.java.EdgelistPartitioner.java
License: Apache License
@SuppressWarnings("unused")
public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(EdgelistPartitioner.class);

    if (conf == null) {
        return;
    }

    String dir1 = "/user/miyuru/merged";
    String dir2 = "/user/miyuru/merged-out";

    // We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());

    // only delete dir2 because dir1 is uploaded externally.
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    conf.setInputFormat(WholeFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    WholeFileInputFormat.setInputPaths(conf, new Path(dir1));
    SequenceFileOutputFormat.setOutputPath(conf, new Path(dir2));

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(SequenceFileMapper.class);
    conf.setReducerClass(MultipleOutputsInvertedReducer.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setJobName("EdgelistPartitioner");

    MultipleOutputs.addMultiNamedOutput(conf, "partition", TextOutputFormat.class,
            NullWritable.class, Text.class);

    JobClient.runJob(conf);
}
From source file: org.ahanna.DoubleConversionMapper.java
License: Apache License
public static void main(String[] args) {
    JobConf conf = new JobConf(DoubleConversion.class);
    conf.setJobName("DoubleConversation");

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(DoubleConversionMapper.class);
    conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);

    // KeyValueTextInputFormat treats each line as an input record,
    // and splits the line by the tab character to separate it into key and value
    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        // do nothing
    }
}
From source file: org.apache.ambari.servicemonitor.jobs.FileUsingJobRunner.java
License: Apache License
public int run(String[] args) throws Exception {
    // Configuration processed by ToolRunner
    Configuration conf = getConf();
    CommandLine commandLine = getCommandLine();

    // Create a JobConf using the processed conf
    JobConf jobConf = new JobConf(conf, FileUsingJobRunner.class);

    // tune the config
    if (jobConf.get(JobKeys.RANGEINPUTFORMAT_ROWS) == null) {
        jobConf.setInt(JobKeys.RANGEINPUTFORMAT_ROWS, 1);
    }

    // Process custom command-line options
    String name = OptionHelper.getStringOption(commandLine, "n", "File Using Job");
    if (commandLine.hasOption('x')) {
        // delete the output directory
        String destDir = jobConf.get(JobKeys.MAPRED_OUTPUT_DIR);
        FileSystem fs = FileSystem.get(jobConf);
        fs.delete(new Path(destDir), true);
    }

    // Specify various job-specific parameters
    jobConf.setMapperClass(FileUsingMapper.class);
    jobConf.setReducerClass(FileUsingReducer.class);
    jobConf.setMapOutputKeyClass(IntWritable.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setInputFormat(RangeInputFormat.class);
    // jobConf.setPartitionerClass(SleepJob.class);
    jobConf.setSpeculativeExecution(false);
    jobConf.setJobName(name);
    jobConf.setJarByClass(this.getClass());

    FileInputFormat.addInputPath(jobConf, new Path("ignored"));

    // Submit the job, then poll for progress until the job is complete
    RunningJob runningJob = JobClient.runJob(jobConf);
    runningJob.waitForCompletion();
    return runningJob.isSuccessful() ? 0 : 1;
}
From source file: org.apache.avro.mapred.AvroJob.java
License: Apache License
private static void configureAvroOutput(JobConf job) {
    if (job.get("mapred.output.format.class") == null)
        job.setOutputFormat(AvroOutputFormat.class);

    if (job.getReducerClass() == IdentityReducer.class)
        job.setReducerClass(HadoopReducer.class);

    job.setOutputKeyClass(AvroWrapper.class);
    configureAvroShuffle(job);
}
From source file: org.apache.avro.mapred.TestAvroTextSort.java
License: Apache License
@Test
/**
 * Run the identity job on a "bytes" Avro file using AvroAsTextInputFormat
 * and AvroTextOutputFormat to produce a sorted "bytes" Avro file.
 */
public void testSort() throws Exception {
    JobConf job = new JobConf();
    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path outputPath = new Path(dir + "/out");
    outputPath.getFileSystem(job).delete(outputPath);

    WordCountUtil.writeLinesBytesFile();

    job.setInputFormat(AvroAsTextInputFormat.class);
    job.setOutputFormat(AvroTextOutputFormat.class);
    job.setOutputKeyClass(Text.class);

    FileInputFormat.setInputPaths(job, new Path(dir + "/in"));
    FileOutputFormat.setOutputPath(job, outputPath);

    JobClient.runJob(job);

    WordCountUtil.validateSortedFile();
}
From source file: org.apache.avro.mapred.TestGenericJob.java
License: Apache License
@Test
public void testJob() throws Exception {
    JobConf job = new JobConf();
    Path outputPath = new Path(dir + "/out");
    outputPath.getFileSystem(job).delete(outputPath);

    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, dir + "/in");

    job.setMapperClass(AvroTestConverter.class);
    job.setNumReduceTasks(0);

    FileOutputFormat.setOutputPath(job, outputPath);
    System.out.println(createSchema());
    AvroJob.setOutputSchema(job, Pair.getPairSchema(Schema.create(Schema.Type.LONG), createSchema()));
    job.setOutputFormat(AvroOutputFormat.class);

    JobClient.runJob(job);
}
From source file: org.apache.avro.mapred.TestSequenceFileReader.java
License: Apache License
@Test
public void testNonAvroReducer() throws Exception {
    JobConf job = new JobConf();
    Path output = new Path(System.getProperty("test.dir", ".") + "/seq-out");
    output.getFileSystem(job).delete(output);

    // configure input for Avro from sequence file
    AvroJob.setInputSequenceFile(job);
    AvroJob.setInputSchema(job, SCHEMA);
    FileInputFormat.setInputPaths(job, FILE.toURI().toString());

    // mapper is default, identity
    // use a hadoop reducer that consumes Avro input
    AvroJob.setMapOutputSchema(job, SCHEMA);
    job.setReducerClass(NonAvroReducer.class);

    // configure output for non-Avro SequenceFile
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, output);

    // output key/value classes are default, LongWritable/Text

    JobClient.runJob(job);

    checkFile(new SequenceFileReader<Long, CharSequence>(new File(output.toString() + "/part-00000")));
}
From source file: org.apache.avro.mapred.tether.TetherJob.java
License: Apache License
private static void setupTetherJob(JobConf job) throws IOException {
    job.setMapRunnerClass(TetherMapRunner.class);
    job.setPartitionerClass(TetherPartitioner.class);
    job.setReducerClass(TetherReducer.class);

    job.setInputFormat(TetherInputFormat.class);
    job.setOutputFormat(TetherOutputFormat.class);
    job.setOutputKeyClass(TetherData.class);
    job.setOutputKeyComparatorClass(TetherKeyComparator.class);
    job.setMapOutputValueClass(NullWritable.class);

    // set the map output key class to TetherData
    job.setMapOutputKeyClass(TetherData.class);

    // add TetherKeySerialization to io.serializations
    Collection<String> serializations = job.getStringCollection("io.serializations");
    if (!serializations.contains(TetherKeySerialization.class.getName())) {
        serializations.add(TetherKeySerialization.class.getName());
        job.setStrings("io.serializations", serializations.toArray(new String[0]));
    }

    // determine whether the executable should be added to the cache.
    if (job.getBoolean(TETHER_EXEC_CACHED, false)) {
        DistributedCache.addCacheFile(getExecutable(job), job);
    }
}
From source file: org.apache.hcatalog.hbase.TestHBaseBulkOutputFormat.java
License: Apache License
@Test
public void hbaseBulkOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException {
    String testName = "hbaseBulkOutputFormatTest";
    Path methodTestDir = new Path(getTestDir(), testName);
    LOG.info("starting: " + testName);

    String tableName = newTableName(testName).toLowerCase();
    String familyName = "my_family";
    byte[] familyNameBytes = Bytes.toBytes(familyName);

    // include hbase config in conf file
    Configuration conf = new Configuration(allConf);

    // create table
    conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);
    conf.set("yarn.scheduler.capacity.root.queues", "default");
    conf.set("yarn.scheduler.capacity.root.default.capacity", "100");
    createTable(tableName, new String[] { familyName });

    String data[] = { "1,english:one,spanish:uno", "2,english:two,spanish:dos",
            "3,english:three,spanish:tres" };

    // input/output settings
    Path inputPath = new Path(methodTestDir, "mr_input");
    FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt"));
    for (String line : data)
        os.write(Bytes.toBytes(line + "\n"));
    os.close();

    Path interPath = new Path(methodTestDir, "inter");

    // create job
    JobConf job = new JobConf(conf);
    job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapWriteOldMapper.class);

    job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath);

    job.setOutputFormat(HBaseBulkOutputFormat.class);
    org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(job, interPath);
    job.setOutputCommitter(HBaseBulkOutputCommitter.class);

    // manually create transaction
    RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf);
    try {
        OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null);
        Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName));
        outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY,
                HCatUtil.serialize(txn));
        job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));
    } finally {
        rm.close();
    }

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);
    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());

    // verify
    HTable table = new HTable(conf, tableName);
    Scan scan = new Scan();
    scan.addFamily(familyNameBytes);
    ResultScanner scanner = table.getScanner(scan);
    int index = 0;
    for (Result result : scanner) {
        String vals[] = data[index].toString().split(",");
        for (int i = 1; i < vals.length; i++) {
            String pair[] = vals[i].split(":");
            assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
            assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
        }
        index++;
    }

    // test if load count is the same
    assertEquals(data.length, index);

    // test if scratch directory was erased
    assertFalse(FileSystem.get(job).exists(interPath));
}