List of usage examples for org.apache.hadoop.mapreduce Job setOutputFormatClass
public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException
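setOutputFormatClass tells the job which OutputFormat implementation will write its results, and it throws IllegalStateException if the job has already been submitted. Before the examples below, here is a minimal self-contained sketch of typical usage; the Example driver class and the "in"/"out" paths are placeholders, not taken from any of the source files that follow:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class Example {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "example");
        job.setJarByClass(Example.class);
        job.setInputFormatClass(TextInputFormat.class);
        // Must be called before the job is submitted; afterwards it throws IllegalStateException.
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileInputFormat.addInputPath(job, new Path("in"));    // placeholder input path
        FileOutputFormat.setOutputPath(job, new Path("out")); // placeholder output path
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}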
From source file:com.soteradefense.dga.louvain.mapreduce.CommunityCompression.java
License:Apache License
public int run(String[] args) throws Exception {
    Configuration mrConf = this.getConf();
    for (java.util.Map.Entry<String, String> entry : dgaConfiguration.getSystemProperties().entrySet()) {
        mrConf.set(entry.getKey(), entry.getValue());
    }

    Job job = Job.getInstance(mrConf);
    job.setJarByClass(CommunityCompression.class);
    Path in = new Path(inputPath);
    Path out = new Path(outputPath);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);
    job.setJobName("CommunityCompression");

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LouvainVertexWritable.class);

    job.setMapperClass(CommunityCompression.Map.class);
    job.setReducerClass(CommunityCompression.Reduce.class);

    logger.debug("Running Mapreduce step with job configuration: {}", job);

    return job.waitForCompletion(false) ? 0 : 1;
}
From source file:com.soteradefense.dga.louvain.mapreduce.LouvainTableSynthesizer.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Job job = null;
    try {
        int iteration = 0;
        if (!basePath.endsWith("/"))
            basePath = basePath + "/";
        String inputPath = basePath + GIRAPH_FOLDER_BASE_NAME + FILE_NAME_SEPARATOR + iteration;
        String joinPath = basePath + GIRAPH_FOLDER_BASE_NAME + FILE_NAME_SEPARATOR + (iteration + 1);
        String outputPath = basePath + TABLE_BASE_NAME + FILE_NAME_SEPARATOR + iteration;
        Configuration mrConf = this.getConf();
        job = Job.getInstance(mrConf);
        for (Map.Entry<String, String> entry : dgaConfiguration.getSystemProperties().entrySet()) {
            mrConf.set(entry.getKey(), entry.getValue());
        }
        FileSystem fs = FileSystem.get(job.getConfiguration());
        boolean nextFileExists = fs.exists(new Path(joinPath));
        while (nextFileExists) {
            System.out.println("Processing " + inputPath + " and " + joinPath);
            job = Job.getInstance(mrConf);
            job.setJobName("Louvain Table Synthesizer " + iteration);

            job.setJarByClass(LouvainTableSynthesizer.class);

            job.setMapperClass(LouvainTableSynthesizerMapper.class);
            job.setReducerClass(LouvainTableSynthesizerReducer.class);

            job.setInputFormatClass(TextInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);

            // Reducer output
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(NullWritable.class);

            // Add both input folders
            Path in = new Path(inputPath);
            Path joinIn = new Path(joinPath);
            Path out = new Path(outputPath);
            FileInputFormat.addInputPath(job, in);
            FileInputFormat.addInputPath(job, joinIn);
            FileOutputFormat.setOutputPath(job, out);

            job.waitForCompletion(true);

            // Set the new temp input path
            inputPath = outputPath;
            iteration++;
            outputPath = basePath + TABLE_BASE_NAME + FILE_NAME_SEPARATOR + iteration;
            joinPath = basePath + GIRAPH_FOLDER_BASE_NAME + FILE_NAME_SEPARATOR + (iteration + 1);
            nextFileExists = fs.exists(new Path(joinPath));
        }
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        e.printStackTrace();
        return -1;
    }
    return 0;
}
From source file:com.soteradefense.dga.LouvainRunner.java
License:Apache License
private int runMapreduceJob(String inputPath, String outputPath, DGAConfiguration conf) throws Exception {
    Configuration mrConf = new Configuration();
    for (Map.Entry<String, String> entry : conf.getSystemProperties().entrySet()) {
        mrConf.set(entry.getKey(), entry.getValue());
    }

    Job job = Job.getInstance(mrConf);
    job.setJarByClass(LouvainRunner.class);
    Path in = new Path(inputPath);
    Path out = new Path(outputPath);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);
    job.setJobName("CommunityCompression");

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LouvainVertexWritable.class);

    job.setMapperClass(CommunityCompression.Map.class);
    job.setReducerClass(CommunityCompression.Reduce.class);

    logger.debug("Running Mapreduce step with job configuration: {}", job);

    return job.waitForCompletion(false) ? 0 : 1;
}
From source file:com.splicemachine.derby.stream.spark.SparkExportDataSetWriter.java
License:Apache License
@Override
public DataSet<LocatedRow> write() throws StandardException {
    Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
    ByteDataOutput bdo = new ByteDataOutput();
    Job job;
    String encoded;

    try {
        bdo.writeObject(exportFunction);
        encoded = Base64.encodeBase64String(bdo.toByteArray());
        conf.set("exportFunction", encoded);
        job = Job.getInstance(conf);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    job.setOutputKeyClass(Void.class);
    job.setOutputValueClass(LocatedRow.class);
    job.setOutputFormatClass(SparkDataSet.EOutputFormat.class);
    job.getConfiguration().set("mapred.output.dir", directory);

    JavaRDD<V> cached = rdd.cache();
    long writtenRows = cached.count();
    rdd.keyBy(new NullFunction<V>()).setName(String.format("Export Directory: %s", directory))
            .saveAsNewAPIHadoopDataset(job.getConfiguration());
    cached.unpersist();

    ValueRow valueRow = new ValueRow(2);
    valueRow.setColumn(1, new SQLLongint(writtenRows));
    valueRow.setColumn(2, new SQLInteger(0));
    return new SparkDataSet<>(
            SpliceSpark.getContext().parallelize(Collections.singletonList(new LocatedRow(valueRow)), 1));
}
From source file:com.splicemachine.mrio.api.SpliceTableMapReduceUtil.java
License:Apache License
/**
 * Use this before submitting a TableReduce job. It will
 * appropriately set up the JobConf.
 *
 * @param table The output Splice table name; the format should be Schema.tableName.
 * @param reducer The reducer class to use.
 * @param job The current job to adjust. Make sure the passed job is
 *   carrying all necessary configuration.
 * @param partitioner Partitioner to use. Pass <code>null</code> to use
 *   the default partitioner.
 * @param quorumAddress Distant cluster to write to; default is null for
 *   output to the cluster that is designated in <code>hbase-site.xml</code>.
 *   Set this String to the zookeeper ensemble of an alternate remote cluster
 *   when you would have the reduce write to a cluster other than the
 *   default; e.g. when copying tables between clusters, the source would be
 *   designated by <code>hbase-site.xml</code> and this param would have the
 *   ensemble address of the remote cluster. The format to pass is particular.
 *   Pass <code><hbase.zookeeper.quorum>:<hbase.zookeeper.client.port>:<zookeeper.znode.parent></code>
 *   such as <code>server,server2,server3:2181:/hbase</code>.
 * @param serverClass redefined hbase.regionserver.class
 * @param serverImpl redefined hbase.regionserver.client
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *   job classes via the distributed cache (tmpjars).
 * @throws IOException When determining the region count fails, or when the
 *   conglomerate ID lookup fails (any SQLException is wrapped in an IOException).
 */
public static void initTableReducerJob(String table, Class<? extends Reducer> reducer, Job job,
        Class partitioner, String quorumAddress, String serverClass, String serverImpl,
        boolean addDependencyJars, Class<? extends OutputFormat> outputformatClass) throws IOException {
    Configuration conf = job.getConfiguration();
    job.setOutputFormatClass(outputformatClass);
    if (reducer != null)
        job.setReducerClass(reducer);
    conf.set(MRConstants.SPLICE_OUTPUT_TABLE_NAME, table);
    if (sqlUtil == null)
        sqlUtil = SMSQLUtil.getInstance(conf.get(MRConstants.SPLICE_JDBC_STR));

    String hbaseTableID = null;
    try {
        hbaseTableID = sqlUtil.getConglomID(table);
    } catch (SQLException e) {
        e.printStackTrace();
        throw new IOException(e);
    }
    conf.set(MRConstants.HBASE_OUTPUT_TABLE_NAME, table);

    // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
    if (quorumAddress != null) {
        // Calling this will validate the format
        HBasePlatformUtils.validateClusterKey(quorumAddress);
        conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
    }
    if (serverClass != null && serverImpl != null) {
        conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
        conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
    }

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Object.class);
    if (partitioner == HRegionPartitioner.class) {
        job.setPartitionerClass(HRegionPartitioner.class);
        // TODO Where are the keys?
        int regions = getReduceNumberOfRegions(hbaseTableID);
        if (job.getNumReduceTasks() > regions) {
            job.setNumReduceTasks(regions);
        }
    } else if (partitioner != null) {
        job.setPartitionerClass(partitioner);
    }

    if (addDependencyJars) {
        addDependencyJars(job);
    }
    //initCredentials(job);
}
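A sketch of how this helper might be invoked; the table name and MyReducer class are hypothetical, and TableOutputFormat (referenced by the constants above) stands in for whatever OutputFormat the caller wants forwarded to setOutputFormatClass:

Job job = Job.getInstance(conf, "splice-table-reduce"); // conf assumed to carry the Splice JDBC string
SpliceTableMapReduceUtil.initTableReducerJob(
        "MYSCHEMA.MYTABLE",       // hypothetical output table, Schema.tableName format
        MyReducer.class,          // hypothetical reducer
        job,
        null,                     // use the default partitioner
        null,                     // write to the cluster designated in hbase-site.xml
        null, null,               // no region server class/impl overrides
        true,                     // ship dependency jars via the distributed cache
        TableOutputFormat.class); // forwarded to job.setOutputFormatClass(...)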
From source file:com.spotify.hdfs2cass.crunch.cql.CQLTarget.java
License:Open Source License
@Override
public void configureForMapReduce(final Job job, final PType<?> pType, final Path outputPath,
        final String name) {
    if (name == null) {
        throw new CrunchRuntimeException("'name' arguments should not be null. We don't know why tho");
    }

    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(CrunchCqlBulkOutputFormat.class);

    JobConf conf = new JobConf();
    params.configure(conf);
    for (Map.Entry<String, String> e : extraConf.entrySet()) {
        conf.set(e.getKey(), e.getValue());
    }

    FormatBundle<CrunchCqlBulkOutputFormat> bundle = FormatBundle.forOutput(CrunchCqlBulkOutputFormat.class);
    for (Map.Entry<String, String> e : conf) {
        bundle.set(e.getKey(), e.getValue());
    }

    Configuration jobConfiguration = job.getConfiguration();
    // We don't know why exactly this is needed, but without it the actual streaming will not
    // see the throttling and buffer size arguments.
    params.configure(jobConfiguration);

    CrunchConfigHelper.setOutputColumnFamily(jobConfiguration, params.getKeyspace(),
            params.getColumnFamily());
    CrunchCqlBulkOutputFormat.setColumnFamilySchema(jobConfiguration, params.getColumnFamily(),
            params.getSchema());
    CrunchCqlBulkOutputFormat.setColumnFamilyInsertStatement(jobConfiguration, params.getColumnFamily(),
            params.getStatement());

    String[] colNames = params.getColumnNames();
    for (int i = 0; i < colNames.length; i++) {
        CrunchCqlBulkOutputFormat.setColumnIndex(jobConfiguration, params.getColumnFamily(), colNames[i], i);
    }

    CrunchOutputs.addNamedOutput(job, name, bundle, ByteBuffer.class, List.class);
}
From source file:com.spotify.hdfs2cass.crunch.thrift.ThriftTarget.java
License:Open Source License
@Override
public void configureForMapReduce(final Job job, final PType<?> pType, final Path outputPath,
        final String name) {
    if (name == null) {
        throw new CrunchRuntimeException("'name' arguments should not be null. We don't know why tho");
    }

    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(CrunchBulkOutputFormat.class);

    JobConf conf = new JobConf();
    params.configure(conf);
    for (Map.Entry<String, String> e : extraConf.entrySet()) {
        conf.set(e.getKey(), e.getValue());
    }

    FormatBundle<CrunchBulkOutputFormat> bundle = FormatBundle.forOutput(CrunchBulkOutputFormat.class);
    for (Map.Entry<String, String> e : conf) {
        bundle.set(e.getKey(), e.getValue());
    }

    Configuration jobConfiguration = job.getConfiguration();
    // We don't know why exactly this is needed, but without it the actual streaming will not
    // see the throttling and buffer size arguments.
    params.configure(jobConfiguration);

    CrunchConfigHelper.setOutputColumnFamily(jobConfiguration, params.getKeyspace(),
            params.getColumnFamily());
    CrunchOutputs.addNamedOutput(job, name, bundle, ByteBuffer.class, List.class);
}
From source file:com.springsource.insight.plugin.hadoop.WordCount.java
License:Open Source License
public int run(String[] args) throws Exception {
    String INPUT = "src/test/resources";
    String OUTPUT = "target/out";

    Configuration conf = new Configuration();
    File targetFolder = FileUtil.detectTargetFolder(getClass());
    if (targetFolder == null) {
        throw new IllegalStateException("Cannot detect target folder");
    }
    File tempFolder = new File(targetFolder, "temp");
    conf.set("hadoop.tmp.dir", tempFolder.getAbsolutePath());

    Job job = new Job(conf, "wordcount");
    job.setJarByClass(WordCount.class);

    job.setMapperClass(WordCountMapper.class);
    job.setCombinerClass(WordCountReducer.class);
    job.setReducerClass(WordCountReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileUtils.deleteDirectory(new File(OUTPUT)); // delete old output data
    FileInputFormat.addInputPath(job, new Path(INPUT));
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT));

    return job.waitForCompletion(true) ? 0 : -1;
}
From source file:com.streamsets.pipeline.stage.destination.mapreduce.jobtype.avroconvert.AvroConversionBaseCreator.java
License:Apache License
@Override
public Job call() throws Exception {
    // We're explicitly disabling speculative execution
    conf.set("mapreduce.map.speculative", "false");
    conf.set("mapreduce.map.maxattempts", "1");

    conf.set("mapreduce.job.user.classpath.first", "true");
    conf.set("mapreduce.task.classpath.user.precedence", "true");
    conf.set("mapreduce.task.classpath.first", "true");

    addNecessaryJarsToJob(conf);

    Job job = Job.getInstance(conf);

    // IO formats
    job.setInputFormatClass(getInputFormatClass());
    job.setOutputFormatClass(NullOutputFormat.class);

    // Mapper & job output
    job.setMapperClass(getMapperClass());
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);

    // It's a map-only job
    job.setNumReduceTasks(0);

    // General configuration
    job.setJarByClass(getClass());

    return job;
}
From source file:com.stride.cartrek.core.hbase.RowKeyDistributorTestBase.java
License:Apache License
private void testMapReduceInternal(long origKeyPrefix, Scan scan, int numValues, int startWithValue,
        int seekIntervalMinValue, int seekIntervalMaxValue)
        throws IOException, InterruptedException, ClassNotFoundException {
    int valuesCountInSeekInterval = writeTestData(origKeyPrefix, numValues, startWithValue,
            seekIntervalMinValue, seekIntervalMaxValue);

    // Reading data
    Configuration conf = testingUtility.getConfiguration();
    Job job = new Job(conf, "testMapReduceInternal()-Job");
    job.setJarByClass(this.getClass());
    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, RowCounterMapper.class,
            ImmutableBytesWritable.class, Result.class, job);

    // Substituting standard TableInputFormat, which was set in
    // TableMapReduceUtil.initTableMapperJob(...)
    job.setInputFormatClass(WdTableInputFormat.class);
    keyDistributor.addInfo(job.getConfiguration());

    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);

    boolean succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);

    long mapInputRecords = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS).getValue();
    Assert.assertEquals(valuesCountInSeekInterval, mapInputRecords);
}