Usage examples for org.apache.hadoop.mapreduce.Job#getConfiguration()
public Configuration getConfiguration()
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
/**
 * Creates a MapReduce job that reads Avro-keyed input and writes Deflate-compressed,
 * Avro-keyed output.
 *
 * <p>The job jar is located from the reducer class, or from the mapper class when the
 * reducer is the base {@code Reducer} class.
 *
 * @param inputPaths      input path specification handed to {@code FileInputFormat.setInputPaths}
 * @param outputPath      directory the job writes to
 * @param inputKeySchema  Avro schema of the input keys
 * @param mapper          mapper class
 * @param mapperKey       map output key class; not set when {@code null}
 * @param mapperValue     map output value class; not set when {@code null}
 * @param combiner        combiner class; not set when {@code null}
 * @param reducer         reducer class
 * @param outputKeySchema Avro schema of the output keys
 * @param outputValue     job output value class
 * @param conf            configuration the job is created from
 * @param overwrite       when {@code true}, {@code HadoopUtils.delete} is invoked on
 *                        {@code outputPath} before the job is returned
 * @return the configured (not yet submitted) job
 * @throws IOException           declared by the underlying Hadoop calls
 * @throws IllegalStateException if both mapper and reducer are the base classes, so no
 *                               user jar can be determined
 */
public static Job prepareAvroJob(String inputPaths, Path outputPath, Schema inputKeySchema,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> combiner,
        Class<? extends Reducer> reducer, Schema outputKeySchema,
        Class<? extends Writable> outputValue, Configuration conf, boolean overwrite)
        throws IOException {
    final Job job = Job.getInstance(conf);
    final Configuration jobConf = job.getConfiguration();

    // Pick the class whose jar should be shipped: prefer the reducer, fall back to the mapper.
    final boolean baseReducer = reducer.equals(Reducer.class);
    if (baseReducer && mapper.equals(Mapper.class)) {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    final Class<?> jarAnchor = baseReducer ? mapper : reducer;
    job.setJarByClass(jarAnchor);

    // Input/output locations and output compression.
    FileInputFormat.setInputPaths(job, inputPaths);
    FileOutputFormat.setOutputPath(job, outputPath);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DeflateCodec.class);

    // Map side.
    job.setInputFormatClass(AvroKeyInputFormat.class);
    AvroJob.setInputKeySchema(job, inputKeySchema);
    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }

    // Reduce side.
    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    job.setReducerClass(reducer);
    AvroJob.setOutputKeySchema(job, outputKeySchema);
    job.setOutputValueClass(outputValue);

    if (overwrite) {
        HadoopUtils.delete(jobConf, outputPath);
    }
    return job;
}
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
public static Job prepareAvroJob(String inputPaths, Path outputPath, Schema inputKeySchema, Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey, Class<? extends Writable> mapperValue, Class<? extends Reducer> combiner, Class<? extends Reducer> reducer, Class<? extends Writable> outputKey, Class<? extends Writable> outputValue, Configuration conf, boolean overwrite) throws IOException { Job job = Job.getInstance(conf); Configuration jobConf = job.getConfiguration(); if (reducer.equals(Reducer.class)) { if (mapper.equals(Mapper.class)) { throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer"); }/*from w ww. j av a 2 s. c o m*/ job.setJarByClass(mapper); } else { job.setJarByClass(reducer); } FileInputFormat.setInputPaths(job, inputPaths); FileOutputFormat.setOutputPath(job, outputPath); // FileOutputFormat.setCompressOutput(job, true); // FileOutputFormat.setOutputCompressorClass(job, DeflateCodec.class); job.setInputFormatClass(AvroKeyInputFormat.class); AvroJob.setInputKeySchema(job, inputKeySchema); job.setMapperClass(mapper); if (mapperKey != null) { job.setMapOutputKeyClass(mapperKey); } if (mapperValue != null) { job.setMapOutputValueClass(mapperValue); } if (combiner != null) { job.setCombinerClass(combiner); } job.setReducerClass(reducer); job.setOutputKeyClass(outputKey); job.setOutputValueClass(outputValue); if (overwrite) { HadoopUtils.delete(jobConf, outputPath); } return job; }
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
public static Job prepareJob(String inputPath, String outputPath, Class<? extends InputFormat> inputFormat, Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey, Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer, Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue, Class<? extends OutputFormat> outputFormat, Configuration conf) throws IOException { // Job job = new Job(new Configuration(conf)); Job job = Job.getInstance(conf); Configuration jobConf = job.getConfiguration(); if (reducer.equals(Reducer.class)) { if (mapper.equals(Mapper.class)) { throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer"); }/*from w w w .j a va 2 s. c om*/ job.setJarByClass(mapper); } else { job.setJarByClass(reducer); } job.setInputFormatClass(inputFormat); jobConf.set("mapred.input.dir", inputPath); job.setMapperClass(mapper); if (mapperKey != null) { job.setMapOutputKeyClass(mapperKey); } if (mapperValue != null) { job.setMapOutputValueClass(mapperValue); } jobConf.setBoolean("mapred.compress.map.output", true); job.setReducerClass(reducer); job.setOutputKeyClass(reducerKey); job.setOutputValueClass(reducerValue); job.setOutputFormatClass(outputFormat); jobConf.set("mapred.output.dir", outputPath); return job; }
From source file:com.github.ygf.pagerank.PageRank.java
License:Apache License
private void createTransitionMatrix(Configuration conf, Path linksFile, Path outputDir) throws Exception { // This job reads the links-simple-sorted.txt input file and generates // the corresponding transition matrix. The matrix is divided into // square blocks and each block is represented by the nonzero entries. // See Section 5.2 (and 5.2.3 in particular) of Mining of Massive Datasets // (http://infolab.stanford.edu/~ullman/mmds.html) for details. // The output is written to the "M" subdir in the output dir. Job job = Job.getInstance(conf, "PageRank:Matrix"); job.setJarByClass(PageRank.class); job.setInputFormatClass(TextInputFormat.class); job.setMapperClass(PageRankMatrixMapper.class); job.getConfiguration().setBoolean("mapreduce.map.output.compress", true); job.getConfiguration().setClass("mapreduce.map.output.compress.codec", DefaultCodec.class, CompressionCodec.class); job.setMapOutputKeyClass(ShortArrayWritable.class); job.setMapOutputValueClass(ShortArrayWritable.class); job.setReducerClass(PageRankMatrixReducer.class); SequenceFileOutputFormat.setCompressOutput(job, true); SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); SequenceFileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(ShortArrayWritable.class); job.setOutputValueClass(MatrixBlockWritable.class); FileInputFormat.addInputPath(job, linksFile); FileOutputFormat.setOutputPath(job, new Path(outputDir, "M")); job.waitForCompletion(true);/*from ww w .j av a 2 s . c o m*/ }
From source file:com.google.cloud.bigtable.hbase.TestImport.java
License:Open Source License
@Test @Category(KnownGap.class) public void testMapReduce() throws IOException, ClassNotFoundException, InterruptedException { Admin admin = getConnection().getAdmin(); admin.disableTable(TABLE_NAME);// w w w. j a v a 2 s .c o m admin.deleteTable(TABLE_NAME); IntegrationTests.createTable(TABLE_NAME); // Put a value. byte[] rowKey = dataHelper.randomData("testrow-"); byte[] qual = dataHelper.randomData("testQualifier-"); byte[] value = dataHelper.randomData("testValue-"); try (Table oldTable = getConnection().getTable(TABLE_NAME)) { Put put = new Put(rowKey); put.addColumn(COLUMN_FAMILY, qual, value); oldTable.put(put); // Assert the value is there. Get get = new Get(rowKey); Result result = oldTable.get(get); List<Cell> cells = result.listCells(); Assert.assertEquals(1, cells.size()); Assert.assertArrayEquals(CellUtil.cloneValue(cells.get(0)), value); } // Run the export. Configuration conf = getConnection().getConfiguration(); //conf.set("fs.defaultFS", "file:///"); FileSystem dfs = IntegrationTests.getMiniCluster().getFileSystem(); String tempDir = "hdfs://" + dfs.getCanonicalServiceName() + "/tmp/backup"; String[] args = new String[] { TABLE_NAME.getNameAsString(), tempDir }; Job job = Export.createSubmittableJob(conf, args); // So it looks for jars in the local FS, not HDFS. job.getConfiguration().set("fs.defaultFS", "file:///"); Assert.assertTrue(job.waitForCompletion(true)); // Create new table. TableName newTableName = IntegrationTests.newTestTableName(); try (Table newTable = getConnection().getTable(newTableName)) { // Change for method in IntegrationTests HColumnDescriptor hcd = new HColumnDescriptor(IntegrationTests.COLUMN_FAMILY); HTableDescriptor htd = new HTableDescriptor(newTableName); htd.addFamily(hcd); admin.createTable(htd); // Run the import. 
args = new String[] { newTableName.getNameAsString(), tempDir }; job = Import.createSubmittableJob(conf, args); job.getConfiguration().set("fs.defaultFS", "file:///"); Assert.assertTrue(job.waitForCompletion(true)); // Assert the value is there. Get get = new Get(rowKey); Result result = newTable.get(get); List<Cell> cells = result.listCells(); Assert.assertEquals(1, cells.size()); Assert.assertArrayEquals(CellUtil.cloneValue(cells.get(0)), value); } finally { admin.disableTable(newTableName); admin.deleteTable(newTableName); } }
From source file:com.google.cloud.bigtable.mapreduce.Import.java
License:Open Source License
/** * Sets up the actual job./*from w ww. j a v a 2 s. co m*/ * @param conf The current configuration. * @param args The command line parameters. * @return The newly created job. * @throws IOException When setting up the job fails. */ public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException { TableName tableName = TableName.valueOf(args[0]); conf.set(TABLE_NAME, tableName.getNameAsString()); Path inputDir = new Path(args[1]); Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName)); job.setJarByClass(Importer.class); FileInputFormat.setInputPaths(job, inputDir); job.setInputFormatClass(SequenceFileInputFormat.class); String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY); // make sure we get the filter in the jars try { Class<? extends Filter> filter = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class); if (filter != null) { TableMapReduceUtil.addDependencyJars(conf, filter); } } catch (Exception e) { throw new IOException(e); } if (hfileOutPath != null) { job.setMapperClass(KeyValueImporter.class); try (Connection conn = ConnectionFactory.createConnection(conf); Table table = conn.getTable(tableName); RegionLocator regionLocator = conn.getRegionLocator(tableName)) { job.setReducerClass(KeyValueSortReducer.class); Path outputDir = new Path(hfileOutPath); FileOutputFormat.setOutputPath(job, outputDir); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(KeyValue.class); HFileOutputFormat2.configureIncrementalLoad(job, table, regionLocator); TableMapReduceUtil.addDependencyJars(job.getConfiguration(), com.google.common.base.Preconditions.class); } } else { // No reducers. Just write straight to table. Call initTableReducerJob // because it sets up the TableOutputFormat. job.setMapperClass(Importer.class); TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job); job.setNumReduceTasks(0); } return job; }
From source file:com.google.cloud.dataflow.contrib.hadoop.HadoopFileSink.java
License:Apache License
/**
 * Verifies that the configured output path does not exist yet.
 *
 * <p>The file system is resolved from the output path itself rather than from the
 * configuration's default file system, so a fully qualified path on a non-default file
 * system is checked against the file system it actually belongs to.
 *
 * @param options pipeline options (not used by this check)
 * @throws IllegalStateException if the output path already exists
 * @throws RuntimeException      wrapping any {@link IOException} raised while creating the
 *                               job or querying the file system
 */
@Override
public void validate(PipelineOptions options) {
    try {
        Job job = jobInstance();
        Path outputPath = new Path(path);
        FileSystem fs = outputPath.getFileSystem(job.getConfiguration());
        Preconditions.checkState(!fs.exists(outputPath), "Output path " + path + " already exists");
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file:com.google.cloud.dataflow.contrib.hadoop.HadoopFileSink.java
License:Apache License
private Job jobInstance() throws IOException { Job job = Job.getInstance(); // deserialize map to conf Configuration conf = job.getConfiguration(); for (Map.Entry<String, String> entry : map.entrySet()) { conf.set(entry.getKey(), entry.getValue()); }//from w ww .j a va 2 s. c o m return job; }
From source file:com.google.cloud.dataflow.sdk.io.hdfs.HDFSFileSink.java
License:Apache License
private Job jobInstance() throws IOException { Job job = Job.getInstance(); if (serializableConfiguration != null) { for (Map.Entry<String, String> entry : serializableConfiguration.get()) { job.getConfiguration().set(entry.getKey(), entry.getValue()); }//from w ww. j av a 2 s .co m } job.setJobID(jobId); return job; }
From source file:com.gsinnovations.howdah.AbstractJob.java
License:Apache License
protected Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat, Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey, Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer, Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue, Class<? extends OutputFormat> outputFormat) throws IOException { Job job = new Job(new Configuration(getConf())); Configuration jobConf = job.getConfiguration(); if (reducer.equals(Reducer.class)) { if (mapper.equals(Mapper.class)) { throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer"); }//from ww w. j av a 2 s. c om job.setJarByClass(mapper); } else { job.setJarByClass(reducer); } job.setInputFormatClass(inputFormat); jobConf.set("mapred.input.dir", inputPath.toString()); job.setMapperClass(mapper); job.setMapOutputKeyClass(mapperKey); job.setMapOutputValueClass(mapperValue); jobConf.setBoolean("mapred.compress.map.output", true); job.setReducerClass(reducer); job.setOutputKeyClass(reducerKey); job.setOutputValueClass(reducerValue); job.setJobName(getCustomJobName(job, mapper, reducer)); job.setOutputFormatClass(outputFormat); jobConf.set("mapred.output.dir", outputPath.toString()); return job; }