Usage examples for org.apache.hadoop.mapreduce.Job#getConfiguration()
public Configuration getConfiguration()
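All of the examples below follow the same basic pattern: obtain the job's live Configuration through getConfiguration() and set properties on it before the job is submitted. A minimal, self-contained sketch of that pattern (the custom property key and the paths are placeholders, not taken from any example below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class GetConfigurationExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "example");
        job.setJarByClass(GetConfigurationExample.class);

        // Changes made through getConfiguration() become part of the submitted job.
        Configuration jobConf = job.getConfiguration();
        jobConf.set("my.custom.property", "value");           // hypothetical application key
        jobConf.setBoolean("mapreduce.map.speculative", false);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}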
From source file: com.ery.hadoop.mrddx.hbase.HbaseOutputFormat.java
License: Apache License
@Override
public void handle(Job conf) throws Exception {
    HbaseConfiguration hConf = new HbaseConfiguration(conf.getConfiguration(),
            HbaseConfiguration.FLAG_HBASE_OUTPUT);

    // The output table name is required.
    String tableName = hConf.getOutputHBaseTableName();
    if (null == tableName || tableName.trim().length() <= 0) {
        String meg = "HBase output table name <" + HbaseConfiguration.OUTPUT_TABLE + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the output field names and derive the column family names from them.
    String hbaseFieldNames = hConf.getOutputHBaseFieldNames();
    this.vParamTargetFamilyNames(hbaseFieldNames, hConf);
    hConf.setOutputHBaseFamilyNames(this.getHBaseFamilyNames(hbaseFieldNames));

    // The row-key rule is required.
    String rowKeyRule = hConf.getOutputHBaseRowKeyRule();
    if (null == rowKeyRule || rowKeyRule.trim().length() <= 0) {
        String meg = "Row key rule <" + HbaseConfiguration.OUTPUT_ROWKEY_RULE + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // The maximum HFile size must be greater than 0.
    long hfileMaxfilesize = hConf.getOutputHBaseHFileMaxfilesize();
    if (hfileMaxfilesize <= 0) {
        String meg = "HFile max file size <" + HbaseConfiguration.OUTPUT_HFILE_MAXFILESIZE
                + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // The memstore flush size (flush to HDFS) must be greater than 0.
    long memstoreFlushSize = hConf.getOutputHBaseMemstoreFlushSize();
    if (memstoreFlushSize <= 0) {
        String meg = "Memstore flush size <" + HbaseConfiguration.OUTPUT_MEMSTORE_FLUSHSIZE
                + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // The column block size must be greater than 0.
    int colmunBlocksize = hConf.getOutputHBaseColmunBlocksize();
    if (colmunBlocksize <= 0) {
        String meg = "Column block size <" + HbaseConfiguration.OUTPUT_COLMUN_BLOCKSIZE
                + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // The maximum number of column versions must be greater than 0.
    int colmunMaxversion = hConf.getOutputHBaseColmunMaxversion();
    if (colmunMaxversion <= 0) {
        String meg = "Column max version <" + HbaseConfiguration.OUTPUT_COLMUN_MAXVERSION
                + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // The minimum number of column versions must be greater than 0.
    int colmunMinversion = hConf.getOutputHBaseColmunMinversion();
    if (colmunMinversion <= 0) {
        String meg = "Column min version <" + HbaseConfiguration.OUTPUT_COLMUN_MINVERSION
                + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // The commit buffer length must be greater than 0.
    int commitBufferLength = hConf.getOutputHBaseBufferLength();
    if (commitBufferLength <= 0) {
        String meg = "Commit buffer length <" + HbaseConfiguration.OUTPUT_SET_COMMIT_BUFFERLENGTH
                + "> must be greater than 0.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // The HBase WAL flag must be -1 or within the range [0-4].
    int walFlag = hConf.getOutputHBaseSetWalFlags();
    if (!(walFlag == -1 || (walFlag >= 0 && walFlag <= 4))) {
        String meg = "WAL flag <" + HbaseConfiguration.OUTPUT_SET_WAL_FLAG
                + "> must be -1 or in the range [0-4].";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Validate the target table.
    if (!validateTable(hConf)) {
        String errorInfo = "HBase output table validation exception!";
        MRLog.error(LOG, errorInfo);
        throw new Exception(errorInfo);
    }

    conf.setOutputFormatClass(HbaseOutputFormat.class);
    conf.setReduceSpeculativeExecution(false);
    conf.setOutputKeyClass(DBRecord.class);
    conf.setOutputValueClass(NullWritable.class);
    conf.setReducerClass(DBReducer.class);

    // Print the HBase table descriptor for logging.
    printTableDesc(tableName, hConf.getConf());
}
From source file: com.ery.hadoop.mrddx.hive.HiveOutputFormat.java
License: Apache License
/**
 * Configure the given job to write its output to a Hive table.
 *
 * @param job the job to configure
 * @param tableName the target Hive table name
 */
public static void setOutput(Job job, String tableName) {
    job.setOutputFormatClass(HiveOutputFormat.class);
    job.setReduceSpeculativeExecution(false);
    HiveConfiguration dbConf = new HiveConfiguration(job.getConfiguration());
    dbConf.setOutputHiveTableName(tableName);
}
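A possible call to the helper above, assuming the rest of the job (mapper, input paths, and so on) is configured elsewhere; the table name is a placeholder:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

Job job = Job.getInstance(new Configuration(), "hive-output-example");
// Records the target table in the job's Configuration via job.getConfiguration()
// and routes the job's output through HiveOutputFormat.
HiveOutputFormat.setOutput(job, "target_hive_table"); // placeholder table name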
From source file: com.ery.hadoop.mrddx.hive.HiveOutputFormat.java
License: Apache License
@Override
public void handle(Job conf) throws Exception {
    // Validate the Hive output configuration.
    HiveConfiguration hconf = new HiveConfiguration(conf.getConfiguration());

    // The row separator is required.
    String outRowChars = hconf.getOutputHiveFileRowsSplitChars();
    if (null == outRowChars || outRowChars.length() <= 0) {
        String meg = "Row separator <" + HiveConfiguration.OUTPUT_HIVE_ROWS_SPLITCHARS + "> is not set.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    // The field separator is required.
    String outFileSplitChars = hconf.getOutputHiveFileFieldSplitChars();
    if (null == outFileSplitChars || outFileSplitChars.trim().length() <= 0) {
        String meg = "Field separator <" + HiveConfiguration.OUTPUT_HIVE_FIELD_SPLITCHARS + "> is not set.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    boolean para = hconf.getOutputHiveCompress();
    // The compression codec must be one of HDFSUtils.CompressCodec.
    String outCompressCodec = hconf.getOutputHiveCompressCodec();
    if (para && !HDFSUtils.isExistCompressCodec(outCompressCodec)) {
        String meg = "[MR ERROR]Compression codec <" + HiveConfiguration.OUTPUT_HIVE_COMPRESS_CODEC
                + "> is not supported.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    // Target path of the MR output (optional).
    String outTargetpath = hconf.getOutputTargetFilePath();
    hconf.setOutputTargetPath(outTargetpath);
    if (null == outTargetpath || outTargetpath.trim().length() <= 0) {
        MRLog.warn(LOG, "MR target path <" + HiveConfiguration.OUTPUT_HIVE_TARGET_PATH + "> is not set.");
    }

    // The Hive connection URL is required.
    String hiveUrl = hconf.getOutPutHiveConfigUrl();
    if (null == hiveUrl || hiveUrl.trim().length() <= 0) {
        String meg = "[MR ERROR]Hive connection URL <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_URL
                + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Hive user name (optional).
    String hiveUser = hconf.getOutPutHiveConfigUser();
    if (null == hiveUser || hiveUser.trim().length() <= 0) {
        LOG.warn("[MR WARN]Hive user name <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_USER + "> is not set.");
    }

    // Hive password (optional).
    String hivePwd = hconf.getOutPutHiveConfigPassword();
    if (null == hivePwd || hivePwd.trim().length() <= 0) {
        LOG.warn("[MR WARN]Hive password <" + HiveConfiguration.OUTPUT_HIVE_CONFIG_PASSWORD + "> is not set.");
    }

    // The target table name is required.
    String tableName = hconf.getOutputHiveTableName();
    if (null == tableName || tableName.trim().length() <= 0) {
        String meg = "[MR ERROR]Hive table name <" + HiveConfiguration.OUTPUT_TABLE + "> is not set.";
        LOG.error(meg);
        throw new Exception(meg);
    }

    // Partition fields, if given, must be a subset of the output field names.
    String partitionField[] = hconf.getOutputHivePartitionField();
    if (null != partitionField && partitionField.length > 0) {
        String[] outputFieldName = hconf.getOutputFieldNames();
        if (null == outputFieldName || outputFieldName.length <= 0) {
            String meg = "Output field names <" + MRConfiguration.SYS_OUTPUT_FIELD_NAMES_PROPERTY
                    + "> are not set.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        for (int i = 0; i < partitionField.length; i++) {
            boolean isExist = false;
            for (String s : outputFieldName) {
                if (s.equals(partitionField[i])) {
                    isExist = true;
                    break;
                }
            }
            if (!isExist) {
                String meg = "Partition field " + partitionField[i] + " <"
                        + HiveConfiguration.OUTPUT_HIVE_PARTITION_FIELD + "> is not contained in <"
                        + MRConfiguration.SYS_OUTPUT_FIELD_NAMES_PROPERTY + ">.";
                MRLog.error(LOG, meg);
                throw new Exception(meg);
            }
        }

        String orderOutputTempPath = hconf.getOutputHiveOrderTempPath();
        if (null == orderOutputTempPath || orderOutputTempPath.trim().length() <= 0) {
            String meg = "Order temp path <" + HiveConfiguration.OUTPUT_HIVE_ORDER_TEMP_PATH + "> is not set.";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }

        String orderOutputFileNamePrefix = hconf.getOutputHiveOrderFileNamePrefix();
        if (null == orderOutputFileNamePrefix || orderOutputFileNamePrefix.trim().length() <= 0) {
            String meg = "Order file name prefix <" + HiveConfiguration.OUTPUT_HIVE_ORDER_TEMP_PATH
                    + "> is not set.";
            MRLog.warn(LOG, meg);
        }

        long orderOutputFileMaxCount = hconf.getOutputHiveOrderFileMaxCount();
        if (orderOutputFileMaxCount == 0) {
            String meg = "Order file max count <" + HiveConfiguration.OUTPUT_HIVE_ORDER_FILEMAXCOUNT
                    + "> must be greater than 0, or -1 (unlimited).";
            MRLog.error(LOG, meg);
            throw new Exception(meg);
        }
    }

    // DDL statement to run before the job (optional).
    String ddlHQL = hconf.getOutputHiveExecuteDDLHQL();
    if (null == ddlHQL || ddlHQL.trim().length() <= 0) {
        LOG.warn("[MR WARN]Hive DDL statement <" + HiveConfiguration.OUTPUT_HIVE_DDL_HQL + "> is not set.");
    }
    try {
        executeDDLHQL(hconf);
        MRLog.info(LOG, "execute ddl hive sql success!");
    } catch (SQLException e) {
        MRLog.error(LOG, "execute ddl hive sql error!");
        e.printStackTrace();
    }

    conf.setReduceSpeculativeExecution(false);
    conf.setOutputFormatClass(HiveOutputFormat.class);
    conf.setOutputKeyClass(DBRecord.class);
    conf.setOutputValueClass(NullWritable.class);

    if (null != partitionField && partitionField.length > 0) {
        conf.setCombinerClass(DBGroupReducer.class);
        conf.setReducerClass(DBPartitionReducer.class);
    } else {
        conf.setCombinerClass(DBGroupReducer.class);
        conf.setReducerClass(DBReducer.class);
    }
}
From source file: com.ery.hadoop.mrddx.hive.HiveRCFileOutputFormat.java
License: Apache License
@Override
public void handle(Job conf) throws Exception {
    super.handle(conf);
    HiveConfiguration hconf = new HiveConfiguration(conf.getConfiguration());

    // The compression codec must be one of HDFSUtils.CompressCodec;
    // BZip2Codec is not supported for RCFile output.
    String outCompressCodec = hconf.getOutputHiveCompressCodec();
    if (HDFSUtils.isBZip2CompressCodec(outCompressCodec)) {
        String meg = "[MR ERROR]Compression codec <" + HiveConfiguration.OUTPUT_HIVE_COMPRESS_CODEC
                + "> must not be BZip2Codec.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    setColumnNumber(conf.getConfiguration(), hconf.getOutputFieldNames().length);
    conf.setOutputFormatClass(HiveRCFileOutputFormat.class);
}
From source file: com.fanlehai.hadoop.join.CompositeJoin.java
License: Apache License
/**
 * The main driver for the join program. Invoke this method to submit the
 * map/reduce job.
 *
 * @throws IOException
 *             When there are communication problems with the job tracker.
 */
@SuppressWarnings("rawtypes")
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String join_reduces = conf.get(REDUCES_PER_HOST);
    if (join_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(join_reduces);
    }
    Job job = Job.getInstance(conf);
    job.setJobName("join");
    job.setJarByClass(CompositeJoin.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);

    Class<? extends InputFormat> inputFormatClass = KeyValueTextInputFormat.class; // SequenceFileInputFormat.class
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = Text.class; // BytesWritable.class
    Class<? extends Writable> outputValueClass = Text.class; // TupleWritable.class
    String op = "inner";
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-joinOp".equals(args[i])) {
                op = args[++i];
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    job.setNumReduceTasks(num_reduces);

    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }

    String strOut = otherArgs.remove(otherArgs.size() - 1);
    FileSystem.get(new Configuration()).delete(new Path(strOut), true);
    FileOutputFormat.setOutputPath(job, new Path(strOut));

    List<Path> plist = new ArrayList<Path>(otherArgs.size());
    for (String s : otherArgs) {
        plist.add(new Path(s));
    }

    job.setInputFormatClass(CompositeInputFormat.class);
    job.getConfiguration().set(CompositeInputFormat.JOIN_EXPR,
            CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0])));
    job.setOutputFormatClass(outputFormatClass);

    job.setMapperClass(MapComposite.class);

    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
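Since the driver above reads its options from the command line and calls getConf(), it is presumably run as a Hadoop Tool; a sketch of how it might be launched (the join operation and the paths are placeholders):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public static void main(String[] args) throws Exception {
    // "-joinOp inner", the input directories, and the output directory are placeholders.
    int exitCode = ToolRunner.run(new Configuration(), new CompositeJoin(),
            new String[] { "-joinOp", "inner", "/data/left", "/data/right", "/data/joined" });
    System.exit(exitCode);
}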
From source file: com.github.dryangkun.hbase.tidx.hive.HBaseStorageHandler.java
License: Apache License
@Override
public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
    try {
        HBaseSerDe.configureJobConf(tableDesc, jobConf);
        /*
         * HIVE-6356
         * The following code change is only needed for hbase-0.96.0 due to HBASE-9165, and
         * will not be required once Hive bumps up its hbase version. At that time, we will
         * only need TableMapReduceUtil.addDependencyJars(jobConf) here.
         */
        if (counterClass != null) {
            TableMapReduceUtil.addDependencyJars(jobConf, HBaseStorageHandler.class,
                    TableInputFormatBase.class, counterClass);
        } else {
            TableMapReduceUtil.addDependencyJars(jobConf, HBaseStorageHandler.class,
                    TableInputFormatBase.class);
        }
        if (HiveConf.getVar(jobConf, HiveConf.ConfVars.HIVE_HBASE_SNAPSHOT_NAME) != null) {
            // There is an extra dependency on MetricsRegistry for snapshot IF.
            TableMapReduceUtil.addDependencyJars(jobConf, MetricsRegistry.class);
        }

        Set<String> merged = new LinkedHashSet<String>(jobConf.getStringCollection("tmpjars"));
        Job copy = new Job(jobConf);
        TableMapReduceUtil.addDependencyJars(copy);
        merged.addAll(copy.getConfiguration().getStringCollection("tmpjars"));
        jobConf.set("tmpjars", StringUtils.arrayToString(merged.toArray(new String[0])));

        // Get credentials using the configuration instance which has HBase properties
        JobConf hbaseJobConf = new JobConf(getConf());
        org.apache.hadoop.hbase.mapred.TableMapReduceUtil.initCredentials(hbaseJobConf);
        ShimLoader.getHadoopShims().mergeCredentials(jobConf, hbaseJobConf);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file: com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableInputFormat.java
License: Apache License
@Override
public RecordReader<ImmutableBytesWritable, ResultWritable> getRecordReader(InputSplit split, JobConf jobConf,
        final Reporter reporter) throws IOException {

    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getTableSplit();

    Job job = new Job(jobConf);
    TaskAttemptContext tac = ShimLoader.getHadoopShims().newTaskAttemptContext(job.getConfiguration(), reporter);

    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader;
    if (hbaseSplit.isTxIndexScan()) {
        LOG.info("getRecordReader: TxHiveIndexScan -> " + tableSplit);
        recordReader = TxHiveTableInputFormatUtil.createRecordReader(tableSplit, tac, jobConf);
    } else {
        LOG.info("getRecordReader: no TxHiveIndexScan -> " + tableSplit);
        setHTable(HiveHBaseInputFormatUtil.getTable(jobConf));
        setScan(HiveHBaseInputFormatUtil.getScan(jobConf));
        recordReader = createRecordReader(tableSplit, tac);
    }
    try {
        recordReader.initialize(tableSplit, tac);
    } catch (InterruptedException e) {
        throw new IOException("Failed to initialize RecordReader", e);
    }

    return new RecordReader<ImmutableBytesWritable, ResultWritable>() {

        @Override
        public void close() throws IOException {
            recordReader.close();
            closeTable();
        }

        @Override
        public ImmutableBytesWritable createKey() {
            return new ImmutableBytesWritable();
        }

        @Override
        public ResultWritable createValue() {
            return new ResultWritable(new Result());
        }

        @Override
        public long getPos() throws IOException {
            return 0;
        }

        @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;
            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return progress;
        }

        @Override
        public boolean next(ImmutableBytesWritable rowKey, ResultWritable value) throws IOException {
            boolean next = false;
            try {
                next = recordReader.nextKeyValue();
                if (next) {
                    rowKey.set(recordReader.getCurrentValue().getRow());
                    value.setResult(recordReader.getCurrentValue());
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return next;
        }
    };
}
From source file: com.github.dryangkun.hbase.tidx.hive.HiveHFileOutputFormat.java
License: Apache License
@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, final Path finalOutPath,
        Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
        final Progressable progressable) throws IOException {

    // Read configuration for the target path, first from jobconf, then from table properties
    String hfilePath = getFamilyPath(jc, tableProperties);
    if (hfilePath == null) {
        throw new RuntimeException("Please set " + HFILE_FAMILY_PATH + " to target location for HFiles");
    }

    // Target path's last component is also the column family name.
    final Path columnFamilyPath = new Path(hfilePath);
    final String columnFamilyName = columnFamilyPath.getName();
    final byte[] columnFamilyNameBytes = Bytes.toBytes(columnFamilyName);
    final Job job = new Job(jc);
    setCompressOutput(job, isCompressed);
    setOutputPath(job, finalOutPath);

    // Create the HFile writer
    final org.apache.hadoop.mapreduce.TaskAttemptContext tac = ShimLoader.getHadoopShims()
            .newTaskAttemptContext(job.getConfiguration(), progressable);
    final Path outputdir = FileOutputFormat.getOutputPath(tac);
    final org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable, KeyValue> fileWriter =
            getFileWriter(tac);

    // Individual columns are going to be pivoted to HBase cells,
    // and for each row, they need to be written out in order
    // of column name, so sort the column names now, creating a
    // mapping to their column position. However, the first
    // column is interpreted as the row key.
    String columnList = tableProperties.getProperty("columns");
    String[] columnArray = columnList.split(",");
    final SortedMap<byte[], Integer> columnMap = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    int i = 0;
    for (String columnName : columnArray) {
        if (i != 0) {
            columnMap.put(Bytes.toBytes(columnName), i);
        }
        ++i;
    }

    return new RecordWriter() {

        @Override
        public void close(boolean abort) throws IOException {
            try {
                fileWriter.close(null);
                if (abort) {
                    return;
                }
                // Move the hfiles file(s) from the task output directory to the
                // location specified by the user.
                FileSystem fs = outputdir.getFileSystem(jc);
                fs.mkdirs(columnFamilyPath);
                Path srcDir = outputdir;
                for (;;) {
                    FileStatus[] files = fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER);
                    if ((files == null) || (files.length == 0)) {
                        throw new IOException("No family directories found in " + srcDir);
                    }
                    if (files.length != 1) {
                        throw new IOException("Multiple family directories found in " + srcDir);
                    }
                    srcDir = files[0].getPath();
                    if (srcDir.getName().equals(columnFamilyName)) {
                        break;
                    }
                }
                for (FileStatus regionFile : fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER)) {
                    fs.rename(regionFile.getPath(), new Path(columnFamilyPath, regionFile.getPath().getName()));
                }
                // Hive actually wants a file as task output (not a directory), so
                // replace the empty directory with an empty file to keep it happy.
                fs.delete(outputdir, true);
                fs.createNewFile(outputdir);
            } catch (InterruptedException ex) {
                throw new IOException(ex);
            }
        }

        private void writeText(Text text) throws IOException {
            // Decompose the incoming text row into fields.
            String s = text.toString();
            String[] fields = s.split("\u0001");
            assert (fields.length <= (columnMap.size() + 1));
            // First field is the row key.
            byte[] rowKeyBytes = Bytes.toBytes(fields[0]);
            // Remaining fields are cells addressed by column name within row.
            for (Map.Entry<byte[], Integer> entry : columnMap.entrySet()) {
                byte[] columnNameBytes = entry.getKey();
                int iColumn = entry.getValue();
                String val;
                if (iColumn >= fields.length) {
                    // trailing blank field
                    val = "";
                } else {
                    val = fields[iColumn];
                    if ("\\N".equals(val)) {
                        // omit nulls
                        continue;
                    }
                }
                byte[] valBytes = Bytes.toBytes(val);
                KeyValue kv = new KeyValue(rowKeyBytes, columnFamilyNameBytes, columnNameBytes, valBytes);
                try {
                    fileWriter.write(null, kv);
                } catch (IOException e) {
                    LOG.error("Failed while writing row: " + s);
                    throw e;
                } catch (InterruptedException ex) {
                    throw new IOException(ex);
                }
            }
        }

        private void writePut(PutWritable put) throws IOException {
            ImmutableBytesWritable row = new ImmutableBytesWritable(put.getPut().getRow());
            SortedMap<byte[], List<Cell>> cells = put.getPut().getFamilyCellMap();
            for (Map.Entry<byte[], List<Cell>> entry : cells.entrySet()) {
                Collections.sort(entry.getValue(), new CellComparator());
                for (Cell c : entry.getValue()) {
                    try {
                        fileWriter.write(row, KeyValueUtil.copyToNewKeyValue(c));
                    } catch (InterruptedException e) {
                        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
                    }
                }
            }
        }

        @Override
        public void write(Writable w) throws IOException {
            if (w instanceof Text) {
                writeText((Text) w);
            } else if (w instanceof PutWritable) {
                writePut((PutWritable) w);
            } else {
                throw new IOException("Unexpected writable " + w);
            }
        }
    };
}
From source file: com.github.libsml.commons.util.HadoopUtils.java
License: Apache License
/**
 * Create a map-only Hadoop Job out of the passed in parameters. Does not set the
 * Job name.
 *
 * @see #getCustomJobName(String, JobContext, Class, Class)
 */
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat, Configuration conf)
        throws IOException {

    // Job job = new Job(new Configuration(conf));
    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();

    if (mapper.equals(Mapper.class)) {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    job.setJarByClass(mapper);

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);
    job.setOutputKeyClass(mapperKey);
    job.setOutputValueClass(mapperValue);
    jobConf.setBoolean("mapred.compress.map.output", true);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}
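A possible invocation of prepareJob (the mapper class, key/value types, and paths are placeholders; any Mapper subclass other than Mapper.class itself would do):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

Job job = HadoopUtils.prepareJob(
        new Path("/data/in"),           // placeholder input path
        new Path("/data/out"),          // placeholder output path
        TextInputFormat.class,
        MyMapper.class,                 // hypothetical Mapper subclass
        Text.class, IntWritable.class,  // map output key/value types
        TextOutputFormat.class,
        new Configuration());
job.waitForCompletion(true);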
From source file: com.github.libsml.commons.util.HadoopUtils.java
License: Apache License
/**
 * Create an Avro-aware Hadoop Job out of the passed in parameters. Key/value
 * arguments may be either Avro Schema instances or Writable classes.
 *
 * @param inputPaths   comma-separated input paths
 * @param outputPath   output path
 * @param inputFormat  input format class, or null to infer from the input schemas
 * @param inputKey     input key schema or class
 * @param inputValue   input value schema or class
 * @param mapper       mapper class
 * @param mapperKey    map output key schema or class
 * @param mapperValue  map output value schema or class
 * @param combiner     combiner class, or null
 * @param reducer      reducer class, or null for a map-only job
 * @param outputKey    output key schema or class
 * @param outputValue  output value schema or class
 * @param outputFormat output format class, or null to infer from the output schemas
 * @param conf         base configuration
 * @param overwrite    whether to delete any existing output path first
 * @param isCompress   whether to compress the output
 * @return the configured Job
 * @throws IOException
 */
public static Job prepareAvroJob(String inputPaths, String outputPath,
        Class<? extends InputFormat> inputFormat, Object inputKey, Object inputValue,
        Class<? extends Mapper> mapper, Object mapperKey, Object mapperValue,
        Class<? extends Reducer> combiner, Class<? extends Reducer> reducer, Object outputKey,
        Object outputValue, Class<? extends OutputFormat> outputFormat, Configuration conf,
        boolean overwrite, boolean isCompress) throws IOException {

    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();

    if (inputKey instanceof Schema) {
        if (inputValue instanceof Schema) {
            inputFormat = inputFormat == null ? AvroKeyValueInputFormat.class : inputFormat;
        }
        inputFormat = inputFormat == null ? AvroKeyInputFormat.class : inputFormat;
    }
    if (inputFormat != null) {
        job.setInputFormatClass(inputFormat);
    }
    if (inputKey instanceof Schema) {
        AvroJob.setInputKeySchema(job, (Schema) inputKey);
    }
    if (inputValue instanceof Schema) {
        AvroJob.setInputValueSchema(job, (Schema) inputValue);
    }

    if (outputKey instanceof Schema) {
        if (outputValue instanceof Schema) {
            outputFormat = outputFormat == null ? AvroKeyValueOutputFormat.class : outputFormat;
        }
        outputFormat = outputFormat == null ? AvroKeyOutputFormat.class : outputFormat;
    }
    if (outputFormat != null) {
        job.setOutputFormatClass(outputFormat);
    }
    if (outputKey instanceof Schema) {
        AvroJob.setOutputKeySchema(job, (Schema) outputKey);
    } else if (outputKey instanceof Class) {
        job.setOutputKeyClass((Class) outputKey);
    }
    if (outputValue instanceof Schema) {
        AvroJob.setOutputValueSchema(job, (Schema) outputValue);
    } else if (outputValue instanceof Class) {
        job.setOutputValueClass((Class) outputValue);
    }

    if (reducer == null) {
        // Map-only job: the map output is the job output.
        job.setNumReduceTasks(0);

        if (mapperKey instanceof Schema) {
            AvroJob.setMapOutputKeySchema(job, (Schema) mapperKey);
        } else if (mapperKey instanceof Class) {
            job.setOutputKeyClass((Class) mapperKey);
        }
        if (mapperValue instanceof Schema) {
            AvroJob.setOutputValueSchema(job, (Schema) mapperValue);
        } else if (mapperValue instanceof Class) {
            job.setOutputValueClass((Class) mapperValue);
        }
        job.setJarByClass(mapper);

    } else if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);

    } else {
        job.setJarByClass(reducer);
    }

    FileInputFormat.setInputPaths(job, inputPaths);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    if (isCompress) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, DeflateCodec.class);
    }

    job.setMapperClass(mapper);
    if (mapperKey instanceof Schema) {
        AvroJob.setMapOutputKeySchema(job, (Schema) mapperKey);
    } else if (mapperKey instanceof Class) {
        job.setMapOutputKeyClass((Class) mapperKey);
    }
    if (mapperValue instanceof Schema) {
        AvroJob.setMapOutputValueSchema(job, (Schema) mapperValue);
    } else if (mapperValue instanceof Class) {
        job.setMapOutputValueClass((Class) mapperValue);
    }

    if (reducer != null) {
        job.setReducerClass(reducer);
    }
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }
    if (overwrite) {
        HadoopUtils.delete(jobConf, new Path(outputPath));
    }

    return job;
}
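A possible call to prepareAvroJob (the schema, mapper, and reducer classes are placeholders; passing null for the input format lets the method fall back to AvroKeyInputFormat because the input key argument is a Schema):

import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

Schema inputKeySchema = Schema.create(Schema.Type.STRING); // placeholder Avro schema
Job job = HadoopUtils.prepareAvroJob(
        "/data/avro/in", "/data/out",      // placeholder input/output paths
        null,                              // input format: inferred as AvroKeyInputFormat
        inputKeySchema, null,              // Avro input key schema, no input value schema
        MyAvroMapper.class,                // hypothetical Mapper subclass
        Text.class, IntWritable.class,     // map output key/value classes
        null,                              // no combiner
        MyReducer.class,                   // hypothetical Reducer subclass
        Text.class, IntWritable.class,     // job output key/value classes
        null,                              // default output format
        new Configuration(),
        true,                              // overwrite any existing output
        false);                            // no output compression
job.waitForCompletion(true);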