List of usage examples for org.apache.hadoop.mapreduce.Job.getConfiguration()
public Configuration getConfiguration()
From source file:com.dipwater.accountAnalyze.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); conf.set("mapred.job.tracker", "192.168.1.51:9001"); conf.set("fs.default.name", "hdfs://192.168.1.51:9000"); String[] ars = new String[] { "input", "newout" }; String[] otherArgs = new GenericOptionsParser(conf, ars).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2);//from w w w . j ava 2 s . c o m } Job job = new Job(conf, "word count"); File jarFile = EJob.createTempJar("bin"); EJob.addClasspath("/home/hadoop/hadoop-1.2.1/conf"); ClassLoader classLoader = EJob.getClassLoader(); Thread.currentThread().setContextClassLoader(classLoader); ((JobConf) job.getConfiguration()).setJar(jarFile.toString()); //job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.dse.pig.udfs.CqlStorage.java
License:Apache License
/** set read configuration settings */ public void setLocation(String location, Job job) throws IOException { conf = job.getConfiguration(); setLocationFromUri(location);//ww w. j av a 2s .c o m if (username != null && password != null) ConfigHelper.setInputKeyspaceUserNameAndPassword(conf, username, password); if (splitSize > 0) ConfigHelper.setInputSplitSize(conf, splitSize); if (partitionerClass != null) ConfigHelper.setInputPartitioner(conf, partitionerClass); if (rpcPort != null) ConfigHelper.setInputRpcPort(conf, rpcPort); if (initHostAddress != null) ConfigHelper.setInputInitialAddress(conf, initHostAddress); ConfigHelper.setInputColumnFamily(conf, keyspace, column_family); setConnectionInformation(); CqlConfigHelper.setInputCQLPageRowSize(conf, String.valueOf(pageSize)); if (columns != null && !columns.trim().isEmpty()) CqlConfigHelper.setInputColumns(conf, columns); String whereClauseForPartitionFilter = getWhereClauseForPartitionFilter(); String wc = whereClause != null && !whereClause.trim().isEmpty() ? whereClauseForPartitionFilter == null ? 
whereClause : String.format("%s AND %s", whereClause.trim(), whereClauseForPartitionFilter) : whereClauseForPartitionFilter; if (wc != null) { logger.debug("where clause: {}", wc); CqlConfigHelper.setInputWhereClauses(conf, wc); } if (System.getenv(PIG_INPUT_SPLIT_SIZE) != null) { try { ConfigHelper.setInputSplitSize(conf, Integer.valueOf(System.getenv(PIG_INPUT_SPLIT_SIZE))); } catch (NumberFormatException e) { throw new IOException("PIG_INPUT_SPLIT_SIZE is not a number", e); } } if (ConfigHelper.getInputRpcPort(conf) == 0) throw new IOException("PIG_INPUT_RPC_PORT or PIG_RPC_PORT environment variable not set"); if (ConfigHelper.getInputInitialAddress(conf) == null) throw new IOException("PIG_INPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set"); if (ConfigHelper.getInputPartitioner(conf) == null) throw new IOException("PIG_INPUT_PARTITIONER or PIG_PARTITIONER environment variable not set"); if (loadSignature == null) loadSignature = location; initSchema(loadSignature); }
From source file:com.dse.pig.udfs.CqlStorage.java
License:Apache License
/**
 * Sets the store (output) configuration on the given job.
 *
 * <p>Takes the job's configuration, parses {@code location} via {@code setLocationFromUri},
 * and copies every output setting that is present into the configuration. The RPC port and
 * initial address are written to both the output and the input settings, and the split size
 * is written to the input split-size setting.
 *
 * @param location the location string handed to {@code setLocationFromUri}
 * @param job the job whose configuration is populated
 * @throws IOException if the output RPC port, initial address or partitioner is still unset
 *         after configuration
 */
public void setStoreLocation(String location, Job job) throws IOException {
    conf = job.getConfiguration();
    setLocationFromUri(location);

    if (username != null && password != null) {
        ConfigHelper.setOutputKeyspaceUserNameAndPassword(conf, username, password);
    }
    if (splitSize > 0) {
        ConfigHelper.setInputSplitSize(conf, splitSize);
    }
    if (partitionerClass != null) {
        ConfigHelper.setOutputPartitioner(conf, partitionerClass);
    }
    if (rpcPort != null) {
        ConfigHelper.setOutputRpcPort(conf, rpcPort);
        ConfigHelper.setInputRpcPort(conf, rpcPort);
    }
    if (initHostAddress != null) {
        ConfigHelper.setOutputInitialAddress(conf, initHostAddress);
        ConfigHelper.setInputInitialAddress(conf, initHostAddress);
    }

    ConfigHelper.setOutputColumnFamily(conf, keyspace, column_family);
    CqlConfigHelper.setOutputCql(conf, outputQuery);
    setConnectionInformation();

    // Fail fast when a mandatory output setting is still missing.
    if (ConfigHelper.getOutputRpcPort(conf) == 0) {
        throw new IOException("PIG_OUTPUT_RPC_PORT or PIG_RPC_PORT environment variable not set");
    }
    if (ConfigHelper.getOutputInitialAddress(conf) == null) {
        throw new IOException("PIG_OUTPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set");
    }
    if (ConfigHelper.getOutputPartitioner(conf) == null) {
        throw new IOException("PIG_OUTPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");
    }

    initSchema(storeSignature);
}
From source file:com.elex.dmp.lda.CVB0Driver.java
License:Apache License
/**
 * Stores the URIs of the part files found under {@code modelPath} in the job configuration
 * under the {@code MODEL_PATHS} key.
 *
 * <p>Does nothing when {@code modelPath} is {@code null} or does not exist on its file system.
 * When the path exists, at least one part file must be present; otherwise the
 * {@code Preconditions.checkState} check fails.
 *
 * @param job the job whose configuration receives the model paths
 * @param modelPath directory holding the model part files; may be {@code null}
 * @throws IOException if the file system cannot be accessed
 */
private static void setModelPaths(Job job, Path modelPath) throws IOException {
    Configuration conf = job.getConfiguration();
    if (modelPath == null) {
        return;
    }

    FileSystem fs = FileSystem.get(modelPath.toUri(), conf);
    if (!fs.exists(modelPath)) {
        return;
    }

    FileStatus[] partFiles = fs.listStatus(modelPath, PathFilters.partFilter());
    Preconditions.checkState(partFiles.length > 0, "No part files found in model path '%s'",
            modelPath.toString());

    String[] modelPaths = new String[partFiles.length];
    int next = 0;
    for (FileStatus partFile : partFiles) {
        modelPaths[next++] = partFile.getPath().toUri().toString();
    }
    conf.setStrings(MODEL_PATHS, modelPaths);
}
From source file:com.ema.hadoop.bestclient.BestClient.java
/**
 * Configures and runs the "Best client job".
 *
 * <p>Expects exactly four arguments: input path, output path, start date and end date. The
 * two dates are stored in the job configuration as the multi-valued property {@code "dates"}.
 * The JVM exits with {@code -1} on wrong usage, {@code 0} when the job succeeds and {@code 1}
 * otherwise.
 *
 * @param args {@code <input path> <output path> <date start> <date end>}
 * @throws Exception if configuring or running the job fails
 */
public static void main(String[] args) throws Exception {
    if (args.length != 4) {
        System.err.println("Usage: BestClient <input path> <output path> <date start> <date end>");
        System.exit(-1);
    }

    Job job = Job.getInstance();
    job.setJarByClass(BestClient.class);
    job.setJobName("Best client job");

    // setStrings is declared on Configuration, so the configuration is used as returned.
    // Downcasting it to JobConf would only tie this code to the concrete class Job happens
    // to use internally and risk a ClassCastException.
    job.getConfiguration().setStrings("dates", args[2], args[3]);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(BCMapper.class);
    job.setReducerClass(BCReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.endgame.binarypig.loaders.AbstractExecutingLoaderTest.java
License:Apache License
/**
 * Checks that {@code setLocation} stores the given path under {@code "mapred.input.dir"}
 * in the job configuration, in the form {@code "file:/tmp/some/path"}.
 */
public void testSetLocation() throws IOException {
    Job job = new Job();

    underTest.setLocation("/tmp/some/path", job);

    String configuredInputDir = job.getConfiguration().get("mapred.input.dir");
    assertEquals("file:/tmp/some/path", configuredInputDir);
}
From source file:com.ery.hadoop.mrddx.file.RCFileOutputFormat.java
License:Apache License
@Override public void handle(Job conf) throws Exception { /**/*from ww w .jav a 2s. c om*/ * ? */ FileConfiguration dbconf = new FileConfiguration(conf.getConfiguration(), FileConfiguration.FLAG_FILE_OUTPUT); // ? String outRowChars = dbconf.getOutputFileRowsSplitChars(); if (null == outRowChars || outRowChars.length() <= 0) { String meg = "<" + FileConfiguration.OUTPUT_FILE_ROWS_SPLIT_CHARS + ">"; MRLog.error(LOG, meg); throw new Exception(meg); } // String outFileSplitChars = dbconf.getOutputFileFieldSplitChars(); if (null == outFileSplitChars || outFileSplitChars.trim().length() <= 0) { String meg = "<" + FileConfiguration.OUTPUT_FILE_FIELD_SPLIT_CHARS + ">"; MRLog.error(LOG, meg); throw new Exception(meg); } boolean para = dbconf.getOutputFileCompress(); // ? (?HDFSUtils.CompressCodec) String outCompressCodec = dbconf.getOutputFileCompressCodec(); if (para && !HDFSUtils.isExistCompressCodec(outCompressCodec)) { String meg = "[MR ERROR]?<" + FileConfiguration.OUTPUT_FILE_COMPRESSCODEC + ">?."; MRLog.error(LOG, meg); throw new Exception(meg); } // ?BZip2Codec if (HDFSUtils.isBZip2CompressCodec(outCompressCodec)) { String meg = "[MR ERROR]?<" + HiveConfiguration.OUTPUT_HIVE_COMPRESS_CODEC + ">??BZip2Codec."; MRLog.error(LOG, meg); throw new Exception(meg); } // ?MR String outTargetpath = dbconf.getOutputTargetFilePath(); dbconf.setOutputTargetPath(outTargetpath); if (null == outTargetpath || outTargetpath.trim().length() <= 0) { MRLog.warn(LOG, "MR<" + FileConfiguration.OUTPUT_FILE_TARGET_PATH + ">"); } setColumnNumber(conf.getConfiguration(), dbconf.getOutputFieldNames().length); conf.setOutputFormatClass(RCFileOutputFormat.class); conf.setReducerClass(DBReducer.class); }
From source file:com.ery.hadoop.mrddx.hbase.HbaseInputFormat.java
License:Apache License
/**
 * Initializes the map-part of the job with the appropriate input settings.
 *
 * <p>Registers {@code HbaseInputFormat} as the job's input format and records the input
 * class, table name and column relation in an {@code HbaseConfiguration} built over the
 * job's configuration.
 *
 * @param job The map-reduce job
 * @param inputClass the class passed to {@code HbaseConfiguration.setInputClass}
 * @param tableName the value passed to {@code HbaseConfiguration.setInputTableName}
 * @param srcTargetFieldNames the value passed to
 *        {@code HbaseConfiguration.setInputHBaseColumnRelation}
 */
public static void setInput(Job job, Class<? extends DBWritable> inputClass, String tableName,
        String srcTargetFieldNames) {
    job.setInputFormatClass(HbaseInputFormat.class);

    HbaseConfiguration hbaseConf = new HbaseConfiguration(job.getConfiguration(),
            HbaseConfiguration.FLAG_HBASE_INPUT);
    hbaseConf.setInputClass(inputClass);
    hbaseConf.setInputTableName(tableName);
    hbaseConf.setInputHBaseColumnRelation(srcTargetFieldNames);
}
From source file:com.ery.hadoop.mrddx.hbase.HbaseInputFormat.java
License:Apache License
@Override public void handle(Job conf) throws Exception { // HBase??//from www . java 2 s .c om HbaseConfiguration hconf = new HbaseConfiguration(conf.getConfiguration(), HbaseConfiguration.FLAG_HBASE_INPUT); String tableName = hconf.getInputTableName(); if (null == tableName || tableName.trim().length() <= 0) { String meg = "[MR ERROR]HBase??<" + HbaseConfiguration.INPUT_TABLE + ">?."; MRLog.error(LOG, meg); throw new Exception(meg); } // ? String inputFieldName[] = hconf.getInputFieldNames(); this.vParamSrcTargetFieldNames(hconf, inputFieldName); if (hconf.getInputIsCombiner()) { conf.setCombinerClass(DBGroupReducer.class); } // ?TIMERANGE String timerange[] = hconf.getInputHBaseQueryTimerange(); this.vParamQueryTimeRange(timerange); // ?startrow String startrow = hconf.getInputHBaseQueryStartRow(); if (null == startrow || startrow.trim().length() <= 0) { MRLog.warn(LOG, "[MR WARN]?startrow<" + HbaseConfiguration.INPUT_QUERY_STARTROW + ">."); } // ?stoprow String stoprow = hconf.getInputHBaseQueryStopRow(); if (null == stoprow || stoprow.trim().length() <= 0) { MRLog.warn(LOG, "[MR WARN]?stoprow<" + HbaseConfiguration.INPUT_QUERY_STOPROW + ">."); } // ?timestamp long timestamp = hconf.getInputHBaseQueryTimestamp(); if (timestamp <= -1) { MRLog.warn(LOG, "[MR WARN]?<" + HbaseConfiguration.INPUT_QUERY_TIMESTAMP + ">."); } // ?filters String filters = hconf.getInputHBaseQueryFilters(); if (null == filters || filters.length() <= 0) { MRLog.warn(LOG, "[MR WARN]??<" + HbaseConfiguration.INPUT_QUERY_FILTER + ">."); } // ?familyColumns String familyColumns[] = hconf.getInputHBaseQueryFamilyColumns(); if (null == familyColumns || familyColumns.length <= 0) { MRLog.warn(LOG, "[MR WARN]?<" + HbaseConfiguration.INPUT_QUERY_FAMILYCOLUMNS + ">."); } if (null != familyColumns) { for (String tmp : familyColumns) { if (tmp.split(":").length != 2) { String meg = "[MR ERROR]?<" + HbaseConfiguration.INPUT_QUERY_FAMILYCOLUMNS + ">."; MRLog.error(LOG, meg); throw new Exception(meg); } 
} } // ?familys String familys[] = hconf.getInputHBaseQueryFamilys(); if (null == familys || familys.length <= 0) { MRLog.warn(LOG, "[MR WARN]??<" + HbaseConfiguration.INPUT_QUERY_FAMILYS + ">."); } conf.setInputFormatClass(HbaseInputFormat.class); hconf.setInputClass(DBRecord.class); // ?MapTask? int taskNumber = HbaseInputFormat.getTableHRegionInfoCount(conf.getConfiguration(), startrow, stoprow); int reduceTasks = taskNumber; if (hconf.getInputMapEnd()) { reduceTasks = 0; } // hconf.setNumMapTasks(taskNumber); hconf.setNumReduceTasks(reduceTasks); hconf.setInputClass(DBRecord.class); conf.setMapperClass(DBMapper.class); conf.setMapOutputKeyClass(DBRecord.class); conf.setMapOutputValueClass(DBRecord.class); if (hconf.getInputIsCombiner()) { conf.setCombinerClass(DBGroupReducer.class); } }
From source file:com.ery.hadoop.mrddx.hbase.HbaseOutputFormat.java
License:Apache License
/**
 * Configures the job to write its output through {@code HbaseOutputFormat}.
 *
 * <p>Sets the output format class, turns off speculative execution for reduce tasks, and
 * records the table name in an {@code HbaseConfiguration} built over the job's configuration.
 *
 * @param job the job to configure
 * @param tableName the value passed to {@code HbaseConfiguration.setOutputHBaseTableName}
 * @return the {@code HbaseConfiguration} wrapping the job's configuration
 */
public static HbaseConfiguration setOutput(Job job, String tableName) {
    job.setOutputFormatClass(HbaseOutputFormat.class);
    job.setReduceSpeculativeExecution(false);

    HbaseConfiguration hbaseConf = new HbaseConfiguration(job.getConfiguration(),
            HbaseConfiguration.FLAG_HBASE_OUTPUT);
    hbaseConf.setOutputHBaseTableName(tableName);
    return hbaseConf;
}