Example usage for org.apache.hadoop.mapreduce Job getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job.getConfiguration().

Prototype

public Configuration getConfiguration() 

Document

Return the configuration for the job.
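
Because getConfiguration() returns the live Configuration backing the job, properties set on it before submission are serialized with the job and are visible to tasks through context.getConfiguration(). A minimal sketch of that round trip (the class name and the example.threshold key are illustrative, not drawn from the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetConfigurationSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "example");

        // Properties set here travel with the job on submission and can be
        // read back on the task side via context.getConfiguration().
        Configuration conf = job.getConfiguration();
        conf.set("example.threshold", "42");

        System.out.println(conf.get("example.threshold")); // prints 42
    }
}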

Usage

From source file:com.dipwater.accountAnalyze.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapred.job.tracker", "192.168.1.51:9001");
    conf.set("fs.default.name", "hdfs://192.168.1.51:9000");

    String[] ars = new String[] { "input", "newout" };
    String[] otherArgs = new GenericOptionsParser(conf, ars).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");

    File jarFile = EJob.createTempJar("bin");
    EJob.addClasspath("/home/hadoop/hadoop-1.2.1/conf");
    ClassLoader classLoader = EJob.getClassLoader();
    Thread.currentThread().setContextClassLoader(classLoader);
    ((JobConf) job.getConfiguration()).setJar(jarFile.toString());

    //job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
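
A note on the cast above: this example targets Hadoop 1.x (note the /home/hadoop/hadoop-1.2.1/conf classpath entry), where a Job is backed by a JobConf internally, so getConfiguration() actually returns a JobConf and the cast exposes setJar() to ship the temporary jar built by EJob to the cluster.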

From source file:com.dse.pig.udfs.CqlStorage.java

License:Apache License

/** set read configuration settings */
public void setLocation(String location, Job job) throws IOException {
    conf = job.getConfiguration();
    setLocationFromUri(location);

    if (username != null && password != null)
        ConfigHelper.setInputKeyspaceUserNameAndPassword(conf, username, password);
    if (splitSize > 0)
        ConfigHelper.setInputSplitSize(conf, splitSize);
    if (partitionerClass != null)
        ConfigHelper.setInputPartitioner(conf, partitionerClass);
    if (rpcPort != null)
        ConfigHelper.setInputRpcPort(conf, rpcPort);
    if (initHostAddress != null)
        ConfigHelper.setInputInitialAddress(conf, initHostAddress);

    ConfigHelper.setInputColumnFamily(conf, keyspace, column_family);
    setConnectionInformation();

    CqlConfigHelper.setInputCQLPageRowSize(conf, String.valueOf(pageSize));
    if (columns != null && !columns.trim().isEmpty())
        CqlConfigHelper.setInputColumns(conf, columns);

    String whereClauseForPartitionFilter = getWhereClauseForPartitionFilter();
    String wc = whereClause != null && !whereClause.trim().isEmpty()
            ? whereClauseForPartitionFilter == null ? whereClause
                    : String.format("%s AND %s", whereClause.trim(), whereClauseForPartitionFilter)
            : whereClauseForPartitionFilter;

    if (wc != null) {
        logger.debug("where clause: {}", wc);
        CqlConfigHelper.setInputWhereClauses(conf, wc);
    }

    if (System.getenv(PIG_INPUT_SPLIT_SIZE) != null) {
        try {
            ConfigHelper.setInputSplitSize(conf, Integer.valueOf(System.getenv(PIG_INPUT_SPLIT_SIZE)));
        } catch (NumberFormatException e) {
            throw new IOException("PIG_INPUT_SPLIT_SIZE is not a number", e);
        }
    }

    if (ConfigHelper.getInputRpcPort(conf) == 0)
        throw new IOException("PIG_INPUT_RPC_PORT or PIG_RPC_PORT environment variable not set");
    if (ConfigHelper.getInputInitialAddress(conf) == null)
        throw new IOException("PIG_INPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set");
    if (ConfigHelper.getInputPartitioner(conf) == null)
        throw new IOException("PIG_INPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");
    if (loadSignature == null)
        loadSignature = location;

    initSchema(loadSignature);
}
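
Pig invokes a LoadFunc's setLocation() on both the front end and the back end, so everything the storage handler needs at read time is written into job.getConfiguration(); the ConfigHelper and CqlConfigHelper setters used above are typed wrappers around plain conf.set() calls.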

From source file:com.dse.pig.udfs.CqlStorage.java

License:Apache License

/** set store configuration settings */
public void setStoreLocation(String location, Job job) throws IOException {
    conf = job.getConfiguration();
    setLocationFromUri(location);

    if (username != null && password != null)
        ConfigHelper.setOutputKeyspaceUserNameAndPassword(conf, username, password);
    if (splitSize > 0)
        ConfigHelper.setInputSplitSize(conf, splitSize);
    if (partitionerClass != null)
        ConfigHelper.setOutputPartitioner(conf, partitionerClass);
    if (rpcPort != null) {
        ConfigHelper.setOutputRpcPort(conf, rpcPort);
        ConfigHelper.setInputRpcPort(conf, rpcPort);
    }
    if (initHostAddress != null) {
        ConfigHelper.setOutputInitialAddress(conf, initHostAddress);
        ConfigHelper.setInputInitialAddress(conf, initHostAddress);
    }

    ConfigHelper.setOutputColumnFamily(conf, keyspace, column_family);
    CqlConfigHelper.setOutputCql(conf, outputQuery);

    setConnectionInformation();

    if (ConfigHelper.getOutputRpcPort(conf) == 0)
        throw new IOException("PIG_OUTPUT_RPC_PORT or PIG_RPC_PORT environment variable not set");
    if (ConfigHelper.getOutputInitialAddress(conf) == null)
        throw new IOException("PIG_OUTPUT_INITIAL_ADDRESS or PIG_INITIAL_ADDRESS environment variable not set");
    if (ConfigHelper.getOutputPartitioner(conf) == null)
        throw new IOException("PIG_OUTPUT_PARTITIONER or PIG_PARTITIONER environment variable not set");

    initSchema(storeSignature);
}

From source file:com.elex.dmp.lda.CVB0Driver.java

License:Apache License

private static void setModelPaths(Job job, Path modelPath) throws IOException {
    Configuration conf = job.getConfiguration();
    if (modelPath == null || !FileSystem.get(modelPath.toUri(), conf).exists(modelPath)) {
        return;
    }
    FileStatus[] statuses = FileSystem.get(modelPath.toUri(), conf).listStatus(modelPath,
            PathFilters.partFilter());
    Preconditions.checkState(statuses.length > 0, "No part files found in model path '%s'",
            modelPath.toString());
    String[] modelPaths = new String[statuses.length];
    for (int i = 0; i < statuses.length; i++) {
        modelPaths[i] = statuses[i].getPath().toUri().toString();
    }
    conf.setStrings(MODEL_PATHS, modelPaths);
}
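
Configuration.setStrings() stores the array as a single comma-separated value under MODEL_PATHS, so downstream code can recover it with the matching getter, e.g. String[] modelPaths = conf.getStrings(MODEL_PATHS);.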

From source file:com.ema.hadoop.bestclient.BestClient.java

public static void main(String[] args) throws Exception {

    if (args.length != 4) {
        System.err.println("Usage: BestClient <input path> <output path> <date start> <date end>");
        System.exit(-1);
    }

    Job job = Job.getInstance();
    job.setJarByClass(BestClient.class);
    job.setJobName("Best client job");

    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.setStrings("dates", args[2], args[3]);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(BCMapper.class);
    job.setReducerClass(BCReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
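
Incidentally, the JobConf cast is not required for this call: setStrings() is declared on Configuration itself, so job.getConfiguration().setStrings("dates", args[2], args[3]) would work directly. A hedged sketch of the matching read on the mapper side (the real BCMapper source is not shown on this page, so this is an assumption about how it consumes the "dates" property):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Sketch only, not the original BCMapper.
public class BCMapperSketch extends Mapper<LongWritable, Text, Text, IntWritable> {
    private String[] dates;

    @Override
    protected void setup(Context context) {
        // Pairs with setStrings("dates", args[2], args[3]) in the driver;
        // dates[0] is the start date, dates[1] the end date.
        dates = context.getConfiguration().getStrings("dates");
    }
}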

From source file:com.endgame.binarypig.loaders.AbstractExecutingLoaderTest.java

License:Apache License

public void testSetLocation() throws IOException {
    Job job = new Job();
    underTest.setLocation("/tmp/some/path", job);
    assertEquals("file:/tmp/some/path", job.getConfiguration().get("mapred.input.dir"));
}
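
The assertion holds because setLocation() presumably delegates to FileInputFormat.setInputPaths(), which qualifies the path against the default (local) file system and records it under the mapred.input.dir key in the job's Configuration.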

From source file:com.ery.hadoop.mrddx.file.RCFileOutputFormat.java

License:Apache License

@Override
public void handle(Job conf) throws Exception {
    // Wrap the job's Configuration with the file-output settings.
    FileConfiguration dbconf = new FileConfiguration(conf.getConfiguration(),
            FileConfiguration.FLAG_FILE_OUTPUT);
    // Validate the output row separator.
    String outRowChars = dbconf.getOutputFileRowsSplitChars();
    if (null == outRowChars || outRowChars.length() <= 0) {
        String meg = "[MR ERROR] <" + FileConfiguration.OUTPUT_FILE_ROWS_SPLIT_CHARS + "> is not set.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    // Validate the output field separator.
    String outFileSplitChars = dbconf.getOutputFileFieldSplitChars();
    if (null == outFileSplitChars || outFileSplitChars.trim().length() <= 0) {
        String meg = "[MR ERROR] <" + FileConfiguration.OUTPUT_FILE_FIELD_SPLIT_CHARS + "> is not set.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    boolean para = dbconf.getOutputFileCompress();
    // Validate the compression codec (must be one of HDFSUtils.CompressCodec).
    String outCompressCodec = dbconf.getOutputFileCompressCodec();
    if (para && !HDFSUtils.isExistCompressCodec(outCompressCodec)) {
        String meg = "[MR ERROR] Unsupported compression codec <" + FileConfiguration.OUTPUT_FILE_COMPRESSCODEC
                + ">.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    // BZip2Codec is not supported for this output format.
    if (HDFSUtils.isBZip2CompressCodec(outCompressCodec)) {
        String meg = "[MR ERROR] <" + HiveConfiguration.OUTPUT_HIVE_COMPRESS_CODEC
                + "> must not be BZip2Codec.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    // Record the MR output target path.
    String outTargetpath = dbconf.getOutputTargetFilePath();
    dbconf.setOutputTargetPath(outTargetpath);
    if (null == outTargetpath || outTargetpath.trim().length() <= 0) {
        MRLog.warn(LOG,
                "[MR WARN] Output target path <" + FileConfiguration.OUTPUT_FILE_TARGET_PATH + "> is not set.");
    }

    setColumnNumber(conf.getConfiguration(), dbconf.getOutputFieldNames().length);
    conf.setOutputFormatClass(RCFileOutputFormat.class);
    conf.setReducerClass(DBReducer.class);
}

From source file:com.ery.hadoop.mrddx.hbase.HbaseInputFormat.java

License:Apache License

/**
 * Initializes the map part of the job with the appropriate input settings.
 *
 * @param job the map-reduce job
 * @param inputClass the DBWritable implementation used to read input records
 * @param tableName the HBase table name to read from
 * @param srcTargetFieldNames the source-to-target field-name mapping
 */
public static void setInput(Job job, Class<? extends DBWritable> inputClass, String tableName,
        String srcTargetFieldNames) {
    job.setInputFormatClass(HbaseInputFormat.class);
    HbaseConfiguration dbConf = new HbaseConfiguration(job.getConfiguration(),
            HbaseConfiguration.FLAG_HBASE_INPUT);
    dbConf.setInputClass(inputClass);
    dbConf.setInputTableName(tableName);
    dbConf.setInputHBaseColumnRelation(srcTargetFieldNames);
}

From source file:com.ery.hadoop.mrddx.hbase.HbaseInputFormat.java

License:Apache License

@Override
public void handle(Job conf) throws Exception {
    // Wrap the job's Configuration with the HBase input settings.
    HbaseConfiguration hconf = new HbaseConfiguration(conf.getConfiguration(),
            HbaseConfiguration.FLAG_HBASE_INPUT);
    String tableName = hconf.getInputTableName();
    if (null == tableName || tableName.trim().length() <= 0) {
        String meg = "[MR ERROR]HBase??<" + HbaseConfiguration.INPUT_TABLE + ">?.";
        MRLog.error(LOG, meg);
        throw new Exception(meg);
    }

    // Validate the source-to-target field-name mapping.
    String inputFieldName[] = hconf.getInputFieldNames();
    this.vParamSrcTargetFieldNames(hconf, inputFieldName);

    if (hconf.getInputIsCombiner()) {
        conf.setCombinerClass(DBGroupReducer.class);
    }

    // Validate the TIMERANGE query parameter.
    String timerange[] = hconf.getInputHBaseQueryTimerange();
    this.vParamQueryTimeRange(timerange);

    // Check the startrow query parameter.
    String startrow = hconf.getInputHBaseQueryStartRow();
    if (null == startrow || startrow.trim().length() <= 0) {
        MRLog.warn(LOG,
                "[MR WARN] Query startrow <" + HbaseConfiguration.INPUT_QUERY_STARTROW + "> is not set.");
    }

    // Check the stoprow query parameter.
    String stoprow = hconf.getInputHBaseQueryStopRow();
    if (null == stoprow || stoprow.trim().length() <= 0) {
        MRLog.warn(LOG,
                "[MR WARN] Query stoprow <" + HbaseConfiguration.INPUT_QUERY_STOPROW + "> is not set.");
    }

    // Check the timestamp query parameter.
    long timestamp = hconf.getInputHBaseQueryTimestamp();
    if (timestamp <= -1) {
        MRLog.warn(LOG, "[MR WARN] Query timestamp <" + HbaseConfiguration.INPUT_QUERY_TIMESTAMP
                + "> is not set.");
    }

    // Check the filters query parameter.
    String filters = hconf.getInputHBaseQueryFilters();
    if (null == filters || filters.length() <= 0) {
        MRLog.warn(LOG, "[MR WARN] Query filters <" + HbaseConfiguration.INPUT_QUERY_FILTER
                + "> are not set.");
    }

    // Check the familyColumns query parameter.
    String familyColumns[] = hconf.getInputHBaseQueryFamilyColumns();
    if (null == familyColumns || familyColumns.length <= 0) {
        MRLog.warn(LOG,
                "[MR WARN] Query familyColumns <" + HbaseConfiguration.INPUT_QUERY_FAMILYCOLUMNS + "> are not set.");
    }

    if (null != familyColumns) {
        for (String tmp : familyColumns) {
            if (tmp.split(":").length != 2) {
                String meg = "[MR ERROR]?<" + HbaseConfiguration.INPUT_QUERY_FAMILYCOLUMNS
                        + ">.";
                MRLog.error(LOG, meg);
                throw new Exception(meg);
            }
        }
    }

    // Check the familys query parameter.
    String familys[] = hconf.getInputHBaseQueryFamilys();
    if (null == familys || familys.length <= 0) {
        MRLog.warn(LOG,
                "[MR WARN] Query familys <" + HbaseConfiguration.INPUT_QUERY_FAMILYS + "> are not set.");
    }

    conf.setInputFormatClass(HbaseInputFormat.class);
    hconf.setInputClass(DBRecord.class);

    // Derive the number of map tasks from the table's HRegion count.
    int taskNumber = HbaseInputFormat.getTableHRegionInfoCount(conf.getConfiguration(), startrow, stoprow);
    int reduceTasks = taskNumber;
    if (hconf.getInputMapEnd()) {
        reduceTasks = 0;
    }

    // Apply the task counts and the record classes.
    hconf.setNumMapTasks(taskNumber);
    hconf.setNumReduceTasks(reduceTasks);
    hconf.setInputClass(DBRecord.class);
    conf.setMapperClass(DBMapper.class);
    conf.setMapOutputKeyClass(DBRecord.class);
    conf.setMapOutputValueClass(DBRecord.class);
    if (hconf.getInputIsCombiner()) {
        conf.setCombinerClass(DBGroupReducer.class);
    }
}

From source file:com.ery.hadoop.mrddx.hbase.HbaseOutputFormat.java

License:Apache License

/**
 * Configures the job to write its output through HbaseOutputFormat.
 *
 * @param job
 *            the map-reduce job
 * @param tableName
 *            the HBase table name to write to
 * @return the HbaseConfiguration wrapping the job's output settings
 */
public static HbaseConfiguration setOutput(Job job, String tableName) {
    job.setOutputFormatClass(HbaseOutputFormat.class);
    job.setReduceSpeculativeExecution(false);
    HbaseConfiguration dbConf = new HbaseConfiguration(job.getConfiguration(),
            HbaseConfiguration.FLAG_HBASE_OUTPUT);
    dbConf.setOutputHBaseTableName(tableName);
    return dbConf;
}
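
Disabling reduce-side speculative execution here is the usual precaution for HBase output: a speculative duplicate of a reduce task would write the same rows twice. A hedged usage sketch (the table name is illustrative):

Job job = Job.getInstance(new Configuration(), "hbase export");
HbaseConfiguration dbConf = HbaseOutputFormat.setOutput(job, "target_table");
// ...further output settings on dbConf before submitting the job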