Example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setMapOutputKeyClass.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException 

Document

Set the key class for the map output data.
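
A minimal sketch of the common case, assuming the usual org.apache.hadoop imports shown in the examples below: the mapper's output key/value types differ from the job's final output types, so they must be declared explicitly with setMapOutputKeyClass/setMapOutputValueClass (if they are not set, Hadoop falls back to the classes given to setOutputKeyClass/setOutputValueClass). The driver, mapper and reducer names (WordLengthDriver, WordLengthMapper, WordLengthReducer) are hypothetical placeholders, not classes from the examples on this page.

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word length");
    job.setJarByClass(WordLengthDriver.class);

    // Hypothetical mapper emitting <IntWritable, Text>; reducer emitting <IntWritable, IntWritable>
    job.setMapperClass(WordLengthMapper.class);
    job.setReducerClass(WordLengthReducer.class);

    // Map output types differ from the final output types, so declare them explicitly
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Final (reducer) output types
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}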

Usage

From source file:com.yourcompany.hadoop.mapreduce.lexical.LexicalAnalyzerDriver.java

License:Apache License

public int run(String[] args) throws Exception {
    Job job = new Job();
    parseArguements(args, job);

    job.setJarByClass(LexicalAnalyzerDriver.class);

    // Mapper & Reducer Class
    job.setMapperClass(LexicalAnalyzerMapper.class);

    // Mapper Output Key & Value Type after Hadoop 0.20
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setNumReduceTasks(0);

    // Run a Hadoop Job
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.yunrang.hadoop.app.utils.CustomizedUtil.java

License:Apache License

@SuppressWarnings("rawtypes")
public static void initSnapshotMapperJob(String snapshotName, Path tableRootDir, Scan scan,
        Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, Job job,
        boolean addDependencyJars) throws IOException {
    SnapshotInputFormat.setInput(job.getConfiguration(), snapshotName, tableRootDir);
    Configuration conf = job.getConfiguration();
    job.setInputFormatClass(SnapshotInputFormat.class);
    if (outputValueClass != null) {
        job.setMapOutputValueClass(outputValueClass);
    }
    if (outputKeyClass != null) {
        job.setMapOutputKeyClass(outputKeyClass);
    }
    job.setMapperClass(mapper);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));
    if (addDependencyJars) {
        TableMapReduceUtil.addDependencyJars(job);
    }
}

From source file:com.zinnia.nectar.util.hadoop.JobCreatorUtil.java

License:Apache License

public Job createJob(Class<? extends Mapper> mapperClass,
        Class<? extends WritableComparable> mapperOutputKeyClass,
        Class<? extends WritableComparable> mapperOutputValueClass,
        Class<? extends InputFormat> inputFormatClass, String[] inputFilePaths, String outputFilePath)
        throws IOException {
    Job job = new Job();
    job.setMapperClass(mapperClass);
    job.setMapOutputKeyClass(mapperOutputKeyClass);
    job.setMapOutputValueClass(mapperOutputValueClass);
    for (String inputFilePath : inputFilePaths) {
        FileInputFormat.addInputPath(job, new Path(inputFilePath));
    }
    FileOutputFormat.setOutputPath(job, new Path(outputFilePath));
    job.setInputFormatClass(inputFormatClass);

    return job;
}

From source file:com.zjy.mongo.util.MongoTool.java

License:Apache License

private int runMapReduceJob(final Configuration conf) throws IOException {
    final Job job = Job.getInstance(conf, getJobName());
    /**
     * Any arguments specified with -D <property>=<value>
     * on the CLI will be picked up and set here.
     * They override any XML-level values.
     * Note that the space after -D is important: without the space the
     * option is picked up by the JVM itself rather than by Hadoop.
     */
    // TODO - Do we need to set job name somehow more specifically?
    // This may or may not be correct/sane
    job.setJarByClass(getClass());
    final Class<? extends Mapper> mapper = MongoConfigUtil.getMapper(conf);

    if (LOG.isDebugEnabled()) {
        LOG.debug("Mapper Class: " + mapper);
        LOG.debug("Input URI: " + conf.get(MongoConfigUtil.INPUT_URI));
    }
    job.setMapperClass(mapper);
    Class<? extends Reducer> combiner = MongoConfigUtil.getCombiner(conf);
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }
    job.setReducerClass(MongoConfigUtil.getReducer(conf));

    job.setOutputFormatClass(MongoConfigUtil.getOutputFormat(conf));
    job.setOutputKeyClass(MongoConfigUtil.getOutputKey(conf));
    job.setOutputValueClass(MongoConfigUtil.getOutputValue(conf));
    job.setInputFormatClass(MongoConfigUtil.getInputFormat(conf));
    Class mapOutputKeyClass = MongoConfigUtil.getMapperOutputKey(conf);
    Class mapOutputValueClass = MongoConfigUtil.getMapperOutputValue(conf);

    if (mapOutputKeyClass != null) {
        job.setMapOutputKeyClass(mapOutputKeyClass);
    }
    if (mapOutputValueClass != null) {
        job.setMapOutputValueClass(mapOutputValueClass);
    }

    /**
     * Determines if the job will run verbosely e.g. print debug output
     * Only works with foreground jobs
     */
    final boolean verbose = MongoConfigUtil.isJobVerbose(conf);
    /**
     * Run job in foreground aka wait for completion or background?
     */
    final boolean background = MongoConfigUtil.isJobBackground(conf);
    try {
        if (background) {
            LOG.info("Setting up and running MapReduce job in background.");
            job.submit();
            return 0;
        } else {
            LOG.info("Setting up and running MapReduce job in foreground, will wait for results.  {Verbose? "
                    + verbose + "}");
            return job.waitForCompletion(true) ? 0 : 1;
        }
    } catch (final Exception e) {
        LOG.error("Exception while executing job... ", e);
        return 1;
    }
}

From source file:com.zqh.hadoop.mr.Financial.HighLowDayDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    String input, output;
    if (args.length == 2) {
        input = args[0];
        output = args[1];
    } else {
        System.err.println("Incorrect number of arguments.  Expected: input output");
        return -1;
    }

    Job job = new Job(getConf());
    job.setJarByClass(HighLowDayDriver.class);
    job.setJobName("High Low per Day");

    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setMapperClass(HighLowDayMapper.class);
    job.setReducerClass(HighLowDayReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:com.zqh.hadoop.mr.Financial.HighLowStockDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    String input, output;
    if (args.length == 2) {
        input = args[0];
        output = args[1];
    } else {
        System.err.println("Incorrect number of arguments.  Expected: input output");
        return -1;
    }

    Job job = new Job(getConf());
    job.setJarByClass(HighLowStockDriver.class);
    job.setJobName("High Low per Stock");

    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setMapperClass(HighLowStockMapper.class);
    job.setReducerClass(HighLowStockReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:com.zqh.hadoop.mr.Financial.HighLowWritableDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    String input, output;
    if (args.length == 2) {
        input = args[0];
        output = args[1];
    } else {
        System.err.println("Incorrect number of arguments.  Expected: input output");
        return -1;
    }

    Job job = new Job(getConf());
    job.setJarByClass(HighLowWritableDriver.class);
    job.setJobName("High Low per Stock with day");

    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setMapperClass(HighLowWritableMapper.class);
    job.setReducerClass(HighLowWritableReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(StockWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:corr.job.CIAJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 1) {
        System.err.println("usage: CIAJob <input path>");
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    String inPath = args[0];
    Formatter formatter = new Formatter();
    String outPath = "." + args[0] + "_" + formatter.format("%1$tm%1$td%1$tH%1$tM%1$tS", new Date());

    Configuration config = getConf();
    Job job = new Job(config, "Algorithm A Concordance Index");
    job.setJarByClass(CIAJob.class);

    FileInputFormat.addInputPath(job, new Path(inPath));
    FileOutputFormat.setOutputPath(job, new Path(outPath));

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setMapperClass(CIAMapper.class);
    job.setReducerClass(CIAReducer.class);

    job.setMapOutputKeyClass(VariableIndexPairsWritable.class);
    job.setMapOutputValueClass(VariableValuePairsWritable.class);

    job.setOutputKeyClass(VariableIndexPairsWritable.class);
    job.setOutputValueClass(CIComputationWritable.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:cosmos.mapred.MediawikiIngestJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (1 != args.length) {
        System.err.println("Usage: input.xml,input.xml,input.xml...");
        return 1;
    }

    String inputFiles = args[0];

    Configuration conf = getConf();
    System.out.println("path " + conf.get("fs.default.name"));
    conf.addResource(new Path("/opt/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/opt/hadoop/conf/core-site.xml"));

    conf.addResource(new Path("/opt/hadoop/conf/mapred-site.xml"));

    System.out.println("path " + conf.get("fs.default.name"));
    //System.exit(1);
    Job job = new Job(conf, "Mediawiki Ingest");

    job.setJarByClass(MediawikiIngestJob.class);

    String tablename = "sortswiki";
    String zookeepers = "localhost:2181";
    String instanceName = "accumulo";
    String user = "root";
    PasswordToken passwd = new PasswordToken("secret");

    FileInputFormat.setInputPaths(job, inputFiles);

    job.setMapperClass(MediawikiMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);
    job.setOutputFormatClass(AccumuloOutputFormat.class);

    BatchWriterConfig bwConfig = new BatchWriterConfig();

    job.setInputFormatClass(MediawikiInputFormat.class);
    AccumuloOutputFormat.setZooKeeperInstance(job, instanceName, zookeepers);
    AccumuloOutputFormat.setConnectorInfo(job, user, passwd);
    AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);
    AccumuloOutputFormat.setCreateTables(job, true);
    AccumuloOutputFormat.setDefaultTableName(job, tablename);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:cp_b.CP_B.java

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(CP_B.class);

    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(IntSumReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}