Example usage for org.apache.hadoop.mapreduce Job setMapOutputKeyClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setMapOutputKeyClass.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException 

Document

Set the key class for the map output data.
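
A minimal sketch of the common case, assuming the usual org.apache.hadoop imports shown in the examples below: the mapper's output key/value types differ from the job's final output types, so they must be declared explicitly with setMapOutputKeyClass/setMapOutputValueClass (if they are not set, Hadoop falls back to the classes given to setOutputKeyClass/setOutputValueClass). The driver, mapper and reducer names (WordLengthDriver, WordLengthMapper, WordLengthReducer) are hypothetical placeholders, not classes from the examples on this page.

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word length");
    job.setJarByClass(WordLengthDriver.class);

    // Hypothetical mapper emitting <IntWritable, Text>; reducer emitting <IntWritable, IntWritable>
    job.setMapperClass(WordLengthMapper.class);
    job.setReducerClass(WordLengthReducer.class);

    // Map output types differ from the final output types, so declare them explicitly
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Final (reducer) output types
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}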

Usage

From source file:com.yourcompany.hadoop.mapreduce.lexical.LexicalAnalyzerDriver.java

License:Apache License

public int run(String[] args) throws Exception {
    Job job = new Job();
    parseArguements(args, job);

    job.setJarByClass(LexicalAnalyzerDriver.class);

    // Mapper & Reducer Class
    job.setMapperClass(LexicalAnalyzerMapper.class);

    // Mapper Output Key & Value Type after Hadoop 0.20
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setNumReduceTasks(0);

    // Run a Hadoop Job
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.yunrang.hadoop.app.utils.CustomizedUtil.java

License:Apache License

@SuppressWarnings("rawtypes")
public static void initSnapshotMapperJob(String snapshotName, Path tableRootDir, Scan scan,
        Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, Job job,
        boolean addDependencyJars) throws IOException {
    SnapshotInputFormat.setInput(job.getConfiguration(), snapshotName, tableRootDir);
    Configuration conf = job.getConfiguration();
    job.setInputFormatClass(SnapshotInputFormat.class);
    if (outputValueClass != null) {
        job.setMapOutputValueClass(outputValueClass);
    }
    if (outputKeyClass != null) {
        job.setMapOutputKeyClass(outputKeyClass);
    }
    job.setMapperClass(mapper);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));
    if (addDependencyJars) {
        TableMapReduceUtil.addDependencyJars(job);
    }
}

From source file:com.zinnia.nectar.util.hadoop.JobCreatorUtil.java

License:Apache License

public Job createJob(Class<? extends Mapper> mapperClass,
        Class<? extends WritableComparable> mapperOutputKeyClass,
        Class<? extends WritableComparable> mapperOutputValueClass,
        Class<? extends InputFormat> inputFormatClass, String[] inputFilePaths, String outputFilePath)
        throws IOException {
    Job job = new Job();
    job.setMapperClass(mapperClass);
    job.setMapOutputKeyClass(mapperOutputKeyClass);
    job.setMapOutputValueClass(mapperOutputValueClass);
    for (String inputFilePath : inputFilePaths) {
        FileInputFormat.addInputPath(job, new Path(inputFilePath));
    }
    FileOutputFormat.setOutputPath(job, new Path(outputFilePath));
    job.setInputFormatClass(inputFormatClass);

    return job;
}

From source file:com.zjy.mongo.util.MongoTool.java

License:Apache License

private int runMapReduceJob(final Configuration conf) throws IOException {
    final Job job = Job.getInstance(conf, getJobName());
    /**
     * Any arguments specified with -D <property>=<value>
     * on the CLI will be picked up and set here.
     * They override any XML-level values.
     * Note that the space after -D is important: without the space the
     * option is picked up by the JVM itself rather than by Hadoop.
     */
    // TODO - Do we need to set job name somehow more specifically?
    // This may or may not be correct/sane
    job.setJarByClass(getClass());
    final Class<? extends Mapper> mapper = MongoConfigUtil.getMapper(conf);

    if (LOG.isDebugEnabled()) {
        LOG.debug("Mapper Class: " + mapper);
        LOG.debug("Input URI: " + conf.get(MongoConfigUtil.INPUT_URI));
    }
    job.setMapperClass(mapper);
    Class<? extends Reducer> combiner = MongoConfigUtil.getCombiner(conf);
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }
    job.setReducerClass(MongoConfigUtil.getReducer(conf));

    job.setOutputFormatClass(MongoConfigUtil.getOutputFormat(conf));
    job.setOutputKeyClass(MongoConfigUtil.getOutputKey(conf));
    job.setOutputValueClass(MongoConfigUtil.getOutputValue(conf));
    job.setInputFormatClass(MongoConfigUtil.getInputFormat(conf));
    Class mapOutputKeyClass = MongoConfigUtil.getMapperOutputKey(conf);
    Class mapOutputValueClass = MongoConfigUtil.getMapperOutputValue(conf);

    if (mapOutputKeyClass != null) {
        job.setMapOutputKeyClass(mapOutputKeyClass);
    }
    if (mapOutputValueClass != null) {
        job.setMapOutputValueClass(mapOutputValueClass);
    }

    /**
     * Determines if the job will run verbosely e.g. print debug output
     * Only works with foreground jobs
     */
    final boolean verbose = MongoConfigUtil.isJobVerbose(conf);
    /**
     * Run job in foreground aka wait for completion or background?
     */
    final boolean background = MongoConfigUtil.isJobBackground(conf);
    try {
        if (background) {
            LOG.info("Setting up and running MapReduce job in background.");
            job.submit();
            return 0;
        } else {
            LOG.info("Setting up and running MapReduce job in foreground, will wait for results.  {Verbose? "
                    + verbose + "}");
            return job.waitForCompletion(true) ? 0 : 1;
        }
    } catch (final Exception e) {
        LOG.error("Exception while executing job... ", e);
        return 1;
    }
}

From source file:com.zqh.hadoop.mr.Financial.HighLowDayDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    String input, output;
    if (args.length == 2) {
        input = args[0];
        output = args[1];
    } else {
        System.err.println("Incorrect number of arguments.  Expected: input output");
        return -1;
    }

    Job job = new Job(getConf());
    job.setJarByClass(HighLowDayDriver.class);
    job.setJobName("High Low per Day");

    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setMapperClass(HighLowDayMapper.class);
    job.setReducerClass(HighLowDayReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:com.zqh.hadoop.mr.Financial.HighLowStockDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    String input, output;
    if (args.length == 2) {
        input = args[0];
        output = args[1];
    } else {
        System.err.println("Incorrect number of arguments.  Expected: input output");
        return -1;
    }

    Job job = new Job(getConf());
    job.setJarByClass(HighLowStockDriver.class);
    job.setJobName("High Low per Stock");

    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setMapperClass(HighLowStockMapper.class);
    job.setReducerClass(HighLowStockReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:com.zqh.hadoop.mr.Financial.HighLowWritableDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    String input, output;
    if (args.length == 2) {
        input = args[0];
        output = args[1];
    } else {
        System.err.println("Incorrect number of arguments.  Expected: input output");
        return -1;
    }

    Job job = new Job(getConf());
    job.setJarByClass(HighLowWritableDriver.class);
    job.setJobName("High Low per Stock with day");

    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setMapperClass(HighLowWritableMapper.class);
    job.setReducerClass(HighLowWritableReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(StockWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:corr.job.CIAJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 1) {
        System.err.println("usage: CIAJob <input path>");
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    String inPath = args[0];
    Formatter formatter = new Formatter();
    String outPath = "." + args[0] + "_" + formatter.format("%1$tm%1$td%1$tH%1$tM%1$tS", new Date());

    Configuration config = getConf();
    Job job = new Job(config, "Algorithm A Concordance Index");
    job.setJarByClass(CIAJob.class);

    FileInputFormat.addInputPath(job, new Path(inPath));
    FileOutputFormat.setOutputPath(job, new Path(outPath));

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setMapperClass(CIAMapper.class);
    job.setReducerClass(CIAReducer.class);

    job.setMapOutputKeyClass(VariableIndexPairsWritable.class);
    job.setMapOutputValueClass(VariableValuePairsWritable.class);

    job.setOutputKeyClass(VariableIndexPairsWritable.class);
    job.setOutputValueClass(CIComputationWritable.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:cosmos.mapred.MediawikiIngestJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (1 != args.length) {
        System.err.println("Usage: input.xml,input.xml,input.xml...");
        return 1;
    }

    String inputFiles = args[0];

    Configuration conf = getConf();
    System.out.println("path " + conf.get("fs.default.name"));
    conf.addResource(new Path("/opt/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/opt/hadoop/conf/core-site.xml"));

    conf.addResource(new Path("/opt/hadoop/conf/mapred-site.xml"));

    System.out.println("path " + conf.get("fs.default.name"));
    //System.exit(1);
    Job job = new Job(conf, "Mediawiki Ingest");

    job.setJarByClass(MediawikiIngestJob.class);

    String tablename = "sortswiki";
    String zookeepers = "localhost:2181";
    String instanceName = "accumulo";
    String user = "root";
    PasswordToken passwd = new PasswordToken("secret");

    FileInputFormat.setInputPaths(job, inputFiles);

    job.setMapperClass(MediawikiMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);
    job.setOutputFormatClass(AccumuloOutputFormat.class);

    BatchWriterConfig bwConfig = new BatchWriterConfig();

    job.setInputFormatClass(MediawikiInputFormat.class);
    AccumuloOutputFormat.setZooKeeperInstance(job, instanceName, zookeepers);
    AccumuloOutputFormat.setConnectorInfo(job, user, passwd);
    AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);
    AccumuloOutputFormat.setCreateTables(job, true);
    AccumuloOutputFormat.setDefaultTableName(job, tablename);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:cp_b.CP_B.java

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(CP_B.class);

    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(IntSumReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}