Example usage for org.apache.hadoop.mapreduce Job setOutputFormatClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setOutputFormatClass.

Prototype

public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException

Source Link

Document

Set the OutputFormat for the job.

Usage

From source file:com.gsinnovations.howdah.AbstractJob.java

License:Apache License

protected Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat) throws IOException {

    Job job = new Job(new Configuration(getConf()));
    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }/*from   w ww .j a  v a2 s .  com*/
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);

    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setJobName(getCustomJobName(job, mapper, reducer));

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}

From source file:com.gsinnovations.howdah.Driver.java

License:Apache License

public static void job(Path input, Path output, int numReduceTasks)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();

    Job job = new Job(conf);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(TikaMapper.class);
    //job.setCombinerClass(KMeansCombiner.class);
    //job.setReducerClass(KMeansReducer.class);
    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setJarByClass(Driver.class);
    HadoopUtil.overwriteOutput(output);/*from  ww w  . ja  v a 2 s  .c  o  m*/
    job.waitForCompletion(true);

}

From source file:com.gsvic.csmr.CSMRBase.java

License:Apache License

public static void generatePairs(String in, String out)
        throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration();
    path = out;/*from w  w  w . j a va 2 s .  co  m*/
    Job job;
    Path input, output;
    input = new Path(in);
    output = new Path(path + "/CSMRPairs");

    job = new Job(conf);
    job.setJobName("CSMR Pairs Job");
    job.setJarByClass(CSMRBase.class);

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(CSMRMapper.class);
    job.setReducerClass(CSMRReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DocumentWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorArrayWritable.class);

    job.waitForCompletion(true);
}

From source file:com.hhscyber.nl.tweets.svm.test.Tester.java

/**
 * @param args the command line arguments
 * @throws java.io.IOException/*from w  w  w . j  a  va 2 s .  c  om*/
 */
public static void main(String[] args) throws IOException, Exception {
    Conf conf = new Conf(args, "");
    Job job = new HBJob(conf, "TweetsSVMTester");

    job.setJarByClass(Tester.class);
    Scan scan = new Scan();

    TableMapReduceUtil.initTableMapperJob("hhscyber:tweets_lang", scan, TestMapper.class,
            ImmutableBytesWritable.class, Put.class, job);

    job.setOutputFormatClass(MultiTableOutputFormat.class);
    job.setReducerClass(TestReducer.class);
    job.setNumReduceTasks(2);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration());

    job.waitForCompletion(true);
}

From source file:com.hhscyber.nl.tweets.svm.train.Train.java

/**
 * @param args the command line arguments
 * @throws java.io.IOException//w w  w .  j  a v a 2 s . co  m
 */
public static void main(String[] args) throws IOException {

    Conf conf = new Conf(args, "");
    FileSystem hdfs = FileSystem.get(conf);

    hdfs.delete(new Path("trainer"), true);

    Job client = new HBJob(conf, "SVMTrainer");

    client.setJarByClass(Train.class);
    client.setMapOutputKeyClass(Text.class);
    client.setMapOutputValueClass(Text.class);

    client.setInputFormatClass(TextInputFormat.class);

    TextInputFormat.addInputPath(client, new Path("svmclass"));
    client.setNumReduceTasks(1);

    client.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(client, new Path("trainer"));

    client.setMapperClass(TrainMapper.class);
    client.setReducerClass(TrainReducer.class);

    try {
        client.waitForCompletion(true);
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
    }
}

From source file:com.hn.cluster.hadoop.mrs.SecondarySort.java

License:Apache License

public static void main(String[] args) throws Exception {
    // ?hadoop?/*from ww w. jav a2 s .  co  m*/
    Configuration conf = new Configuration();
    // ?
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    // Mapper
    job.setMapperClass(MapClass.class);
    // Reducer
    job.setReducerClass(Reduce.class);

    // 
    job.setPartitionerClass(FirstPartitioner.class);
    // 
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // map Key
    job.setMapOutputKeyClass(IntPair.class);
    // mapValue
    job.setMapOutputValueClass(IntWritable.class);

    // rduceKeyTextOutputFormatClassTextOutputFormat
    job.setOutputKeyClass(Text.class);
    // rduceValue
    job.setOutputValueClass(IntWritable.class);

    /**
     * ?????splites???RecordReder
     * ??RecordReder?keyvalue
     * Map<LongWritable, Text>
     * Mapmap<LongWritable, Text>Mapmap
     * ?List<IntPair, IntWritable>
     * map?job.setPartitionerClassList?reducer
     */
    job.setInputFormatClass(TextInputFormat.class);
    // ??RecordWriter?
    job.setOutputFormatClass(TextOutputFormat.class);

    // hdfs
    FileInputFormat.addInputPath(job, new Path("hdfs://192.1168.1.12:9000/input/input/soso.txt"));
    // hdfs
    FileOutputFormat.setOutputPath(job, new Path("hdfs://192.1168.1.12:9000/output/sort/"));
    // ??job
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.hortonworks.mapreduce.URLCount.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", " ");
    Job job = Job.getInstance(conf, "URLCount");
    job.setJarByClass(getClass());//from  ww  w  . ja v a2  s  .  c  om
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(URLCountM.class);
    job.setReducerClass(URLCountR.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return (job.waitForCompletion(true) == true ? 0 : -1);
}

From source file:com.hortonworks.pso.data.generator.mapreduce.DataGenTool.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    Job job = Job.getInstance(getConf()); // new Job(conf, this.getClass().getCanonicalName());

    //        Configuration conf = getConf();

    int mappers = 2;
    String output = null;//from ww  w . ja  va  2  s  .  co m
    String config = null;
    long count = 100;

    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-mappers".equals(args[i])) {
                mappers = Integer.parseInt(args[++i]);
                otherArgs.add("-Dmapreduce.job.maps=" + Integer.toString(mappers));
            } else if ("-output".equals(args[i])) {
                output = args[++i];
            } else if ("-json.cfg".equals(args[i])) {
                config = args[++i];
            } else if ("-count".equals(args[i])) {
                count = Long.parseLong(args[++i]);
            } else {
                otherArgs.add(args[i]);
            }

        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    job.getConfiguration().set("json.cfg", config);

    String[] altArgs = new String[otherArgs.size()];
    otherArgs.toArray(altArgs);

    GenericOptionsParser gop = new GenericOptionsParser(job.getConfiguration(), altArgs);

    DataGenInputFormat.setNumberOfRows(job, count);

    job.setJarByClass(DataGenTool.class);

    Path output_path = new Path(output);

    if (output_path.getFileSystem(getConf()).exists(output_path)) {
        throw new IOException("Output directory " + output_path + " already exists.");
    }

    FileOutputFormat.setOutputPath(job, output_path);

    job.setMapperClass(DataGenMapper.class);
    // Map Only Job
    job.setNumReduceTasks(0);
    //        job.setReducerClass(RerateReducer.class);

    job.setInputFormatClass(DataGenInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    //        job.setOutputKeyClass(Text.class);
    //        job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;

}

From source file:com.ifeng.vdn.videolog.sort.SortGroupResultPreprocessor.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }//from  w  w  w .jav a 2 s. co m

    Job job = Job.getInstance(getConf());
    job.setMapperClass(SortGroupResultMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Sort data by total number:
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.iflytek.spider.crawl.CrawlDb.java

License:Apache License

public static Job createJob(Configuration config, Path crawlDb) throws IOException {
    Path newCrawlDb = new Path(crawlDb, Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    Job job = AvroJob.getAvroJob(config);
    job.setJobName("crawldb " + crawlDb);

    Path current = new Path(crawlDb, CURRENT_NAME);
    if (FileSystem.get(config).exists(current)) {
        FileInputFormat.addInputPath(job, current);
    }//from  ww  w  .  j a v a 2  s  . c o m
    job.setInputFormatClass(AvroPairInputFormat.class);

    job.setMapperClass(CrawlDbFilter.class);
    job.setReducerClass(CrawlDbReducer.class);

    FileOutputFormat.setOutputPath(job, newCrawlDb);
    job.setOutputFormatClass(AvroMapOutputFormat.class);
    job.setOutputKeyClass(String.class);
    job.setOutputValueClass(CrawlDatum.class);

    return job;
}