Example usage for org.apache.hadoop.mapreduce Job setOutputFormatClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job#setOutputFormatClass.

Prototype

public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException 

Document

Set the OutputFormat for the job.
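Before the individual examples, here is a minimal, self-contained sketch of a map-only driver that calls setOutputFormatClass. The class name, job name, and the use of args[0]/args[1] as input and output paths are illustrative placeholders, not taken from any project below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

// Hypothetical driver class for illustration only.
public class SetOutputFormatExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "setOutputFormatClass example");
        job.setJarByClass(SetOutputFormatExample.class);

        job.setInputFormatClass(TextInputFormat.class);
        // The call documented on this page: it selects how output
        // (key, value) pairs are written. It must be made before the job
        // is submitted; afterwards it throws IllegalStateException.
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setMapperClass(Mapper.class); // identity mapper
        job.setNumReduceTasks(0);         // map-only job
        // TextInputFormat produces (LongWritable offset, Text line) pairs,
        // which the identity mapper passes through unchanged.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Swapping TextOutputFormat.class for SequenceFileOutputFormat.class, as several of the examples below do, is the only change needed to emit binary sequence files instead of plain text.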

Usage

From source file:de.bankmark.bigbench.queries.q28.ToSequenceFile.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    Job job = Job.getInstance(getConf());

    job.setJarByClass(ToSequenceFile.class);
    if (args.length != 2) {
        usage(job);
        return 2;
    }
    System.out.println("input:");
    job.setJobName(ToSequenceFile.class.getSimpleName() + "::" + args[0] + "->" + args[1]);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);
    System.out.println("Input: " + input + "  out -> " + output);
    FileInputFormat.addInputPath(job, input);
    SequenceFileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(Reducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:de.gesundkrank.wikipedia.hadoop.util.RepackToMapFile.java

License:Open Source License

public int run(String basePath, String outputPath, boolean checkNew, boolean skipRedirect) throws Exception {
    Configuration configuration = getConf();
    configuration.setBoolean("skipRedirect", skipRedirect);

    LOGGER.info("Tool name: " + getClass().getSimpleName());

    Job job = Job.getInstance(configuration, getClass().getSimpleName());
    job.setJarByClass(getClass());

    job.setMapperClass(WikiMapper.class);
    job.setInputFormatClass(WikiInputFormat.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(WikiRevisionWritable.class);

    WikiDumpLoader wikiDumpLoader = new WikiDumpLoader(checkNew);
    wikiDumpLoader.addWikiDump(job, basePath);

    MapFileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setNumReduceTasks(1);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:de.hpi.fgis.hdrs.mapreduce.examples.PredicateAnalysis.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(PredicateAnalysis.class);
    job.setJobName("Predicate Analysis");

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(LongWritable.class);

    //job.setOutputKeyClass(Text.class);
    //job.setOutputValueClass(Text.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(LongWritable.class);

    job.setMapperClass(Map.class);
    //job.setReducerClass(Reduce.class);

    job.setNumReduceTasks(0);

    job.setInputFormatClass(TripleInputFormat.class);
    //job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    TripleInputFormat.setStoreAddress(job, args[0]);
    TripleInputFormat.setIndex(job, "POS");
    TripleInputFormat.setPattern(job, Triple.newPattern(null, args[1], null));
    TripleInputFormat.setAggregationLevel2(job);

    SequenceFileOutputFormat.setOutputPath(job, new Path(args[2]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:de.hpi.fgis.hdrs.mapreduce.examples.PredicateCount.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(PredicateCount.class);
    job.setJobName("PredicateCount");

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TripleInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    int argc = 0;

    TripleInputFormat.setStoreAddress(job, args[argc++]);
    TripleInputFormat.setIndex(job, args[argc++]);
    if ("-p".equals(args[argc])) {
        argc++;
        String s = args[argc++];
        String p = args[argc++];
        String o = args[argc++];
        if ("*".equals(s))
            s = null;
        if ("*".equals(p))
            p = null;
        if ("*".equals(o))
            o = null;
        TripleInputFormat.setPattern(job, Triple.newPattern(s, p, o));
    } else {
        FileOutputFormat.setOutputPath(job, new Path(args[argc]));
    }

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:de.hpi.fgis.hdrs.mapreduce.examples.TripleCount.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(TripleCount.class);
    job.setJobName("TripleCount");

    job.setMapOutputKeyClass(ByteWritable.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setNumReduceTasks(1);

    job.setInputFormatClass(TripleInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    int argc = 0;

    TripleInputFormat.setStoreAddress(job, args[argc++]);
    TripleInputFormat.setIndex(job, args[argc++]);
    if ("-p".equals(args[argc])) {
        argc++;
        String s = args[argc++];
        String p = args[argc++];
        String o = args[argc++];
        if ("*".equals(s))
            s = null;
        if ("*".equals(p))
            p = null;
        if ("*".equals(o))
            o = null;
        TripleInputFormat.setPattern(job, Triple.newPattern(s, p, o));
    } else {
        TextOutputFormat.setOutputPath(job, new Path(args[argc]));
    }

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:de.hpi.fgis.hdrs.mapreduce.examples.TripleSize.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(TripleSize.class);
    job.setJobName("TripleSize");

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Combine.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TripleInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    int argc = 0;

    TripleInputFormat.setStoreAddress(job, args[argc++]);
    TripleInputFormat.setIndex(job, args[argc++]);
    if ("-p".equals(args[argc])) {
        argc++;
        String s = args[argc++];
        String p = args[argc++];
        String o = args[argc++];
        if ("*".equals(s))
            s = null;
        if ("*".equals(p))
            p = null;
        if ("*".equals(o))
            o = null;
        TripleInputFormat.setPattern(job, Triple.newPattern(s, p, o));
    } else {
        TextOutputFormat.setOutputPath(job, new Path(args[argc]));
    }

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:de.hpi.fgis.hdrs.mapreduce.IndexLoader.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (3 != args.length) {
        System.out.println(
                "Usage: IndexLoader <StoreAddress> <SourceIndex> " + "<TargetIndex1>[,<TargetIndex2>...]");
        return 0;
    }

    Job job = new Job(getConf());
    job.setJarByClass(IndexLoader.class);
    job.setJobName("HDRS Index Loader");

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TripleOutputFormat.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(TripleOutputFormat.class);

    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(TripleInputFormat.class);
    job.setOutputFormatClass(TripleOutputFormat.class);

    TripleInputFormat.setStoreAddress(job, args[0]);
    TripleInputFormat.setIndex(job, args[1]);

    TripleOutputFormat.setStoreAddress(job, args[0]);
    TripleOutputFormat.setOutputIndexes(job, args[2]);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:de.l3s.concatgz.io.ImmediateOutput.java

License:Open Source License

public static void initialize(Job job) {
    job.setOutputFormatClass(NullOutputFormat.class);
}

From source file:de.l3s.content.timex.extracting.ClueWeb09Timex.java

License:Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(
            OptionBuilder.withArgName("input").hasArg().withDescription("input path").create(INPUT_OPTION));

    options.addOption(
            OptionBuilder.withArgName("output").hasArg().withDescription("output path").create(OUTPUT_OPTION));

    options.addOption(OptionBuilder.withArgName("column").hasArg()
            .withDescription("column to store row data into (must exist)").create(COLUMN));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    cmdline = parser.parse(options, args);

    if (!cmdline.hasOption(INPUT_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    if (!cmdline.hasOption(OUTPUT_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String input = cmdline.getOptionValue(INPUT_OPTION);

    String output = cmdline.getOptionValue(OUTPUT_OPTION);

    //      String column = cmdline.getOptionValue(COLUMN);

    LOG.info("Tool name: " + ClueWeb09Timex.class.getSimpleName());
    LOG.info(" - input: " + input);
    LOG.info(" - output: " + output);
    //      LOG.info(" - column: " + column);

    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", "node05.ib,node03.ib,node04.ib");
    conf.set("hbase.zookeeper.property.clientPort", "2181");
    conf.set("hbase.master", "master.ib");

    //      conf.set("conf.column", column);

    long milliSeconds = 10000 * 60 * 60; //x10 default
    conf.setLong("mapred.task.timeout", milliSeconds);

    Job job = Job.getInstance(conf, ClueWeb09Timex.class.getSimpleName()
            + " time-confident extraction + annotation + HBase import: " + input);

    //Configuration conf = new Configuration();
    //Job job = Job.getInstance(conf, "web pages count");
    job.setJarByClass(ClueWeb09Timex.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(ClueWeb09InputFormat.class);
    job.setOutputFormatClass(TableOutputFormat.class);
    job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, output);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Writable.class);
    job.setMapperClass(TMapper.class);
    //job.setReducerClass(IntSumReducer.class);
    //job.setOutputKeyClass(Text.class);
    //job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(input));
    //FileOutputFormat.setOutputPath(job, new Path(output));
    job.waitForCompletion(true);

    return 0;
}

From source file:de.l3s.content.timex.extracting.WikiTimex.java

License:Apache License

@SuppressWarnings("static-access")
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg()
            .withDescription("two-letter language code").create(LANGUAGE_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String language = "en"; // Assume 'en' by default.
    if (cmdline.hasOption(LANGUAGE_OPTION)) {
        language = cmdline.getOptionValue(LANGUAGE_OPTION);
        if (language.length() != 2) {
            System.err.println("Error: \"" + language + "\" unknown language!");
            return -1;
        }
    }

    String inputPath = cmdline.getOptionValue(INPUT_OPTION);

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - XML dump file: " + inputPath);
    LOG.info(" - language: " + language);

    Job job = Job.getInstance(getConf());
    job.setJarByClass(WikiTimex.class);
    job.setJobName(String.format("CountWikipediaPages[%s: %s, %s: %s]", INPUT_OPTION, inputPath,
            LANGUAGE_OPTION, language));

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, new Path(inputPath));

    if (language != null) {
        job.getConfiguration().set("wiki.language", language);
    }

    job.setInputFormatClass(WikipediaPageInputFormat.class);
    job.setOutputFormatClass(NullOutputFormat.class);

    job.setMapperClass(TMapper.class);

    job.waitForCompletion(true);

    return 0;
}