List of usage examples for org.apache.hadoop.mapreduce Job setOutputFormatClass
public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException
From source file:de.bankmark.bigbench.queries.q28.ToSequenceFile.java
License:Apache License
/**
 * Configures and runs a map-only job that copies the input through
 * {@code IdentityMapper} and writes it with {@code SequenceFileOutputFormat}.
 *
 * @param args exactly two entries: the input path followed by the output path
 * @return 2 if the argument count is wrong (after printing usage), 0 if the
 *         job completed successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJarByClass(ToSequenceFile.class);

    if (args.length != 2) {
        usage(job);
        return 2;
    }

    System.out.println("input:");
    final String source = args[0];
    final String target = args[1];
    job.setJobName(ToSequenceFile.class.getSimpleName() + "::" + source + "->" + target);

    Path input = new Path(source);
    Path output = new Path(target);
    System.out.println("Input: " + input + " out -> " + output);

    // Input / output locations.
    FileInputFormat.addInputPath(job, input);
    SequenceFileOutputFormat.setOutputPath(job, output);

    // Processing classes; a reducer class is registered, but the reduce
    // task count is set to zero below.
    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(Reducer.class);

    // Text keys and values for both the map output and the final output.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}
From source file:de.gesundkrank.wikipedia.hadoop.util.RepackToMapFile.java
License:Open Source License
public int run(String basePath, String outputPath, boolean checkNew, boolean skipRedirect) throws Exception { Configuration configuration = getConf(); configuration.setBoolean("skipRedirect", skipRedirect); LOGGER.info("Tool name: " + getClass().getSimpleName()); Job job = Job.getInstance(configuration, getClass().getSimpleName()); job.setJarByClass(getClass());//from w ww .j a va 2 s. c om job.setMapperClass(WikiMapper.class); job.setInputFormatClass(WikiInputFormat.class); job.setOutputFormatClass(MapFileOutputFormat.class); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(WikiRevisionWritable.class); WikiDumpLoader wikiDumpLoader = new WikiDumpLoader(checkNew); wikiDumpLoader.addWikiDump(job, basePath); MapFileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setNumReduceTasks(1); return job.waitForCompletion(true) ? 0 : 1; }
From source file:de.hpi.fgis.hdrs.mapreduce.examples.PredicateAnalysis.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); job.setJarByClass(PredicateAnalysis.class); job.setJobName("Predicate Analysis"); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(LongWritable.class); //job.setOutputKeyClass(Text.class); //job.setOutputValueClass(Text.class); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(LongWritable.class); job.setMapperClass(Map.class); //job.setReducerClass(Reduce.class); job.setNumReduceTasks(0);// w w w. j av a2 s . c o m job.setInputFormatClass(TripleInputFormat.class); //job.setOutputFormatClass(TextOutputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); TripleInputFormat.setStoreAddress(job, args[0]); TripleInputFormat.setIndex(job, "POS"); TripleInputFormat.setPattern(job, Triple.newPattern(null, args[1], null)); TripleInputFormat.setAggregationLevel2(job); SequenceFileOutputFormat.setOutputPath(job, new Path(args[2])); boolean success = job.waitForCompletion(true); return success ? 0 : 1; }
From source file:de.hpi.fgis.hdrs.mapreduce.examples.PredicateCount.java
License:Apache License
/**
 * Configures and runs the "PredicateCount" job.
 *
 * <p>Expected arguments:
 * {@code <storeAddress> <index> [-p <s> <p> <o>] <outputPath>}.
 * Within the optional pattern, {@code *} is translated to {@code null}
 * before the pattern is built.
 *
 * @param args command-line arguments as described above
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if an argument is missing or job setup/execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(PredicateCount.class);
    job.setJobName("PredicateCount");

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TripleInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    int argc = 0;
    TripleInputFormat.setStoreAddress(job, args[argc++]);
    TripleInputFormat.setIndex(job, args[argc++]);

    if ("-p".equals(args[argc])) {
        argc++;
        String s = args[argc++];
        String p = args[argc++];
        String o = args[argc++];
        if ("*".equals(s)) {
            s = null;
        }
        if ("*".equals(p)) {
            p = null;
        }
        if ("*".equals(o)) {
            o = null;
        }
        TripleInputFormat.setPattern(job, Triple.newPattern(s, p, o));
    }

    // The output path is required whether or not a pattern was supplied:
    // the job writes through a file-based output format, which refuses to
    // run without an output directory. It is therefore set unconditionally,
    // taking the argument that follows the optional pattern.
    FileOutputFormat.setOutputPath(job, new Path(args[argc]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:de.hpi.fgis.hdrs.mapreduce.examples.TripleCount.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); job.setJarByClass(TripleCount.class); job.setJobName("TripleCount"); job.setMapOutputKeyClass(ByteWritable.class); job.setMapOutputValueClass(LongWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setNumReduceTasks(1);//from w w w . j a v a 2s. c o m job.setInputFormatClass(TripleInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); int argc = 0; TripleInputFormat.setStoreAddress(job, args[argc++]); TripleInputFormat.setIndex(job, args[argc++]); if ("-p".equals(args[argc])) { argc++; String s = args[argc++]; String p = args[argc++]; String o = args[argc++]; if ("*".equals(s)) s = null; if ("*".equals(p)) p = null; if ("*".equals(o)) o = null; TripleInputFormat.setPattern(job, Triple.newPattern(s, p, o)); } else { TextOutputFormat.setOutputPath(job, new Path(args[argc])); } boolean success = job.waitForCompletion(true); return success ? 0 : 1; }
From source file:de.hpi.fgis.hdrs.mapreduce.examples.TripleSize.java
License:Apache License
/**
 * Configures and runs the "TripleSize" job with text output.
 *
 * <p>Expected arguments:
 * {@code <storeAddress> <index> [-p <s> <p> <o>] <outputPath>}.
 * Within the optional pattern, {@code *} is translated to {@code null}
 * before the pattern is built.
 *
 * @param args command-line arguments as described above
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if an argument is missing or job setup/execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(TripleSize.class);
    job.setJobName("TripleSize");

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Combine.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TripleInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    int argc = 0;
    TripleInputFormat.setStoreAddress(job, args[argc++]);
    TripleInputFormat.setIndex(job, args[argc++]);

    if ("-p".equals(args[argc])) {
        argc++;
        String s = args[argc++];
        String p = args[argc++];
        String o = args[argc++];
        if ("*".equals(s)) {
            s = null;
        }
        if ("*".equals(p)) {
            p = null;
        }
        if ("*".equals(o)) {
            o = null;
        }
        TripleInputFormat.setPattern(job, Triple.newPattern(s, p, o));
    }

    // The output path is required whether or not a pattern was supplied:
    // the job writes through a file-based output format, which refuses to
    // run without an output directory. It is therefore set unconditionally,
    // taking the argument that follows the optional pattern.
    TextOutputFormat.setOutputPath(job, new Path(args[argc]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:de.hpi.fgis.hdrs.mapreduce.IndexLoader.java
License:Apache License
/**
 * Configures and runs the map-only "HDRS Index Loader" job, which reads
 * triples from one index of a store through {@code TripleInputFormat} and
 * writes them through {@code TripleOutputFormat} to the given target indexes
 * of the same store.
 *
 * @param args exactly three entries: store address, source index, and a
 *             comma-separated list of target indexes
 * @return 2 if the argument count is wrong (after printing usage to stderr),
 *         0 if the job completed successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    if (3 != args.length) {
        // A malformed invocation is an error: report it on stderr and return
        // a non-zero status so calling scripts do not mistake it for success.
        System.err.println(
                "Usage: IndexLoader <StoreAddress> <SourceIndex> " + "<TargetIndex1>[,<TargetIndex2>...]");
        return 2;
    }

    Job job = new Job(getConf());
    job.setJarByClass(IndexLoader.class);
    job.setJobName("HDRS Index Loader");

    // NOTE(review): an OutputFormat class is registered as the *value* class
    // here; this looks like it may have been meant to be the triple value
    // type. Left unchanged -- confirm against the mapper's output type.
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TripleOutputFormat.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(TripleOutputFormat.class);

    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(TripleInputFormat.class);
    job.setOutputFormatClass(TripleOutputFormat.class);

    // Source and target live in the same store (args[0]).
    TripleInputFormat.setStoreAddress(job, args[0]);
    TripleInputFormat.setIndex(job, args[1]);

    TripleOutputFormat.setStoreAddress(job, args[0]);
    TripleOutputFormat.setOutputIndexes(job, args[2]);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:de.l3s.concatgz.io.ImmediateOutput.java
License:Open Source License
/**
 * Sets {@code NullOutputFormat} as the output format of the given job.
 *
 * @param job the job to configure
 */
public static void initialize(Job job) {
    job.setOutputFormatClass(NullOutputFormat.class);
}
From source file:de.l3s.content.timex.extracting.ClueWeb09Timex.java
License:Apache License
/** * Runs this tool./* w ww. j ava2s.co m*/ */ @SuppressWarnings("static-access") public int run(String[] args) throws Exception { Options options = new Options(); options.addOption( OptionBuilder.withArgName("input").hasArg().withDescription("input path").create(INPUT_OPTION)); options.addOption( OptionBuilder.withArgName("output").hasArg().withDescription("output path").create(OUTPUT_OPTION)); options.addOption(OptionBuilder.withArgName("column").hasArg() .withDescription("column to store row data into (must exist)").create(COLUMN)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); cmdline = parser.parse(options, args); if (!cmdline.hasOption(INPUT_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } if (!cmdline.hasOption(OUTPUT_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String input = cmdline.getOptionValue(INPUT_OPTION); String output = cmdline.getOptionValue(OUTPUT_OPTION); // String column = cmdline.getOptionValue(COLUMN); LOG.info("Tool name: " + ClueWeb09Timex.class.getSimpleName()); LOG.info(" - input: " + input); LOG.info(" - output: " + output); // LOG.info(" - column: " + column); Configuration conf = HBaseConfiguration.create(); conf.set("hbase.zookeeper.quorum", "node05.ib,node03.ib,node04.ib"); conf.set("hbase.zookeeper.property.clientPort", "2181"); conf.set("hbase.master", "master.ib"); // conf.set("conf.column", column); long milliSeconds = 10000 * 60 * 60; //x10 default conf.setLong("mapred.task.timeout", milliSeconds); Job job = Job.getInstance(conf, ClueWeb09Timex.class.getSimpleName() + " time-confident extraction + annotation + HBase import: " + input); //Configuration conf = new Configuration(); //Job job = Job.getInstance(conf, "web pages count"); 
job.setJarByClass(ClueWeb09Timex.class); job.setNumReduceTasks(0); job.setInputFormatClass(ClueWeb09InputFormat.class); job.setOutputFormatClass(TableOutputFormat.class); job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, output); job.setOutputKeyClass(ImmutableBytesWritable.class); job.setOutputValueClass(Writable.class); job.setMapperClass(TMapper.class); //job.setReducerClass(IntSumReducer.class); //job.setOutputKeyClass(Text.class); //job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(input)); //FileOutputFormat.setOutputPath(job, new Path(output)); job.waitForCompletion(true); return 0; }
From source file:de.l3s.content.timex.extracting.WikiTimex.java
License:Apache License
@SuppressWarnings("static-access") @Override/*from ww w .j a v a2 s .c o m*/ public int run(String[] args) throws Exception { Options options = new Options(); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION)); options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg() .withDescription("two-letter language code").create(LANGUAGE_OPTION)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String language = "en"; // Assume 'en' by default. if (cmdline.hasOption(LANGUAGE_OPTION)) { language = cmdline.getOptionValue(LANGUAGE_OPTION); if (language.length() != 2) { System.err.println("Error: \"" + language + "\" unknown language!"); return -1; } } String inputPath = cmdline.getOptionValue(INPUT_OPTION); LOG.info("Tool name: " + this.getClass().getName()); LOG.info(" - XML dump file: " + inputPath); LOG.info(" - language: " + language); Job job = Job.getInstance(getConf()); job.setJarByClass(WikiTimex.class); job.setJobName(String.format("CountWikipediaPages[%s: %s, %s: %s]", INPUT_OPTION, inputPath, LANGUAGE_OPTION, language)); job.setNumReduceTasks(0); FileInputFormat.setInputPaths(job, new Path(inputPath)); if (language != null) { job.getConfiguration().set("wiki.language", language); } job.setInputFormatClass(WikipediaPageInputFormat.class); job.setOutputFormatClass(NullOutputFormat.class); job.setMapperClass(TMapper.class); job.waitForCompletion(true); return 0; }