List of usage examples for org.apache.hadoop.mapreduce Job setOutputFormatClass
public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException
From source file:com.yahoo.glimmer.indexing.preprocessor.PrepTool.java
License:Open Source License
@Override public int run(String[] args) throws Exception { SimpleJSAP jsap = new SimpleJSAP(PrepTool.class.getName(), "RDF tuples pre-processor for Glimmer", new Parameter[] { new Switch(NO_CONTEXTS_ARG, 'C', NO_CONTEXTS_ARG, "Don't process the contexts for each tuple."), new FlaggedOption(ONTOLOGY_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'O', ONTOLOGY_ARG), new FlaggedOption(REDUCER_COUNT_ARG, JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'r', REDUCER_COUNT_ARG), new UnflaggedOption(INPUT_ARG, JSAP.STRING_PARSER, JSAP.REQUIRED, "HDFS location for the input data."), new UnflaggedOption(OUTPUT_ARG, JSAP.STRING_PARSER, JSAP.REQUIRED, "HDFS location for the out data."), }); JSAPResult jsapResult = jsap.parse(args); if (!jsapResult.success()) { System.err.print(jsap.getUsage()); System.exit(1);/* ww w.j a va 2 s. co m*/ } Configuration config = getConf(); boolean withContexts = !jsapResult.getBoolean(NO_CONTEXTS_ARG, false); config.setBoolean(TuplesToResourcesMapper.INCLUDE_CONTEXTS_KEY, withContexts); // The ontology if any... String ontologyFilename = jsapResult.getString(ONTOLOGY_ARG); if (ontologyFilename != null) { // Load the ontology InputStream ontologyInputStream = new FileInputStream(ontologyFilename); OWLOntology ontology = OntologyLoader.load(ontologyInputStream); System.out.println( "Loaded ontology from " + ontologyFilename + " with " + ontology.getAxiomCount() + " axioms."); ArrayList<String> ontologyClasses = new ArrayList<String>(); for (OWLClass owlClass : ontology.getClassesInSignature()) { ontologyClasses.add(owlClass.getIRI().toString()); } System.out.println("Adding " + ontologyClasses.size() + " classes from ontology."); config.setStrings(TuplesToResourcesMapper.EXTRA_RESOURCES, ontologyClasses.toArray(new String[0])); } else { System.out.println("No ontology filename set in conf. No ontology has been loaded."); } Job job = Job.getInstance(config); job.setJarByClass(PrepTool.class); job.setJobName(PrepTool.class.getName() + "-part1-" + System.currentTimeMillis()); job.setInputFormatClass(TextInputFormat.class); job.setMapperClass(TuplesToResourcesMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); int reducerCount = jsapResult.getInt(REDUCER_COUNT_ARG, DEFAULT_REDUCER_COUNT); job.setNumReduceTasks(reducerCount); if (reducerCount == 1) { // We assign 'global' ids in the reducer. For this to work, there // can be only one. But using just one reducer, we run out of local disk space during the // pre-reduce merge with big data sets like WCC. job.setReducerClass(ResourcesReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Object.class); job.setOutputFormatClass(ResourceRecordWriter.OutputFormat.class); } else { /* * TODO: Take the functionality of the reducer and move it to run on * the gateway. We then use n identity reducers, the output of which * will be read and merged as streams on the gateway. */ } FileInputFormat.setInputPaths(job, new Path(jsapResult.getString(INPUT_ARG))); Path outputDir = new Path(jsapResult.getString(OUTPUT_ARG)); FileOutputFormat.setOutputPath(job, outputDir); if (!job.waitForCompletion(true)) { System.err.println("Failed to process tuples from " + jsapResult.getString(INPUT_ARG)); return 1; } // IF THERE WAS ONLY ONE REDUCER WE NOW HAVE // One file per reducer containing lists of urls(recourses) for // subjects, predicates, objects and contexts. // One file per reducer that contains all resources. subjects + // predicates + objects + contexts. // One file per reducer that contains the subjects + all <predicate> // <object>|"Literal" <context> on that subject. // IF THERE WAS MORE THAN ONE REDUCER WE NOW HAVE N FILES THAT NEED TO BE MERGED ON THE GATEWAY. TODO. return 0; }
From source file:com.yahoo.semsearch.fastlinking.io.RepackWikipedia.java
License:Apache License
@SuppressWarnings("static-access") @Override//w ww . j a v a2 s.c o m public int run(String[] args) throws Exception { Options options = new Options(); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output location") .create(OUTPUT_OPTION)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("mapping file") .create(MAPPING_FILE_OPTION)); options.addOption(OptionBuilder.withArgName("block|record|none").hasArg() .withDescription("compression type").create(COMPRESSION_TYPE_OPTION)); options.addOption(OptionBuilder.withArgName("en|sv|de").hasArg().withDescription("two-letter language code") .create(LANGUAGE_OPTION)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION) || !cmdline.hasOption(MAPPING_FILE_OPTION) || !cmdline.hasOption(COMPRESSION_TYPE_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT_OPTION); String outputPath = cmdline.getOptionValue(OUTPUT_OPTION); String mappingFile = cmdline.getOptionValue(MAPPING_FILE_OPTION); String compressionType = cmdline.getOptionValue(COMPRESSION_TYPE_OPTION); if (!"block".equals(compressionType) && !"record".equals(compressionType) && !"none".equals(compressionType)) { System.err.println("Error: \"" + compressionType + "\" unknown compression type!"); return -1; } String language = null; if (cmdline.hasOption(LANGUAGE_OPTION)) { language = cmdline.getOptionValue(LANGUAGE_OPTION); if (language.length() != 2) { System.err.println("Error: \"" + language + "\" unknown language!"); return -1; } } // this is the default block size int blocksize = 1000000; Job job = Job.getInstance(getConf()); job.setJarByClass(RepackWikipedia.class); job.setJobName(String.format("RepackWikipedia[%s: %s, %s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath, OUTPUT_OPTION, outputPath, COMPRESSION_TYPE_OPTION, compressionType, LANGUAGE_OPTION, language)); job.getConfiguration().set(DOCNO_MAPPING_FIELD, mappingFile); LOG.info("Tool name: " + this.getClass().getName()); LOG.info(" - XML dump file: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - docno mapping data file: " + mappingFile); LOG.info(" - compression type: " + compressionType); LOG.info(" - language: " + language); if ("block".equals(compressionType)) { LOG.info(" - block size: " + blocksize); } job.setNumReduceTasks(0); FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); if ("none".equals(compressionType)) { FileOutputFormat.setCompressOutput(job, false); } else { FileOutputFormat.setCompressOutput(job, true); if ("record".equals(compressionType)) { SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.RECORD); } else { SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK); job.getConfiguration().setInt("io.seqfile.compress.blocksize", blocksize); } } if (language != null) { job.getConfiguration().set("wiki.language", language); } job.setInputFormatClass(WikipediaPageInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(WikipediaPageFactory.getWikipediaPageClass(language)); //job.setOutputValueClass(EnglishWikipediaPage.class); job.setMapperClass(MyMapper.class); // Delete the output directory if it exists already. FileSystem.get(getConf()).delete(new Path(outputPath), true); return job.waitForCompletion(true) ? 0 : -1; }
From source file:com.yahoo.semsearch.fastlinking.io.WikipediaDocnoMappingBuilder.java
License:Apache License
@SuppressWarnings("static-access") @Override/*from w ww . j a v a 2s. c om*/ public int run(String[] args) throws Exception { Options options = new Options(); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output file") .create(OUTPUT_FILE_OPTION)); options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr|it").hasArg() .withDescription("two-letter language code").create(LANGUAGE_OPTION)); options.addOption(KEEP_ALL_OPTION, false, "keep all pages"); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_FILE_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String language = null; if (cmdline.hasOption(LANGUAGE_OPTION)) { language = cmdline.getOptionValue(LANGUAGE_OPTION); if (language.length() != 2) { System.err.println("Error: \"" + language + "\" unknown language!"); return -1; } } String inputPath = cmdline.getOptionValue(INPUT_OPTION); String outputFile = cmdline.getOptionValue(OUTPUT_FILE_OPTION); boolean keepAll = cmdline.hasOption(KEEP_ALL_OPTION); String tmpPath = "tmp-" + WikipediaDocnoMappingBuilder.class.getSimpleName() + "-" + RANDOM.nextInt(10000); LOG.info("Tool name: " + this.getClass().getName()); LOG.info(" - input: " + inputPath); LOG.info(" - output file: " + outputFile); LOG.info(" - keep all pages: " + keepAll); LOG.info(" - language: " + language); Job job = Job.getInstance(getConf()); job.setJarByClass(WikipediaDocnoMappingBuilder.class); job.setJobName(String.format("BuildWikipediaDocnoMapping[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath, OUTPUT_FILE_OPTION, outputFile, LANGUAGE_OPTION, language)); job.getConfiguration().setBoolean(KEEP_ALL_OPTION, keepAll); if (language != null) { job.getConfiguration().set("wiki.language", language); } job.setNumReduceTasks(1); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(tmpPath)); FileOutputFormat.setCompressOutput(job, false); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); job.setInputFormatClass(WikipediaPageInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // Delete the output directory if it exists already. FileSystem.get(getConf()).delete(new Path(tmpPath), true); if (job.waitForCompletion(true)) { // long cnt = keepAll ? job.getCounters().findCounter(PageTypes.TOTAL).getValue() : job.getCounters().findCounter(PageTypes.ARTICLE).getValue(); long cnt = job.getCounters() .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue(); WikipediaDocnoMapping.writeDocnoMappingData(FileSystem.get(getConf()), tmpPath + "/part-r-00000", (int) cnt, outputFile); FileSystem.get(getConf()).delete(new Path(tmpPath), true); return 0; } else { return -1; } }
From source file:com.yahoo.semsearch.fastlinking.utils.RunFELOntheGrid.java
License:Apache License
public int run(String[] args) throws Exception { Configuration conf = getConf(); Job job = new Job(conf); //Job job = Job.getInstance( conf ); job.setJarByClass(RunFELOntheGrid.class); // Process custom command-line options Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // Specify various job-specific parameters job.setJobName("Entity Linker"); job.setNumReduceTasks(100);// w ww. ja va2 s . c o m job.setJarByClass(RunFELOntheGrid.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); job.setMapperClass(FELMapper.class); job.setReducerClass(FELReducer.class); job.setCombinerClass(FELReducer.class); job.waitForCompletion(true); return 0; }
From source file:com.yahoo.semsearch.fastlinking.w2v.EntityEmbeddings.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); Job job = new Job(conf); //Job job = Job.getInstance( conf ); job.setJarByClass(EntityEmbeddings.class); // Process custom command-line options Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // Specify various job-specific parameters job.setJobName("Entity embeddings"); job.setNumReduceTasks(1);/*from w w w . j av a2s .c o m*/ job.setJarByClass(EntityEmbeddings.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(EntityEMapper.class); job.setReducerClass(EntityEReducer.class); job.setCombinerClass(EntityEReducer.class); job.waitForCompletion(true); return 0; }
From source file:com.yahoo.ycsb.bulk.hbase.BulkDataGeneratorJob.java
License:Apache License
/** * Parameters for bulk loader specified through the config file: * * - prefix for the row keys// ww w . j a va 2 s .c o m * - range start * - range end (inclusive) * - num splits (or number of partitions). * - user * - password * - table * * For the accepted default options * @see org.apache.hadoop.util.Tool#run(java.lang.String[]) */ public int run(String[] args) throws Exception { Configuration conf = this.getConf(); Util.printArgs("run", args, System.err); printKeyValues(conf, ARG_KEYS, System.err); if (args.length > 1 || (args.length == 1 && "-help".compareToIgnoreCase(args[0]) == 0)) { System.err.println("Usage: " + this.getClass().getName() + "input_path [generic options] [input_paths...] ouptut_path"); GenericOptionsParser.printGenericCommandUsage(System.err); return 1; } // Time run long startTime = System.currentTimeMillis(); String workdir; if (args.length == 1) { /* override workdir in the config if it is specified in the * command line */ conf.set(ARG_KEY_OUTDIR, args[0]); workdir = args[0]; } workdir = conf.get(ARG_KEY_OUTDIR); if (workdir == null) { System.err.println("No output directory specified"); return 1; } /* Initialize job, check parameters and decide which mapper to use */ Job job = new Job(conf, conf.get(ARG_KEY_JOBNAME, "YCSB KV data generator")); /* these settings are the same (i.e., fixed) independent of the * parameters */ job.setJarByClass(this.getClass()); // job.setInputFormatClass(TextInputFormat.class); job.setInputFormatClass(NLineInputFormat.class); /* these settings should depend on the type of output file */ job.setOutputFormatClass(HFileOutputFormat.class); /* not sure the next two are needed */ job.setOutputKeyClass(ImmutableBytesWritable.class); job.setOutputValueClass(KeyValue.class); this.createInputFile(job, workdir); HFileOutputFormat.setOutputPath(job, new Path(workdir + "/files")); /* depending on whether the keys need to be sorted and hashed, then * decide which mapper and reducer to use */ boolean hashKeys = conf.getBoolean(ARG_KEY_HASH_KEYS, false); boolean sortKeys = conf.getBoolean(ARG_KEY_SORTKEYS, true); /* get splits file name: side-effect -> this may generate a splits file */ String splitsfile = this.getSplitsFile(job, workdir); if (sortKeys && hashKeys) { /* do a full map reduce job */ job.setMapperClass(RowGeneratorMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setPartitionerClass(RangePartitioner.class); if (splitsfile == null) { /* Auto generate the splits file either from: * - the input key ranges * - from the current table splits */ throw new InvalidInputException("No splits specified"); } /* Set splits file */ RangePartitioner.setSplitFile(job, splitsfile); /* Add reducer (based on mapper code) */ job.setReducerClass(RowGeneratorReduce.class); /* the number of reducers is dependent on the number of * partitions */ int numReduce = conf.getInt(ARG_KEY_NUMREDUCE, 1); job.setNumReduceTasks(numReduce); } else { /* perform a map only job */ job.setMapperClass(RowGeneratorMapOnly.class); /* map output key and value types are the same as * for the job */ job.setMapOutputKeyClass(job.getOutputKeyClass()); job.setMapOutputValueClass(job.getOutputValueClass()); job.setNumReduceTasks(0); } job.waitForCompletion(true); // JobClient.runJob(conf); SimpleDateFormat df = new SimpleDateFormat("yyyy.MM.dd HH:mm:ss.SSS z"); SimpleDateFormat ddf = new SimpleDateFormat("HH:mm:ss.SSS"); ddf.setTimeZone(TimeZone.getTimeZone("UTC")); long endTime = System.currentTimeMillis(); System.out.println("Start time (ms): " + df.format(new Date(startTime)) + " -- " + startTime); System.out.println("End time (ms): " + df.format(new Date(endTime)) + " -- " + endTime); System.out .println("Elapsed time (ms): " + ddf.format(endTime - startTime) + " -- " + (endTime - startTime)); return 0; }
From source file:com.yassergonzalez.pagerank.PageRank.java
License:Apache License
private void pageRankIteration(int iter, Configuration conf, Path outputDir) throws Exception { // This job performs an iteration of the power iteration method to // compute PageRank. The map task processes each block M_{i,j}, loads // the corresponding stripe j of the vector v_{k-1} and produces the // partial result of the stripe i of the vector v_k. The reduce task // sums all the partial results of v_k and adds the teleportation factor // (the combiner only sums all the partial results). See Section 5.2 // (and 5.2.3 in particular) of Mining of Massive Datasets // (http://infolab.stanford.edu/~ullman/mmds.html) for details. The // output is written in a "vk" subdir of the output dir, where k is the // iteration number. MapFileOutputFormat is used to keep an array of the // stripes of v. Job job = Job.getInstance(conf, "PageRank:Iteration"); job.setJarByClass(PageRank.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setMapperClass(PageRankIterationMapper.class); job.setMapOutputKeyClass(ShortWritable.class); job.setMapOutputValueClass(FloatArrayWritable.class); job.setCombinerClass(PageRankIterationCombiner.class); job.setReducerClass(PageRankIterationReducer.class); job.setOutputFormatClass(MapFileOutputFormat.class); job.setOutputKeyClass(ShortWritable.class); job.setOutputValueClass(FloatArrayWritable.class); FileInputFormat.addInputPath(job, new Path(outputDir, "M")); FileOutputFormat.setOutputPath(job, new Path(outputDir, "v" + iter)); job.waitForCompletion(true);// w w w . j a v a 2 s. c o m }
From source file:com.yourcompany.hadoop.mapreduce.hcatalog.HCatalogExampleDriver.java
License:Apache License
public int run(String[] args) throws Exception { Job job = new Job(); parseArguements(args, job);// w ww . j av a2s. c o m job.setJarByClass(HCatalogExampleDriver.class); job.setInputFormatClass(HCatInputFormat.class); job.setOutputFormatClass(HCatOutputFormat.class); job.setMapperClass(HCatalogExampleMapper.class); job.setReducerClass(HCatalogExampleReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(WritableComparable.class); job.setOutputValueClass(DefaultHCatRecord.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.zjy.mongo.util.MongoTool.java
License:Apache License
private int runMapReduceJob(final Configuration conf) throws IOException { final Job job = Job.getInstance(conf, getJobName()); /**//from w w w . jav a 2 s . c om * Any arguments specified with -D <property>=<value> * on the CLI will be picked up and set here * They override any XML level values * Note that -D<space> is important - no space will * not work as it gets picked up by Java itself */ // TODO - Do we need to set job name somehow more specifically? // This may or may not be correct/sane job.setJarByClass(getClass()); final Class<? extends Mapper> mapper = MongoConfigUtil.getMapper(conf); if (LOG.isDebugEnabled()) { LOG.debug("Mapper Class: " + mapper); LOG.debug("Input URI: " + conf.get(MongoConfigUtil.INPUT_URI)); } job.setMapperClass(mapper); Class<? extends Reducer> combiner = MongoConfigUtil.getCombiner(conf); if (combiner != null) { job.setCombinerClass(combiner); } job.setReducerClass(MongoConfigUtil.getReducer(conf)); job.setOutputFormatClass(MongoConfigUtil.getOutputFormat(conf)); job.setOutputKeyClass(MongoConfigUtil.getOutputKey(conf)); job.setOutputValueClass(MongoConfigUtil.getOutputValue(conf)); job.setInputFormatClass(MongoConfigUtil.getInputFormat(conf)); Class mapOutputKeyClass = MongoConfigUtil.getMapperOutputKey(conf); Class mapOutputValueClass = MongoConfigUtil.getMapperOutputValue(conf); if (mapOutputKeyClass != null) { job.setMapOutputKeyClass(mapOutputKeyClass); } if (mapOutputValueClass != null) { job.setMapOutputValueClass(mapOutputValueClass); } /** * Determines if the job will run verbosely e.g. print debug output * Only works with foreground jobs */ final boolean verbose = MongoConfigUtil.isJobVerbose(conf); /** * Run job in foreground aka wait for completion or background? */ final boolean background = MongoConfigUtil.isJobBackground(conf); try { if (background) { LOG.info("Setting up and running MapReduce job in background."); job.submit(); return 0; } else { LOG.info("Setting up and running MapReduce job in foreground, will wait for results. {Verbose? " + verbose + "}"); return job.waitForCompletion(true) ? 0 : 1; } } catch (final Exception e) { LOG.error("Exception while executing job... ", e); return 1; } }
From source file:cosmos.mapred.MediawikiIngestJob.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (1 != args.length) { System.err.println("Usage: input.xml,input.xml,input.xml..."); return 1; }/*from w w w.j av a 2 s. c o m*/ String inputFiles = args[0]; Configuration conf = getConf(); System.out.println("path " + conf.get("fs.default.name")); conf.addResource(new Path("/opt/hadoop/conf/hdfs-site.xml")); conf.addResource(new Path("/opt/hadoop/conf/core-site.xml")); conf.addResource(new Path("/opt/hadoop/conf/mapred-site.xml")); System.out.println("path " + conf.get("fs.default.name")); //System.exit(1); Job job = new Job(conf, "Mediawiki Ingest"); job.setJarByClass(MediawikiIngestJob.class); String tablename = "sortswiki"; String zookeepers = "localhost:2181"; String instanceName = "accumulo"; String user = "root"; PasswordToken passwd = new PasswordToken("secret"); FileInputFormat.setInputPaths(job, inputFiles); job.setMapperClass(MediawikiMapper.class); job.setNumReduceTasks(0); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Mutation.class); job.setOutputFormatClass(AccumuloOutputFormat.class); BatchWriterConfig bwConfig = new BatchWriterConfig(); job.setInputFormatClass(MediawikiInputFormat.class); AccumuloOutputFormat.setZooKeeperInstance(job, instanceName, zookeepers); AccumuloOutputFormat.setConnectorInfo(job, user, passwd); AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig); AccumuloOutputFormat.setCreateTables(job, true); AccumuloOutputFormat.setDefaultTableName(job, tablename); return job.waitForCompletion(true) ? 0 : 1; }