Usage examples for org.apache.hadoop.mapreduce.Job#getOutputFormatClass()

Method signature:

@SuppressWarnings("unchecked")
public Class<? extends OutputFormat<?, ?>> getOutputFormatClass() throws ClassNotFoundException
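Before the per-project examples, here is a minimal, self-contained sketch of the call itself. It is illustrative only: the job name, the choice of SequenceFileOutputFormat, and the printed messages are not taken from any project below, and it assumes a stock Hadoop 2.x client where the getter falls back to TextOutputFormat when no output format has been configured.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class GetOutputFormatClassExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "output-format-demo");

        // Nothing configured yet: the getter falls back to TextOutputFormat.
        System.out.println("default:    " + job.getOutputFormatClass().getName());

        // After setOutputFormatClass(), the same getter reports the configured class.
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        Class<? extends OutputFormat<?, ?>> ofClass = job.getOutputFormatClass();
        System.out.println("configured: " + ofClass.getName());

        // The class is resolved by name from the job's Configuration, which is why
        // the method declares ClassNotFoundException (propagated here via main's throws).
    }
}

Because the class is resolved lazily from the Configuration, callers such as the dependency-jar helpers below catch ClassNotFoundException and typically rethrow it as an IOException.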
From source file: gr.ntua.h2rdf.inputFormat2.TableMapReduceUtil.java

License: Open Source License

/**
 * Add the HBase dependency jars as well as jars for any of the configured
 * job classes to the job configuration, so that JobClient will ship them
 * to the cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(job.getConfiguration(),
                org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class,
                job.getMapOutputKeyClass(),
                job.getMapOutputValueClass(),
                job.getInputFormatClass(),
                job.getOutputKeyClass(),
                job.getOutputValueClass(),
                job.getOutputFormatClass(),
                job.getPartitionerClass(),
                job.getCombinerClass());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}
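Here the getters (getOutputFormatClass(), getPartitionerClass(), and the rest) are used purely to discover which jars must accompany the job. The driver below is a hypothetical sketch of where such a call fits during job setup: the class name, identity map/reduce wiring, and paths are placeholders, and the only listing-specific assumption is that gr.ntua.h2rdf.inputFormat2.TableMapReduceUtil is on the driver's classpath.

import gr.ntua.h2rdf.inputFormat2.TableMapReduceUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class DependencyJarsDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "dependency-jars-demo");
        job.setJarByClass(DependencyJarsDriver.class);

        // Identity map/reduce keeps the sketch self-contained; real jobs set their own classes.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Ship ZooKeeper, protobuf, and the jars of every class the job references;
        // internally this walks the Job getters shown above, including getOutputFormatClass().
        TableMapReduceUtil.addDependencyJars(job);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}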
From source file: it.crs4.pydoop.mapreduce.pipes.CommandLineParser.java

License: Apache License

private static void setupPipesJob(Job job) throws IOException, ClassNotFoundException {
    Configuration conf = job.getConfiguration();
    // default map output types to Text
    if (!getIsJavaMapper(conf)) {
        job.setMapperClass(PipesMapper.class);
        // Save the user's partitioner and hook in our's.
        setJavaPartitioner(conf, job.getPartitionerClass());
        job.setPartitionerClass(PipesPartitioner.class);
    }
    if (!getIsJavaReducer(conf)) {
        job.setReducerClass(PipesReducer.class);
        if (!getIsJavaRecordWriter(conf)) {
            job.setOutputFormatClass(NullOutputFormat.class);
        }
    }
    String textClassname = Text.class.getName();
    setIfUnset(conf, MRJobConfig.MAP_OUTPUT_KEY_CLASS, textClassname);
    setIfUnset(conf, MRJobConfig.MAP_OUTPUT_VALUE_CLASS, textClassname);
    setIfUnset(conf, MRJobConfig.OUTPUT_KEY_CLASS, textClassname);
    setIfUnset(conf, MRJobConfig.OUTPUT_VALUE_CLASS, textClassname);
    // Use PipesNonJavaInputFormat if necessary to handle progress reporting
    // from C++ RecordReaders ...
    if (!getIsJavaRecordReader(conf) && !getIsJavaMapper(conf)) {
        conf.setClass(Submitter.INPUT_FORMAT, job.getInputFormatClass(), InputFormat.class);
        job.setInputFormatClass(PipesNonJavaInputFormat.class);
    }
    if (avroInput != null) {
        if (explicitInputFormat) {
            conf.setClass(Submitter.INPUT_FORMAT, job.getInputFormatClass(), InputFormat.class);
        } // else let the bridge fall back to the appropriate Avro IF
        switch (avroInput) {
        case K:
            job.setInputFormatClass(PydoopAvroInputKeyBridge.class);
            break;
        case V:
            job.setInputFormatClass(PydoopAvroInputValueBridge.class);
            break;
        case KV:
            job.setInputFormatClass(PydoopAvroInputKeyValueBridge.class);
            break;
        default:
            throw new IllegalArgumentException("Bad Avro input type");
        }
    }
    if (avroOutput != null) {
        if (explicitOutputFormat) {
            conf.setClass(Submitter.OUTPUT_FORMAT, job.getOutputFormatClass(), OutputFormat.class);
        } // else let the bridge fall back to the appropriate Avro OF
        conf.set(props.getProperty("AVRO_OUTPUT"), avroOutput.name());
        switch (avroOutput) {
        case K:
            job.setOutputFormatClass(PydoopAvroOutputKeyBridge.class);
            break;
        case V:
            job.setOutputFormatClass(PydoopAvroOutputValueBridge.class);
            break;
        case KV:
            job.setOutputFormatClass(PydoopAvroOutputKeyValueBridge.class);
            break;
        default:
            throw new IllegalArgumentException("Bad Avro output type");
        }
    }
    String exec = getExecutable(conf);
    if (exec == null) {
        String msg = "No application program defined.";
        throw new IllegalArgumentException(msg);
    }
    // add default debug script only when executable is expressed as <path>#<executable>
    // FIXME: this is kind of useless if the pipes program is not in c++
    if (exec.contains("#")) {
        // set default gdb commands for map and reduce task
        String defScript = "$HADOOP_PREFIX/src/c++/pipes/debug/pipes-default-script";
        setIfUnset(conf, MRJobConfig.MAP_DEBUG_SCRIPT, defScript);
        setIfUnset(conf, MRJobConfig.REDUCE_DEBUG_SCRIPT, defScript);
    }
    URI[] fileCache = DistributedCache.getCacheFiles(conf);
    if (fileCache == null) {
        fileCache = new URI[1];
    } else {
        URI[] tmp = new URI[fileCache.length + 1];
        System.arraycopy(fileCache, 0, tmp, 1, fileCache.length);
        fileCache = tmp;
    }
    try {
        fileCache[0] = new URI(exec);
    } catch (URISyntaxException e) {
        String msg = "Problem parsing executable URI " + exec;
        IOException ie = new IOException(msg);
        ie.initCause(e);
        throw ie;
    }
    DistributedCache.setCacheFiles(fileCache, conf);
}
From source file: it.crs4.pydoop.mapreduce.pipes.CommandLineParser.java

License: Apache License

public int run(String[] args) throws Exception {
    CommandLineParser cli = new CommandLineParser();
    if (args.length == 0) {
        cli.printUsage();
        return 1;
    }
    try {
        Job job = new Job(new Configuration());
        job.setJobName(getClass().getName());
        Configuration conf = job.getConfiguration();
        CommandLine results = cli.parse(conf, args);
        if (results.hasOption("input")) {
            Path path = new Path(results.getOptionValue("input"));
            FileInputFormat.setInputPaths(job, path);
        }
        if (results.hasOption("output")) {
            Path path = new Path(results.getOptionValue("output"));
            FileOutputFormat.setOutputPath(job, path);
        }
        if (results.hasOption("jar")) {
            job.setJar(results.getOptionValue("jar"));
        }
        if (results.hasOption("inputformat")) {
            explicitInputFormat = true;
            setIsJavaRecordReader(conf, true);
            job.setInputFormatClass(getClass(results, "inputformat", conf, InputFormat.class));
        }
        if (results.hasOption("javareader")) {
            setIsJavaRecordReader(conf, true);
        }
        if (results.hasOption("map")) {
            setIsJavaMapper(conf, true);
            job.setMapperClass(getClass(results, "map", conf, Mapper.class));
        }
        if (results.hasOption("partitioner")) {
            job.setPartitionerClass(getClass(results, "partitioner", conf, Partitioner.class));
        }
        if (results.hasOption("reduce")) {
            setIsJavaReducer(conf, true);
            job.setReducerClass(getClass(results, "reduce", conf, Reducer.class));
        }
        if (results.hasOption("reduces")) {
            job.setNumReduceTasks(Integer.parseInt(results.getOptionValue("reduces")));
        }
        if (results.hasOption("writer")) {
            explicitOutputFormat = true;
            setIsJavaRecordWriter(conf, true);
            job.setOutputFormatClass(getClass(results, "writer", conf, OutputFormat.class));
        }
        if (results.hasOption("lazyOutput")) {
            if (Boolean.parseBoolean(results.getOptionValue("lazyOutput"))) {
                LazyOutputFormat.setOutputFormatClass(job, job.getOutputFormatClass());
            }
        }
        if (results.hasOption("avroInput")) {
            avroInput = AvroIO.valueOf(results.getOptionValue("avroInput").toUpperCase());
        }
        if (results.hasOption("avroOutput")) {
            avroOutput = AvroIO.valueOf(results.getOptionValue("avroOutput").toUpperCase());
        }
        if (results.hasOption("program")) {
            setExecutable(conf, results.getOptionValue("program"));
        }
        // if they gave us a jar file, include it into the class path
        String jarFile = job.getJar();
        if (jarFile != null) {
            final URL[] urls = new URL[] { FileSystem.getLocal(conf).pathToFile(new Path(jarFile)).toURL() };
            // FindBugs complains that creating a URLClassLoader should be
            // in a doPrivileged() block.
            ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
                public ClassLoader run() {
                    return new URLClassLoader(urls);
                }
            });
            conf.setClassLoader(loader);
        }
        setupPipesJob(job);
        return job.waitForCompletion(true) ? 0 : 1;
    } catch (ParseException pe) {
        LOG.info("Error : " + pe);
        cli.printUsage();
        return 1;
    }
}
From source file: org.apache.blur.mapreduce.lib.BlurMapReduceUtil.java

License: Apache License

/**
 * Add the Blur dependency jars as well as jars for any of the configured job
 * classes to the job configuration, so that JobClient will ship them to the
 * cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(job.getConfiguration(),
                org.apache.zookeeper.ZooKeeper.class,
                job.getMapOutputKeyClass(),
                job.getMapOutputValueClass(),
                job.getInputFormatClass(),
                job.getOutputKeyClass(),
                job.getOutputValueClass(),
                job.getOutputFormatClass(),
                job.getPartitionerClass(),
                job.getCombinerClass(),
                DocumentVisibility.class);
        addAllJarsInBlurLib(job.getConfiguration());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}
From source file: org.apache.crunch.impl.spark.SparkRuntime.java

License: Apache License

private void monitorLoop() {
    status.set(Status.RUNNING);
    Map<PCollectionImpl<?>, Set<SourceTarget<?>>> targetDeps =
            Maps.<PCollectionImpl<?>, PCollectionImpl<?>, Set<SourceTarget<?>>>newTreeMap(DEPTH_COMPARATOR);
    for (PCollectionImpl<?> pcollect : outputTargets.keySet()) {
        targetDeps.put(pcollect, pcollect.getTargetDependencies());
    }
    while (!targetDeps.isEmpty() && doneSignal.getCount() > 0) {
        Set<Target> allTargets = Sets.newHashSet();
        for (PCollectionImpl<?> pcollect : targetDeps.keySet()) {
            allTargets.addAll(outputTargets.get(pcollect));
        }
        Map<PCollectionImpl<?>, JavaRDDLike<?, ?>> pcolToRdd = Maps.newTreeMap(DEPTH_COMPARATOR);
        for (PCollectionImpl<?> pcollect : targetDeps.keySet()) {
            if (Sets.intersection(allTargets, targetDeps.get(pcollect)).isEmpty()) {
                JavaRDDLike<?, ?> rdd = ((SparkCollection) pcollect).getJavaRDDLike(this);
                pcolToRdd.put(pcollect, rdd);
            }
        }
        distributeFiles();
        for (Map.Entry<PCollectionImpl<?>, JavaRDDLike<?, ?>> e : pcolToRdd.entrySet()) {
            JavaRDDLike<?, ?> rdd = e.getValue();
            PType<?> ptype = e.getKey().getPType();
            Set<Target> targets = outputTargets.get(e.getKey());
            if (targets.size() > 1) {
                rdd.rdd().cache();
            }
            for (Target t : targets) {
                Configuration conf = new Configuration(getConfiguration());
                if (t instanceof MapReduceTarget) { // TODO: check this earlier
                    Converter c = t.getConverter(ptype);
                    JavaPairRDD<?, ?> outRDD;
                    if (rdd instanceof JavaRDD) {
                        outRDD = ((JavaRDD) rdd).map(new MapFunction(ptype.getOutputMapFn(), ctxt))
                                .map(new OutputConverterFunction(c));
                    } else {
                        outRDD = ((JavaPairRDD) rdd).map(new PairMapFunction(ptype.getOutputMapFn(), ctxt))
                                .map(new OutputConverterFunction(c));
                    }
                    try {
                        Job job = new Job(conf);
                        if (t instanceof PathTarget) {
                            PathTarget pt = (PathTarget) t;
                            pt.configureForMapReduce(job, ptype, pt.getPath(), null);
                            Path tmpPath = pipeline.createTempPath();
                            outRDD.saveAsNewAPIHadoopFile(tmpPath.toString(), c.getKeyClass(), c.getValueClass(),
                                    job.getOutputFormatClass(), job.getConfiguration());
                            pt.handleOutputs(job.getConfiguration(), tmpPath, -1);
                        } else if (t instanceof MapReduceTarget) {
                            MapReduceTarget mrt = (MapReduceTarget) t;
                            mrt.configureForMapReduce(job, ptype, new Path("/tmp"), null);
                            outRDD.saveAsHadoopDataset(new JobConf(job.getConfiguration()));
                        } else {
                            throw new IllegalArgumentException(
                                    "Spark execution cannot handle non-MapReduceTarget: " + t);
                        }
                    } catch (Exception et) {
                        et.printStackTrace();
                        status.set(Status.FAILED);
                        set(PipelineResult.EMPTY);
                        doneSignal.countDown();
                    }
                }
            }
        }
        for (PCollectionImpl<?> output : pcolToRdd.keySet()) {
            if (toMaterialize.containsKey(output)) {
                MaterializableIterable mi = toMaterialize.get(output);
                if (mi.isSourceTarget()) {
                    output.materializeAt((SourceTarget) mi.getSource());
                }
            }
            targetDeps.remove(output);
        }
    }
    if (status.get() != Status.FAILED || status.get() != Status.KILLED) {
        status.set(Status.SUCCEEDED);
        result = new PipelineResult(ImmutableList.of(new PipelineResult.StageResult("Spark", null)),
                Status.SUCCEEDED);
        set(result);
    } else {
        set(PipelineResult.EMPTY);
    }
    doneSignal.countDown();
}
From source file: org.apache.kudu.mapreduce.KuduTableMapReduceUtil.java

License: Apache License

/**
 * Add the Kudu dependency jars as well as jars for any of the configured
 * job classes to the job configuration, so that JobClient will ship them
 * to the cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    addKuduDependencyJars(job.getConfiguration());
    try {
        addDependencyJars(job.getConfiguration(),
                // when making changes here, consider also mapred.TableMapReduceUtil
                // pull job classes
                job.getMapOutputKeyClass(),
                job.getMapOutputValueClass(),
                job.getInputFormatClass(),
                job.getOutputKeyClass(),
                job.getOutputValueClass(),
                job.getOutputFormatClass(),
                job.getPartitionerClass(),
                job.getCombinerClass());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}
From source file: org.apache.mahout.classifier.svm.mapreduce.MapReduceUtil.java

License: Apache License

/**
 * Sets the dynamic parameters related to a job.
 *
 * <ol>
 * <li>The input paths can be a string composed of many paths which are
 * separated by comma (',').</li>
 * <li>The maximum split size must be positive, which determines the number of
 * mappers.</li>
 * <li>The number of reducers must be nonnegative.</li>
 * </ol>
 *
 * @param job
 * @param inputPaths
 * @param outputPath
 * @param maxSplitSize
 * @param numReducers
 * @throws java.io.IOException
 * @throws ClassNotFoundException
 */
public static void setJobDynamicParameters(Job job, String inputPaths, String outputPath, long maxSplitSize,
        int numReducers) throws IOException, ClassNotFoundException {
    // input path
    if (null != inputPaths) {
        FileInputFormat.addInputPaths(job, inputPaths);
    } else {
        throw new IOException("[hadoop][job] input path is not specified");
    }
    // output path
    if (null != outputPath) {
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
    } else if (job.getOutputFormatClass().getName().equals(NULL_OUTPUT_FILE_FORMAT_CLASS_NAME)) {
        // a NullOutputFormat job needs no output path
    } else {
        throw new IOException("[hadoop][job] output path is not specified");
    }
    // maximum split size which determines the number of mappers
    if (maxSplitSize > 0) {
        job.getConfiguration().set("mapred.min.split.size", "0");
        job.getConfiguration().set("mapred.max.split.size", maxSplitSize + "");
    } else {
        throw new IOException("[hadoop][job] maximum split size must be positive");
    }
    // the number of reducers
    if (numReducers >= 0) {
        job.setNumReduceTasks(numReducers);
    } else {
        throw new IOException("[hadoop][job] number of reducers must be nonnegative");
    }
}
From source file: org.cloudgraph.hbase.mapreduce.GraphMapReduceSetup.java

License: Apache License

/**
 * Add the HBase dependency jars as well as jars for any of the configured job
 * classes to the job configuration, so that JobClient will ship them to the
 * cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(job.getConfiguration(),
                org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class,
                com.google.common.collect.ImmutableSet.class,
                org.apache.hadoop.hbase.util.Bytes.class, // one class from hbase.jar
                job.getMapOutputKeyClass(),
                job.getMapOutputValueClass(),
                job.getInputFormatClass(),
                job.getOutputKeyClass(),
                job.getOutputValueClass(),
                job.getOutputFormatClass(),
                job.getPartitionerClass(),
                job.getCombinerClass());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}
From source file: org.janusgraph.hadoop.formats.FormatTools.java

License: Apache License

public static Class getBaseOutputFormatClass(final Job job) {
    try {
        if (LazyOutputFormat.class.isAssignableFrom(job.getOutputFormatClass())) {
            Class<OutputFormat> baseClass = (Class<OutputFormat>) DEFAULT_COMPAT.getJobContextConfiguration(job)
                    .getClass(LazyOutputFormat.OUTPUT_FORMAT, null);
            return (null == baseClass) ? job.getOutputFormatClass() : baseClass;
        }
        return job.getOutputFormatClass();
    } catch (Exception e) {
        return null;
    }
}
From source file: org.kiji.mapreduce.TestKijiBulkImportJobBuilder.java

License: Apache License

@Test
public void testBuildWithHFileOutput() throws Exception {
    final MapReduceJob mrjob = KijiBulkImportJobBuilder.create().withConf(getConf())
            .withInput(new TextMapReduceJobInput(new Path(mTempPath, "input")))
            .withBulkImporter(NoopBulkImporter.class)
            .withOutput(new HFileMapReduceJobOutput(mTable, new Path(mTempPath, "output"), 10))
            .build();

    final Job job = mrjob.getHadoopJob();
    assertEquals(TextInputFormat.class, job.getInputFormatClass());
    assertEquals(BulkImportMapper.class, job.getMapperClass());
    assertEquals(NoopBulkImporter.class,
            job.getConfiguration().getClass(KijiConfKeys.KIJI_BULK_IMPORTER_CLASS, null));
    assertEquals(IdentityReducer.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(KijiHFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(TotalOrderPartitioner.class, job.getPartitionerClass());
}