Usage examples for org.apache.hadoop.mapreduce.Job#getOutputFormatClass()

Method signature:

@SuppressWarnings("unchecked")
public Class<? extends OutputFormat<?, ?>> getOutputFormatClass() throws ClassNotFoundException
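Before the per-project examples, here is a minimal, self-contained sketch of the call itself. It is illustrative only: the job name, the choice of SequenceFileOutputFormat, and the printed messages are not taken from any project below, and it assumes a stock Hadoop 2.x client where the getter falls back to TextOutputFormat when no output format has been configured.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class GetOutputFormatClassExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "output-format-demo");

        // Nothing configured yet: the getter falls back to TextOutputFormat.
        System.out.println("default:    " + job.getOutputFormatClass().getName());

        // After setOutputFormatClass(), the same getter reports the configured class.
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        Class<? extends OutputFormat<?, ?>> ofClass = job.getOutputFormatClass();
        System.out.println("configured: " + ofClass.getName());

        // The class is resolved by name from the job's Configuration, which is why
        // the method declares ClassNotFoundException (propagated here via main's throws).
    }
}

Because the class is resolved lazily from the Configuration, callers such as the dependency-jar helpers below catch ClassNotFoundException and typically rethrow it as an IOException.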
From source file: gr.ntua.h2rdf.inputFormat2.TableMapReduceUtil.java

License: Open Source License

/**
 * Add the HBase dependency jars as well as jars for any of the configured
 * job classes to the job configuration, so that JobClient will ship them
 * to the cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(job.getConfiguration(),
                org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class,
                job.getMapOutputKeyClass(),
                job.getMapOutputValueClass(),
                job.getInputFormatClass(),
                job.getOutputKeyClass(),
                job.getOutputValueClass(),
                job.getOutputFormatClass(),
                job.getPartitionerClass(),
                job.getCombinerClass());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}
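Here the getters (getOutputFormatClass(), getPartitionerClass(), and the rest) are used purely to discover which jars must accompany the job. The driver below is a hypothetical sketch of where such a call fits during job setup: the class name, identity map/reduce wiring, and paths are placeholders, and the only listing-specific assumption is that gr.ntua.h2rdf.inputFormat2.TableMapReduceUtil is on the driver's classpath.

import gr.ntua.h2rdf.inputFormat2.TableMapReduceUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class DependencyJarsDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "dependency-jars-demo");
        job.setJarByClass(DependencyJarsDriver.class);

        // Identity map/reduce keeps the sketch self-contained; real jobs set their own classes.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Ship ZooKeeper, protobuf, and the jars of every class the job references;
        // internally this walks the Job getters shown above, including getOutputFormatClass().
        TableMapReduceUtil.addDependencyJars(job);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}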
From source file: it.crs4.pydoop.mapreduce.pipes.CommandLineParser.java

License: Apache License

private static void setupPipesJob(Job job) throws IOException, ClassNotFoundException {
    Configuration conf = job.getConfiguration();
    // default map output types to Text
    if (!getIsJavaMapper(conf)) {
        job.setMapperClass(PipesMapper.class);
        // Save the user's partitioner and hook in our's.
        setJavaPartitioner(conf, job.getPartitionerClass());
        job.setPartitionerClass(PipesPartitioner.class);
    }
    if (!getIsJavaReducer(conf)) {
        job.setReducerClass(PipesReducer.class);
        if (!getIsJavaRecordWriter(conf)) {
            job.setOutputFormatClass(NullOutputFormat.class);
        }
    }
    String textClassname = Text.class.getName();
    setIfUnset(conf, MRJobConfig.MAP_OUTPUT_KEY_CLASS, textClassname);
    setIfUnset(conf, MRJobConfig.MAP_OUTPUT_VALUE_CLASS, textClassname);
    setIfUnset(conf, MRJobConfig.OUTPUT_KEY_CLASS, textClassname);
    setIfUnset(conf, MRJobConfig.OUTPUT_VALUE_CLASS, textClassname);
    // Use PipesNonJavaInputFormat if necessary to handle progress reporting
    // from C++ RecordReaders ...
    if (!getIsJavaRecordReader(conf) && !getIsJavaMapper(conf)) {
        conf.setClass(Submitter.INPUT_FORMAT, job.getInputFormatClass(), InputFormat.class);
        job.setInputFormatClass(PipesNonJavaInputFormat.class);
    }
    if (avroInput != null) {
        if (explicitInputFormat) {
            conf.setClass(Submitter.INPUT_FORMAT, job.getInputFormatClass(), InputFormat.class);
        } // else let the bridge fall back to the appropriate Avro IF
        switch (avroInput) {
        case K:
            job.setInputFormatClass(PydoopAvroInputKeyBridge.class);
            break;
        case V:
            job.setInputFormatClass(PydoopAvroInputValueBridge.class);
            break;
        case KV:
            job.setInputFormatClass(PydoopAvroInputKeyValueBridge.class);
            break;
        default:
            throw new IllegalArgumentException("Bad Avro input type");
        }
    }
    if (avroOutput != null) {
        if (explicitOutputFormat) {
            conf.setClass(Submitter.OUTPUT_FORMAT, job.getOutputFormatClass(), OutputFormat.class);
        } // else let the bridge fall back to the appropriate Avro OF
        conf.set(props.getProperty("AVRO_OUTPUT"), avroOutput.name());
        switch (avroOutput) {
        case K:
            job.setOutputFormatClass(PydoopAvroOutputKeyBridge.class);
            break;
        case V:
            job.setOutputFormatClass(PydoopAvroOutputValueBridge.class);
            break;
        case KV:
            job.setOutputFormatClass(PydoopAvroOutputKeyValueBridge.class);
            break;
        default:
            throw new IllegalArgumentException("Bad Avro output type");
        }
    }
    String exec = getExecutable(conf);
    if (exec == null) {
        String msg = "No application program defined.";
        throw new IllegalArgumentException(msg);
    }
    // add default debug script only when executable is expressed as <path>#<executable>
    // FIXME: this is kind of useless if the pipes program is not in c++
    if (exec.contains("#")) {
        // set default gdb commands for map and reduce task
        String defScript = "$HADOOP_PREFIX/src/c++/pipes/debug/pipes-default-script";
        setIfUnset(conf, MRJobConfig.MAP_DEBUG_SCRIPT, defScript);
        setIfUnset(conf, MRJobConfig.REDUCE_DEBUG_SCRIPT, defScript);
    }
    URI[] fileCache = DistributedCache.getCacheFiles(conf);
    if (fileCache == null) {
        fileCache = new URI[1];
    } else {
        URI[] tmp = new URI[fileCache.length + 1];
        System.arraycopy(fileCache, 0, tmp, 1, fileCache.length);
        fileCache = tmp;
    }
    try {
        fileCache[0] = new URI(exec);
    } catch (URISyntaxException e) {
        String msg = "Problem parsing executable URI " + exec;
        IOException ie = new IOException(msg);
        ie.initCause(e);
        throw ie;
    }
    DistributedCache.setCacheFiles(fileCache, conf);
}
From source file: it.crs4.pydoop.mapreduce.pipes.CommandLineParser.java

License: Apache License

public int run(String[] args) throws Exception {
    CommandLineParser cli = new CommandLineParser();
    if (args.length == 0) {
        cli.printUsage();
        return 1;
    }
    try {
        Job job = new Job(new Configuration());
        job.setJobName(getClass().getName());
        Configuration conf = job.getConfiguration();
        CommandLine results = cli.parse(conf, args);
        if (results.hasOption("input")) {
            Path path = new Path(results.getOptionValue("input"));
            FileInputFormat.setInputPaths(job, path);
        }
        if (results.hasOption("output")) {
            Path path = new Path(results.getOptionValue("output"));
            FileOutputFormat.setOutputPath(job, path);
        }
        if (results.hasOption("jar")) {
            job.setJar(results.getOptionValue("jar"));
        }
        if (results.hasOption("inputformat")) {
            explicitInputFormat = true;
            setIsJavaRecordReader(conf, true);
            job.setInputFormatClass(getClass(results, "inputformat", conf, InputFormat.class));
        }
        if (results.hasOption("javareader")) {
            setIsJavaRecordReader(conf, true);
        }
        if (results.hasOption("map")) {
            setIsJavaMapper(conf, true);
            job.setMapperClass(getClass(results, "map", conf, Mapper.class));
        }
        if (results.hasOption("partitioner")) {
            job.setPartitionerClass(getClass(results, "partitioner", conf, Partitioner.class));
        }
        if (results.hasOption("reduce")) {
            setIsJavaReducer(conf, true);
            job.setReducerClass(getClass(results, "reduce", conf, Reducer.class));
        }
        if (results.hasOption("reduces")) {
            job.setNumReduceTasks(Integer.parseInt(results.getOptionValue("reduces")));
        }
        if (results.hasOption("writer")) {
            explicitOutputFormat = true;
            setIsJavaRecordWriter(conf, true);
            job.setOutputFormatClass(getClass(results, "writer", conf, OutputFormat.class));
        }
        if (results.hasOption("lazyOutput")) {
            if (Boolean.parseBoolean(results.getOptionValue("lazyOutput"))) {
                LazyOutputFormat.setOutputFormatClass(job, job.getOutputFormatClass());
            }
        }
        if (results.hasOption("avroInput")) {
            avroInput = AvroIO.valueOf(results.getOptionValue("avroInput").toUpperCase());
        }
        if (results.hasOption("avroOutput")) {
            avroOutput = AvroIO.valueOf(results.getOptionValue("avroOutput").toUpperCase());
        }
        if (results.hasOption("program")) {
            setExecutable(conf, results.getOptionValue("program"));
        }
        // if they gave us a jar file, include it into the class path
        String jarFile = job.getJar();
        if (jarFile != null) {
            final URL[] urls = new URL[] { FileSystem.getLocal(conf).pathToFile(new Path(jarFile)).toURL() };
            // FindBugs complains that creating a URLClassLoader should be
            // in a doPrivileged() block.
            ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
                public ClassLoader run() {
                    return new URLClassLoader(urls);
                }
            });
            conf.setClassLoader(loader);
        }
        setupPipesJob(job);
        return job.waitForCompletion(true) ? 0 : 1;
    } catch (ParseException pe) {
        LOG.info("Error : " + pe);
        cli.printUsage();
        return 1;
    }
}
From source file: org.apache.blur.mapreduce.lib.BlurMapReduceUtil.java

License: Apache License

/**
 * Add the Blur dependency jars as well as jars for any of the configured job
 * classes to the job configuration, so that JobClient will ship them to the
 * cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(job.getConfiguration(),
                org.apache.zookeeper.ZooKeeper.class,
                job.getMapOutputKeyClass(),
                job.getMapOutputValueClass(),
                job.getInputFormatClass(),
                job.getOutputKeyClass(),
                job.getOutputValueClass(),
                job.getOutputFormatClass(),
                job.getPartitionerClass(),
                job.getCombinerClass(),
                DocumentVisibility.class);
        addAllJarsInBlurLib(job.getConfiguration());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}
From source file: org.apache.crunch.impl.spark.SparkRuntime.java

License: Apache License

private void monitorLoop() {
    status.set(Status.RUNNING);
    Map<PCollectionImpl<?>, Set<SourceTarget<?>>> targetDeps =
            Maps.<PCollectionImpl<?>, PCollectionImpl<?>, Set<SourceTarget<?>>>newTreeMap(DEPTH_COMPARATOR);
    for (PCollectionImpl<?> pcollect : outputTargets.keySet()) {
        targetDeps.put(pcollect, pcollect.getTargetDependencies());
    }
    while (!targetDeps.isEmpty() && doneSignal.getCount() > 0) {
        Set<Target> allTargets = Sets.newHashSet();
        for (PCollectionImpl<?> pcollect : targetDeps.keySet()) {
            allTargets.addAll(outputTargets.get(pcollect));
        }
        Map<PCollectionImpl<?>, JavaRDDLike<?, ?>> pcolToRdd = Maps.newTreeMap(DEPTH_COMPARATOR);
        for (PCollectionImpl<?> pcollect : targetDeps.keySet()) {
            if (Sets.intersection(allTargets, targetDeps.get(pcollect)).isEmpty()) {
                JavaRDDLike<?, ?> rdd = ((SparkCollection) pcollect).getJavaRDDLike(this);
                pcolToRdd.put(pcollect, rdd);
            }
        }
        distributeFiles();
        for (Map.Entry<PCollectionImpl<?>, JavaRDDLike<?, ?>> e : pcolToRdd.entrySet()) {
            JavaRDDLike<?, ?> rdd = e.getValue();
            PType<?> ptype = e.getKey().getPType();
            Set<Target> targets = outputTargets.get(e.getKey());
            if (targets.size() > 1) {
                rdd.rdd().cache();
            }
            for (Target t : targets) {
                Configuration conf = new Configuration(getConfiguration());
                if (t instanceof MapReduceTarget) { // TODO: check this earlier
                    Converter c = t.getConverter(ptype);
                    JavaPairRDD<?, ?> outRDD;
                    if (rdd instanceof JavaRDD) {
                        outRDD = ((JavaRDD) rdd).map(new MapFunction(ptype.getOutputMapFn(), ctxt))
                                .map(new OutputConverterFunction(c));
                    } else {
                        outRDD = ((JavaPairRDD) rdd).map(new PairMapFunction(ptype.getOutputMapFn(), ctxt))
                                .map(new OutputConverterFunction(c));
                    }
                    try {
                        Job job = new Job(conf);
                        if (t instanceof PathTarget) {
                            PathTarget pt = (PathTarget) t;
                            pt.configureForMapReduce(job, ptype, pt.getPath(), null);
                            Path tmpPath = pipeline.createTempPath();
                            outRDD.saveAsNewAPIHadoopFile(tmpPath.toString(), c.getKeyClass(), c.getValueClass(),
                                    job.getOutputFormatClass(), job.getConfiguration());
                            pt.handleOutputs(job.getConfiguration(), tmpPath, -1);
                        } else if (t instanceof MapReduceTarget) {
                            MapReduceTarget mrt = (MapReduceTarget) t;
                            mrt.configureForMapReduce(job, ptype, new Path("/tmp"), null);
                            outRDD.saveAsHadoopDataset(new JobConf(job.getConfiguration()));
                        } else {
                            throw new IllegalArgumentException(
                                    "Spark execution cannot handle non-MapReduceTarget: " + t);
                        }
                    } catch (Exception et) {
                        et.printStackTrace();
                        status.set(Status.FAILED);
                        set(PipelineResult.EMPTY);
                        doneSignal.countDown();
                    }
                }
            }
        }
        for (PCollectionImpl<?> output : pcolToRdd.keySet()) {
            if (toMaterialize.containsKey(output)) {
                MaterializableIterable mi = toMaterialize.get(output);
                if (mi.isSourceTarget()) {
                    output.materializeAt((SourceTarget) mi.getSource());
                }
            }
            targetDeps.remove(output);
        }
    }
    if (status.get() != Status.FAILED || status.get() != Status.KILLED) {
        status.set(Status.SUCCEEDED);
        result = new PipelineResult(ImmutableList.of(new PipelineResult.StageResult("Spark", null)),
                Status.SUCCEEDED);
        set(result);
    } else {
        set(PipelineResult.EMPTY);
    }
    doneSignal.countDown();
}
From source file: org.apache.kudu.mapreduce.KuduTableMapReduceUtil.java

License: Apache License

/**
 * Add the Kudu dependency jars as well as jars for any of the configured
 * job classes to the job configuration, so that JobClient will ship them
 * to the cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    addKuduDependencyJars(job.getConfiguration());
    try {
        addDependencyJars(job.getConfiguration(),
                // when making changes here, consider also mapred.TableMapReduceUtil
                // pull job classes
                job.getMapOutputKeyClass(),
                job.getMapOutputValueClass(),
                job.getInputFormatClass(),
                job.getOutputKeyClass(),
                job.getOutputValueClass(),
                job.getOutputFormatClass(),
                job.getPartitionerClass(),
                job.getCombinerClass());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}
From source file: org.apache.mahout.classifier.svm.mapreduce.MapReduceUtil.java

License: Apache License

/**
 * Sets the dynamic parameters related to a job.
 *
 * <ol>
 * <li>The input paths can be a string composed of many paths which are
 * separated by comma (',').</li>
 * <li>The maximum split size must be positive, which determines the number of
 * mappers.</li>
 * <li>The number of reducers must be nonnegative.</li>
 * </ol>
 *
 * @param job
 * @param inputPaths
 * @param outputPath
 * @param maxSplitSize
 * @param numReducers
 * @throws java.io.IOException
 * @throws ClassNotFoundException
 */
public static void setJobDynamicParameters(Job job, String inputPaths, String outputPath, long maxSplitSize,
        int numReducers) throws IOException, ClassNotFoundException {
    // input path
    if (null != inputPaths) {
        FileInputFormat.addInputPaths(job, inputPaths);
    } else {
        throw new IOException("[hadoop][job] input path is not specified");
    }
    // output path
    if (null != outputPath) {
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
    } else if (job.getOutputFormatClass().getName().equals(NULL_OUTPUT_FILE_FORMAT_CLASS_NAME)) {
        // a NullOutputFormat job needs no output path
    } else {
        throw new IOException("[hadoop][job] output path is not specified");
    }
    // maximum split size which determines the number of mappers
    if (maxSplitSize > 0) {
        job.getConfiguration().set("mapred.min.split.size", "0");
        job.getConfiguration().set("mapred.max.split.size", maxSplitSize + "");
    } else {
        throw new IOException("[hadoop][job] maximum split size must be positive");
    }
    // the number of reducers
    if (numReducers >= 0) {
        job.setNumReduceTasks(numReducers);
    } else {
        throw new IOException("[hadoop][job] number of reducers must be nonnegative");
    }
}
From source file: org.cloudgraph.hbase.mapreduce.GraphMapReduceSetup.java

License: Apache License

/**
 * Add the HBase dependency jars as well as jars for any of the configured job
 * classes to the job configuration, so that JobClient will ship them to the
 * cluster and add them to the DistributedCache.
 */
public static void addDependencyJars(Job job) throws IOException {
    try {
        addDependencyJars(job.getConfiguration(),
                org.apache.zookeeper.ZooKeeper.class,
                com.google.protobuf.Message.class,
                com.google.common.collect.ImmutableSet.class,
                org.apache.hadoop.hbase.util.Bytes.class, // one class from hbase.jar
                job.getMapOutputKeyClass(),
                job.getMapOutputValueClass(),
                job.getInputFormatClass(),
                job.getOutputKeyClass(),
                job.getOutputValueClass(),
                job.getOutputFormatClass(),
                job.getPartitionerClass(),
                job.getCombinerClass());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
}
From source file: org.janusgraph.hadoop.formats.FormatTools.java

License: Apache License

public static Class getBaseOutputFormatClass(final Job job) {
    try {
        if (LazyOutputFormat.class.isAssignableFrom(job.getOutputFormatClass())) {
            Class<OutputFormat> baseClass = (Class<OutputFormat>) DEFAULT_COMPAT.getJobContextConfiguration(job)
                    .getClass(LazyOutputFormat.OUTPUT_FORMAT, null);
            return (null == baseClass) ? job.getOutputFormatClass() : baseClass;
        }
        return job.getOutputFormatClass();
    } catch (Exception e) {
        return null;
    }
}
From source file: org.kiji.mapreduce.TestKijiBulkImportJobBuilder.java

License: Apache License

@Test
public void testBuildWithHFileOutput() throws Exception {
    final MapReduceJob mrjob = KijiBulkImportJobBuilder.create().withConf(getConf())
            .withInput(new TextMapReduceJobInput(new Path(mTempPath, "input")))
            .withBulkImporter(NoopBulkImporter.class)
            .withOutput(new HFileMapReduceJobOutput(mTable, new Path(mTempPath, "output"), 10))
            .build();

    final Job job = mrjob.getHadoopJob();
    assertEquals(TextInputFormat.class, job.getInputFormatClass());
    assertEquals(BulkImportMapper.class, job.getMapperClass());
    assertEquals(NoopBulkImporter.class,
            job.getConfiguration().getClass(KijiConfKeys.KIJI_BULK_IMPORTER_CLASS, null));
    assertEquals(IdentityReducer.class, job.getReducerClass());
    assertEquals(10, job.getNumReduceTasks());
    assertEquals(KijiHFileOutputFormat.class, job.getOutputFormatClass());
    assertEquals(TotalOrderPartitioner.class, job.getPartitionerClass());
}