Example usage for org.apache.hadoop.mapreduce Job setOutputFormatClass

List of usage examples for org.apache.hadoop.mapreduce Job setOutputFormatClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setOutputFormatClass.

Prototype

public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException 

Source Link

Document

Set the OutputFormat for the job.

Usage

From source file:com.aliyun.openservices.tablestore.hadoop.TableStoreOutputFormatExample.java

License:Apache License

/**
 * Command-line entry point. Validates the arguments, configures a MapReduce job that reads
 * through {@code TableStoreInputFormat} and writes through {@code TableStoreOutputFormat},
 * runs it, and terminates the JVM with status 0 on success or 1 on failure.
 *
 * @param args command-line arguments, interpreted by {@code parseArgs}
 * @throws Exception if job configuration or execution fails
 */
public static void main(String[] args) throws Exception {
    if (!parseArgs(args)) {
        printUsage();
        System.exit(1);
    }
    // These five settings are required; if any is missing, print usage and exit.
    boolean missingSetting = endpoint == null || accessKeyId == null || accessKeySecret == null
            || inputTable == null || outputTable == null;
    if (missingSetting) {
        printUsage();
        System.exit(1);
    }

    Job job = Job.getInstance(new Configuration(), TableStoreOutputFormatExample.class.getName());

    // Both ends of the job go through the TableStore formats.
    job.setInputFormatClass(TableStoreInputFormat.class);
    job.setOutputFormatClass(TableStoreOutputFormat.class);

    job.setMapperClass(OwnerMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(MapWritable.class);
    job.setReducerClass(IntoTableReducer.class);

    // Connection settings and table selection.
    TableStore.setCredential(job, accessKeyId, accessKeySecret, securityToken);
    TableStore.setEndpoint(job, endpoint, instance);
    TableStoreInputFormat.addCriteria(job, fetchCriteria());
    TableStoreOutputFormat.setOutputTable(job, outputTable);

    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:com.antbrains.crf.hadoop.CalcFeatureWeights.java

License:Apache License

/**
 * Runs the feature-weight pipeline as up to two chained MapReduce jobs.
 *
 * <p>Usage: {@code CalcFeatureWeights <inDir> <tmpDir> <outDir> [startStep]}.
 * Step 1 ("calc") reads sequence files from {@code inDir} and writes sequence files to
 * {@code tmpDir}; step 2 ("sort") feeds {@code tmpDir} through {@code IdentityMapper} and
 * {@code IdentityReducer} into {@code outDir}. A step runs only when {@code startStep} is less
 * than or equal to its number (default 1, i.e. run both). Each step recursively deletes its
 * output directory before running.
 *
 * <p>The JVM exits with -1 on a usage error and with 1 when a step fails, so calling scripts
 * can detect failure.
 *
 * @param args generic Hadoop options followed by the arguments described above
 * @throws Exception if job configuration or execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 3 && otherArgs.length != 4) {
        System.err.println("CalcFeatureWeights <inDir> <tmpDir> <outDir> [startStep]");
        System.exit(-1);
    }
    int startStep = 1;
    if (otherArgs.length == 4) {
        // parseInt yields the primitive directly, avoiding a needless Integer box.
        startStep = Integer.parseInt(otherArgs[3]);
    }
    FileSystem fs = FileSystem.get(conf);
    if (startStep <= 1) {
        System.out.println("calc");
        // Clear any previous intermediate output; the job refuses to overwrite it.
        fs.delete(new Path(otherArgs[1]), true);
        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(conf, CalcFeatureWeights.class.getSimpleName());
        job.setNumReduceTasks(1);
        job.setJarByClass(CalcFeatureWeights.class);
        job.setMapperClass(CalcFeatureMapper.class);
        job.setReducerClass(CalcFeatureReducer.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(MyKey.class);

        job.setOutputKeyClass(MyKey.class);
        job.setOutputValueClass(MyValue.class);
        FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));

        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        if (!job.waitForCompletion(true)) {
            System.err.println("step1 failed");
            // Report the failure through the exit status instead of returning normally.
            System.exit(1);
        }
    }

    if (startStep <= 2) {
        // sort
        fs.delete(new Path(otherArgs[2]), true);
        System.out.println("sort");
        Job job = Job.getInstance(conf, CalcFeatureWeights.class.getSimpleName());

        job.setNumReduceTasks(1);
        job.setJarByClass(CalcFeatureWeights.class);
        job.setMapperClass(IdentityMapper.class);
        job.setReducerClass(IdentityReducer.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);

        job.setMapOutputKeyClass(MyKey.class);
        job.setMapOutputValueClass(MyValue.class);
        job.setOutputKeyClass(MyKey.class);
        job.setOutputValueClass(MyValue.class);

        // Step 2 consumes the intermediate directory written by step 1.
        FileInputFormat.setInputPaths(job, new Path(otherArgs[1]));

        FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

        if (!job.waitForCompletion(true)) {
            System.err.println("step2 failed");
            System.exit(1);
        }
    }

}

From source file:com.antbrains.crf.hadoop.ParallelTraining.java

License:Apache License

/**
 * Launches a single training MapReduce job.
 *
 * <p>Usage: {@code ParallelTraining <instanceDir> <outDir> <featurecount> <training-params>}.
 * The feature count, the iteration number (fixed to 1) and the serialized training parameters
 * are handed to the tasks through the job configuration under the keys
 * {@code pt.featureCount}, {@code pt.iterate} and {@code pt.params}.
 *
 * <p>The JVM exits with -1 on a usage error, 0 when the job succeeds and 1 when it fails.
 *
 * @param args generic Hadoop options followed by the four arguments described above
 * @throws Exception if parameter loading, job configuration or execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 4) {
        System.err.println("ParallelTraining <instanceDir> <outDir> <featurecount> <training-params>");
        System.exit(-1);
    }
    // parseInt yields the primitive directly, avoiding a needless Integer box.
    int featureCount = Integer.parseInt(otherArgs[2]);

    conf.set("pt.iterate", "1");
    conf.set("pt.featureCount", String.valueOf(featureCount));

    TrainingParams params = SgdCrf.loadParams(otherArgs[3]);
    System.out.println(new Gson().toJson(params));
    conf.set("pt.params", object2String(params));

    // The configuration must be complete before this point: the job is created from it.
    // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
    Job job = Job.getInstance(conf, ParallelTraining.class.getSimpleName());

    job.setJarByClass(ParallelTraining.class);
    job.setMapperClass(TrainingMapper.class);
    job.setReducerClass(TrainingReducer.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(TrainingWeights.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.antbrains.crf.hadoop.ParallelTraining2.java

License:Apache License

/**
 * Runs the training MapReduce job repeatedly, once per outer iteration.
 *
 * <p>Usage:
 * {@code ParallelTraining2 <instanceDir> <outDir> <featurecount> <training-params> <out-iter>}.
 * Iteration {@code i} writes to {@code <outDir>/result<i>} (deleted first if present). From the
 * second iteration on, the previous iteration's output directory is passed to the tasks through
 * the {@code paramDir} configuration key.
 *
 * <p>The JVM exits with -1 on a usage error and with 1 as soon as an iteration fails.
 *
 * @param args generic Hadoop options followed by the five arguments described above
 * @throws Exception if parameter loading, job configuration or execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Validate the argument count BEFORE reading otherArgs[3]; otherwise a short command line
    // dies with ArrayIndexOutOfBoundsException instead of printing the usage text.
    if (otherArgs.length != 5) {
        System.err.println(
                "ParallelTraining2 <instanceDir> <outDir> <featurecount> <training-params> <out-iter>");
        System.exit(-1);
    }

    FileSystem fs = FileSystem.get(conf);
    TrainingParams params = SgdCrf.loadParams(otherArgs[3]);
    System.out.println(new Gson().toJson(params));

    int featureCount = Integer.parseInt(otherArgs[2]);
    int outIter = Integer.parseInt(otherArgs[4]);

    // These settings do not change between iterations, so set them once.
    conf.set("pt.featureCount", String.valueOf(featureCount));
    conf.set("pt.params", object2String(params));

    String prevOutDir = "";
    for (int i = 1; i <= outIter; i++) {
        System.out.println("iterator: " + i);
        conf.set("pt.iterate", String.valueOf(i));
        String outDir = otherArgs[1] + "/result" + i;

        if (i > 1) {
            // Point this iteration at the output directory of the previous one.
            conf.set("paramDir", prevOutDir);
        }
        prevOutDir = outDir;
        fs.delete(new Path(outDir), true);

        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(conf, ParallelTraining2.class.getSimpleName());

        job.setJarByClass(ParallelTraining2.class);
        job.setMapperClass(TrainingMapper.class);
        job.setReducerClass(TrainingReducer.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        System.out.println("outDir: " + outDir);
        FileOutputFormat.setOutputPath(job, new Path(outDir));

        if (!job.waitForCompletion(true)) {
            System.err.println("iter " + i + " failed");
            // Later iterations depend on this one; stop and report failure via the exit status.
            System.exit(1);
        }
    }
}

From source file:com.architecting.ch07.MapReduceIndexerTool.java

License:Apache License

/**
 * API for Java clients; visible for testing; may become a public API eventually.
 *
 * <p>Configures and runs one MapReduce job that scans an HBase table and writes Solr output
 * below {@code options.outputDir}, then renames the per-task output directories, publishes the
 * results directory and optionally triggers the go-live step.
 *
 * @param options parsed tool options (input table, output directory, Solr/ZooKeeper settings, ...)
 * @return 0 on success, -1 if the job or any of the delete/rename/go-live steps fails
 * @throws IllegalStateException if the configuration has {@code isMR1} set to true and
 *         {@code mapred.job.tracker} set to {@code local}
 * @throws Exception if job setup or execution fails
 */
int run(Options options) throws Exception {
    if (getConf().getBoolean("isMR1", false) && "local".equals(getConf().get("mapred.job.tracker"))) {
        throw new IllegalStateException(
                "Running with LocalJobRunner (i.e. all of Hadoop inside a single JVM) is not supported "
                        + "because LocalJobRunner does not (yet) implement the Hadoop Distributed Cache feature, "
                        + "which is required for passing files via --files and --libjars");
    }

    // Start timestamp; only handed to goodbye(...) at the very end.
    long programStartTime = System.nanoTime();
    getConf().setInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, options.maxSegments);

    // switch off a false warning about allegedly not implementing Tool
    // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // also see https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);

    if (options.log4jConfigFile != null) {
        Utils.setLogConfigFile(options.log4jConfigFile, getConf());
        addDistributedCacheFile(options.log4jConfigFile, getConf());
    }

    // NOTE(review): the job is built from a fresh HBaseConfiguration rather than from getConf(),
    // so the settings applied to getConf() above do not visibly reach the job - confirm intended.
    Configuration config = HBaseConfiguration.create();
    Job job = Job.getInstance(config);
    job.setJarByClass(getClass());

    // To be able to run this example from eclipse, we need to make sure 
    // the built jar is distributed to the map-reduce tasks from the
    // local file system.
    // NOTE(review): hard-coded, machine-specific path.
    job.addCacheArchive(new URI("file:///home/cloudera/ahae/target/ahae.jar"));

    // Remove a pre-existing output directory; give up if it cannot be deleted.
    FileSystem fs = options.outputDir.getFileSystem(job.getConfiguration());
    if (fs.exists(options.outputDir) && !delete(options.outputDir, true, fs)) {
        return -1;
    }
    Path outputResultsDir = new Path(options.outputDir, RESULTS_DIR);
    Path outputReduceDir = new Path(options.outputDir, "reducers");

    // Fixed at one reducer; see the assertion against options.shards after the job completes.
    int reducers = 1;

    Scan scan = new Scan();
    scan.addFamily(CF);
    // tag::SETUP[]
    scan.setCaching(500); // <1>
    scan.setCacheBlocks(false); // <2>

    TableMapReduceUtil.initTableMapperJob( // <3>
            options.inputTable, // Input HBase table name
            scan, // Scan instance to control what to index
            HBaseAvroToSOLRMapper.class, // Mapper to parse cells content.
            Text.class, // Mapper output key
            SolrInputDocumentWritable.class, // Mapper output value
            job);

    // The job writes into the temporary "reducers" directory, which is renamed to the results
    // directory once everything succeeded.
    FileOutputFormat.setOutputPath(job, outputReduceDir);

    job.setJobName(getClass().getName() + "/" + Utils.getShortClassName(HBaseAvroToSOLRMapper.class));
    job.setReducerClass(SolrReducer.class); // <4>
    job.setPartitionerClass(SolrCloudPartitioner.class); // <5>
    job.getConfiguration().set(SolrCloudPartitioner.ZKHOST, options.zkHost);
    job.getConfiguration().set(SolrCloudPartitioner.COLLECTION, options.collection);
    job.getConfiguration().setInt(SolrCloudPartitioner.SHARDS, options.shards);

    job.setOutputFormatClass(SolrOutputFormat.class);
    SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, job);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SolrInputDocumentWritable.class);
    job.setSpeculativeExecution(false);
    // end::SETUP[]
    // NOTE(review): 'reducers' is the constant 1 set above, not derived from options.shards as the
    // trailing comment suggests - confirm which is intended.
    job.setNumReduceTasks(reducers); // Set the number of reducers based on the number of shards we have.
    if (!waitForCompletion(job, true)) {
        return -1;// job failed
    }

    // -------------------------------------------------------------------------------------------------------------------------------------

    // NOTE(review): with assertions enabled this fails whenever options.shards != 1.
    assert reducers == options.shards;

    // normalize output shard dir prefix, i.e.
    // rename part-r-00000 to part-00000 (stems from zero tree merge iterations)
    // rename part-m-00000 to part-00000 (stems from > 0 tree merge iterations)
    for (FileStatus stats : fs.listStatus(outputReduceDir)) {
        String dirPrefix = SolrOutputFormat.getOutputName(job);
        Path srcPath = stats.getPath();
        if (stats.isDirectory() && srcPath.getName().startsWith(dirPrefix)) {
            // Drop the two characters following the prefix ("-m" and "-r" have the same length).
            String dstName = dirPrefix + srcPath.getName().substring(dirPrefix.length() + "-m".length());
            Path dstPath = new Path(srcPath.getParent(), dstName);
            if (!rename(srcPath, dstPath, fs)) {
                return -1;
            }
        }
    }
    ; // stray empty statement, no effect

    // publish results dir
    if (!rename(outputReduceDir, outputResultsDir, fs)) {
        return -1;
    }

    // The go-live step only runs when requested through options.goLive.
    if (options.goLive && !new GoLive().goLive(options, listSortedOutputShardDirs(job, outputResultsDir, fs))) {
        return -1;
    }

    goodbye(job, programStartTime);
    return 0;
}

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunnerTest.java

License:Apache License

/**
 * Test for map only job./*from w w w  .j  ava  2s. c o  m*/
 * @throws Exception if failed
 */
@Test
public void map_only() throws Exception {
    Job job = newJob();
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(SimpleMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    File inputDir = folder.newFolder();
    File inputFile = new File(inputDir, "input.txt");
    write(inputFile, "Hello, world!");

    File outputDir = folder.newFolder();
    outputDir.delete();

    FileInputFormat.setInputPaths(job, new Path(inputFile.toURI()));
    FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI()));
    assertThat(new SimpleJobRunner().run(job), is(true));
    assertThat(trimHead(read(outputDir)), is(set("Hello, world!")));
}

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunnerTest.java

License:Apache License

/**
 * Test for map-reduce job./*from w w  w  . j  a va 2  s .  c  o  m*/
 * @throws Exception if failed
 */
@Test
public void map_reduce() throws Exception {
    Job job = newJob();
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setSortComparatorClass(Text.Comparator.class);
    job.setGroupingComparatorClass(Text.Comparator.class);

    job.setReducerClass(WordCountReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    File inputDir = folder.newFolder();
    File inputFile = new File(inputDir, "input.txt");
    write(inputFile, new String[] { "a b c d", "a a b c", "c", });

    File outputDir = folder.newFolder();
    outputDir.delete();

    FileInputFormat.setInputPaths(job, new Path(inputFile.toURI()));
    FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI()));
    assertThat(new SimpleJobRunner().run(job), is(true));
    assertThat(toMap(read(outputDir)), is(map(new String[] { "a", "3", "b", "2", "c", "3", "d", "1", })));
}

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunnerTest.java

License:Apache License

/**
 * Test for wrong job./*from  www . j av a  2 s  .  c o m*/
 * @throws Exception if failed
 */
@Test
public void exception() throws Exception {
    Job job = newJob();
    job.setJobName("w/ exception");
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(InvalidMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    File inputDir = folder.newFolder();
    File inputFile = new File(inputDir, "input.txt");
    write(inputFile, "testing");

    File outputDir = folder.newFolder();
    outputDir.delete();

    FileInputFormat.setInputPaths(job, new Path(inputFile.toURI()));
    FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI()));
    assertThat(new SimpleJobRunner().run(job), is(false));
}

From source file:com.asakusafw.runtime.stage.AbstractStageClient.java

License:Apache License

/**
 * Registers this stage's outputs on the job.
 *
 * <p>The stage output path is resolved through {@code variables}; every declared stage output
 * is copied into a new {@code StageOutput} and the list is handed to {@code StageOutputDriver}.
 * The job's own output is then set to {@code NullWritable} keys/values with
 * {@code StageOutputFormat}, and {@code LegacyBridgeOutputCommitter} is registered under
 * {@code mapred.output.committer.class}.
 *
 * @param job the job to configure
 * @param variables variable table used to resolve the stage output path
 * @throws IOException declared by the signature; presumably raised by the driver setup
 */
private void configureStageOutput(Job job, VariableTable variables) throws IOException {
    String resolvedPath = variables.parse(getStageOutputPath());
    List<StageOutput> copies = new ArrayList<>();
    for (StageOutput declared : getStageOutputs()) {
        String outputName = declared.getName();
        Class<?> keyType = declared.getKeyClass();
        Class<?> valueType = declared.getValueClass();
        Class<? extends OutputFormat<?, ?>> formatType = declared.getFormatClass();
        Map<String, String> attrs = declared.getAttributes();
        if (LOG.isDebugEnabled()) {
            String message = MessageFormat.format(
                    "Output: path={0}/{1}-*, format={2}, key={3}, value={4}, attributes={5}", //$NON-NLS-1$
                    resolvedPath, outputName, formatType.getName(), keyType.getName(), valueType.getName(),
                    attrs);
            LOG.debug(message);
        }
        copies.add(new StageOutput(outputName, keyType, valueType, formatType, attrs));
    }
    StageOutputDriver.set(job, resolvedPath, copies);

    // The job's own output types are NullWritable; the stage outputs were registered above.
    job.setOutputFormatClass(StageOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.getConfiguration().setClass("mapred.output.committer.class", //$NON-NLS-1$
            LegacyBridgeOutputCommitter.class, org.apache.hadoop.mapred.OutputCommitter.class);
}

From source file:com.asakusafw.runtime.stage.output.StageOutputDriver.java

License:Apache License

/**
 * Builds a {@code ResultOutput} backed by a record writer obtained from a newly instantiated
 * output format.
 *
 * <p>A scratch job is created from the current context's configuration and set up with the
 * given format/key/value classes; a task attempt context is derived from it using the current
 * task attempt ID. For {@code FileOutputFormat} subclasses the output file prefix is set from
 * {@code name}.
 *
 * @param name output name; used as the file prefix for file-based formats
 * @param formatClass output format implementation to instantiate
 * @param keyClass output key class
 * @param valueClass output value class
 * @param counters must be non-null; not otherwise used in this method
 * @return the result output wrapping the new record writer
 * @throws IOException if the record writer cannot be created
 * @throws InterruptedException if interrupted while creating the record writer
 */
private ResultOutput<?> buildNormalSink(String name,
        @SuppressWarnings("rawtypes") Class<? extends OutputFormat> formatClass, Class<?> keyClass,
        Class<?> valueClass, List<Counter> counters) throws IOException, InterruptedException {
    assert context != null;
    assert name != null;
    assert formatClass != null;
    assert keyClass != null;
    assert valueClass != null;
    assert counters != null;

    // Scratch job: only used to carry the output settings into a configuration.
    Job sinkJob = JobCompatibility.newJob(context.getConfiguration());
    sinkJob.setOutputFormatClass(formatClass);
    sinkJob.setOutputKeyClass(keyClass);
    sinkJob.setOutputValueClass(valueClass);

    TaskAttemptContext sinkContext = JobCompatibility.newTaskAttemptContext(
            sinkJob.getConfiguration(), context.getTaskAttemptID());

    boolean fileBased = FileOutputFormat.class.isAssignableFrom(formatClass);
    if (fileBased) {
        setOutputFilePrefix(sinkContext, name);
    }

    OutputFormat<?, ?> sinkFormat = ReflectionUtils.newInstance(formatClass, sinkContext.getConfiguration());
    RecordWriter<?, ?> sinkWriter = sinkFormat.getRecordWriter(sinkContext);
    return new ResultOutput<Writable>(sinkContext, sinkWriter);
}