List of usage examples for org.apache.hadoop.mapreduce.lib.output FileOutputFormat setOutputPath
public static void setOutputPath(Job job, Path outputDir)
From source file:com.igalia.metamail.jobs.MessagesByTimePeriod.java
License:Open Source License
private static Job setupJob() throws IOException, InterruptedException, ClassNotFoundException { Configuration config = HBaseConfiguration.create(); Job job = new Job(config, "MessagesByTimePeriod"); job.setJarByClass(MessagesByTimePeriod.class); Scan scan = new Scan(); scan.setCaching(500);//from w w w.j av a2s .co m scan.setCacheBlocks(false); // don't set to true for MR jobs // Mapper TableMapReduceUtil.initTableMapperJob(mailsTable, // input HBase table name scan, // Scan instance to control CF and attribute selection MessagesByTimePeriod.MessagesByTimePeriodMapper.class, Text.class, IntWritable.class, job); // Reducer job.setReducerClass(MessagesByTimePeriod.MessagesByTimePeriodReducer.class); job.setNumReduceTasks(1); FileOutputFormat.setOutputPath(job, new Path(MessagesByTimePeriod.MAIL_OUT)); return job; }
From source file:com.igalia.wordcount.WordCount.java
License:Open Source License
/**
 * Runs a classic word-count over fixed HDFS paths and blocks until it finishes.
 *
 * @param arg0 unused command-line arguments
 * @return 0 on success, 1 on failure
 */
public int run(String[] arg0) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(WordCount.class);
    job.setJobName("wordcount");

    // Output types shared by mapper and reducer.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // NOTE(review): input/output paths are hard-coded; presumably fine for a demo.
    FileInputFormat.setInputPaths(job, new Path("/tmp/wordcount/in"));
    FileOutputFormat.setOutputPath(job, new Path("/tmp/wordcount/out"));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.impetus.code.examples.hadoop.cassandra.wordcount.WordCount.java
License:Apache License
/**
 * Runs one word-count job per test column, reading from Cassandra. Results go
 * either to the filesystem or back into Cassandra depending on the first
 * argument ({@code OUTPUT_REDUCER_VAR}=filesystem|cassandra).
 *
 * @param args optional; args[0] may select the output reducer type
 * @return always 0
 */
public int run(String[] args) throws Exception {
    String outputReducerType = "cassandra";
    if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR)) {
        String[] parts = args[0].split("=");
        if (parts != null && parts.length == 2)
            outputReducerType = parts[1];
    }
    logger.info("output reducer type: " + outputReducerType);

    for (int i = 0; i < WordCountSetup.TEST_COUNT; i++) {
        String columnName = "text" + i;
        getConf().set(CONF_COLUMN_NAME, columnName);

        Job job = new Job(getConf(), "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);

        if (outputReducerType.equalsIgnoreCase("filesystem")) {
            // Filesystem output: plain Text/IntWritable pairs, one directory per column.
            job.setCombinerClass(ReducerToFilesystem.class);
            job.setReducerClass(ReducerToFilesystem.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
        } else {
            // Cassandra output: reducer writes mutations into the output column family.
            job.setReducerClass(ReducerToCassandra.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(ByteBuffer.class);
            job.setOutputValueClass(List.class);
            job.setOutputFormatClass(ColumnFamilyOutputFormat.class);
            ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
        }

        // Input always comes from Cassandra, restricted to the current column.
        job.setInputFormatClass(ColumnFamilyInputFormat.class);
        ConfigHelper.setRpcPort(job.getConfiguration(), "9160");
        ConfigHelper.setInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, INPUT_COLUMN_FAMILY);

        SlicePredicate predicate = new SlicePredicate()
                .setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName)));
        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

        job.waitForCompletion(true);
    }
    return 0;
}
From source file:com.impetus.code.examples.hadoop.cassandra.wordcount.WordCountCounters.java
License:Apache License
public int run(String[] args) throws Exception { Job job = new Job(getConf(), "wordcountcounters"); job.setJarByClass(WordCountCounters.class); job.setMapperClass(SumMapper.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX)); job.setInputFormatClass(ColumnFamilyInputFormat.class); ConfigHelper.setRpcPort(job.getConfiguration(), "9160"); ConfigHelper.setInitialAddress(job.getConfiguration(), "localhost"); ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner"); ConfigHelper.setInputColumnFamily(job.getConfiguration(), WordCount.KEYSPACE, WordCountCounters.COUNTER_COLUMN_FAMILY); SlicePredicate predicate = new SlicePredicate() .setSlice_range(new SliceRange().setStart(ByteBufferUtil.EMPTY_BYTE_BUFFER) .setFinish(ByteBufferUtil.EMPTY_BYTE_BUFFER).setCount(100)); ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate); job.waitForCompletion(true);// ww w .j a va 2s . c o m return 0; }
From source file:com.impetus.code.examples.hadoop.mapred.earthquake.EarthQuakeAnalyzer.java
License:Apache License
/**
 * Entry point: runs the earthquake analysis job over fixed resource paths and
 * exits with the job's success status.
 */
public static void main(String[] args) throws Throwable {
    Job job = new Job();
    job.setJarByClass(EarthQuakeAnalyzer.class);

    FileInputFormat.addInputPath(job, new Path("src/main/resources/eq/input"));
    FileOutputFormat.setOutputPath(job, new Path("src/main/resources/eq/output"));

    job.setMapperClass(EarthQuakeMapper.class);
    job.setReducerClass(EarthQuakeReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Exit code 0 on success, 1 on failure.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.impetus.code.examples.hadoop.mapred.weather.MaxTemp.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: MaxTemperature <input path> <output path>"); System.exit(-1);//from w w w .jav a 2s .c om } Job job = new Job(); job.setJarByClass(MaxTemp.class); job.setJobName("Max temperature"); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(MaxTempMapper.class); job.setReducerClass(MaxTempReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); }
From source file:com.intel.hadoop.hbase.dot.KEY.java
License:Apache License
private void doMapReduce(Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass, String mrTableName) throws IOException, ClassNotFoundException, InterruptedException { this.conf.set(KEY.INPUT_TABLE, mrTableName); Job job = new Job(this.conf); job.setJobName("Generate Data for [" + mrTableName + "]"); job.setJarByClass(GenerateTestTable.class); job.setInputFormatClass(inputFormatClass); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(LongWritable.class); FileSystem fs = FileSystem.get(conf); Path path = new Path("/tmp", "tempout"); fs.delete(path, true);//from w w w.j av a 2 s. c o m FileOutputFormat.setOutputPath(job, path); job.setMapperClass(mapperClass); job.setNumReduceTasks(0); TableMapReduceUtil.addDependencyJars(job); // Add a Class from the hbase.jar so it gets registered too. TableMapReduceUtil.addDependencyJars(job.getConfiguration(), org.apache.hadoop.hbase.util.Bytes.class); TableMapReduceUtil.initCredentials(job); job.waitForCompletion(true); }
From source file:com.intel.hadoop.hbase.dot.mapreduce.DotImportTsv.java
License:Apache License
/** * Sets up the actual job./* w ww . j av a2 s. c om*/ * * @param conf The current configuration. * @param args The command line parameters. * @return The newly created job. * @throws IOException When setting up the job fails. */ public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException, ClassNotFoundException { // Support non-XML supported characters // by re-encoding the passed separator as a Base64 string. String actualSeparator = conf.get(SEPARATOR_CONF_KEY); if (actualSeparator != null) { conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes())); } // See if a non-default Mapper was set String mapperClassName = conf.get(MAPPER_CONF_KEY); Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER; String tableName = args[0]; Path inputDir = new Path(args[1]); Job job = new Job(conf, NAME + "_" + tableName); job.setJarByClass(mapperClass); FileInputFormat.setInputPaths(job, inputDir); job.setInputFormatClass(TextInputFormat.class); job.setMapperClass(mapperClass); String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY); if (hfileOutPath != null) { if (!doesTableExist(tableName)) { createTable(conf, tableName); } HTable table = new HTable(conf, tableName); job.setReducerClass(PutSortReducer.class); Path outputDir = new Path(hfileOutPath); FileOutputFormat.setOutputPath(job, outputDir); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(Put.class); HFileOutputFormat.configureIncrementalLoad(job, table); } else { // No reducers. Just write straight to table. Call initTableReducerJob // to set up the TableOutputFormat. TableMapReduceUtil.initTableReducerJob(tableName, null, job); job.setNumReduceTasks(0); } TableMapReduceUtil.addDependencyJars(job); TableMapReduceUtil.addDependencyJars(job.getConfiguration(), com.google.common.base.Function.class /* Guava used by TsvParser */); return job; }
From source file:com.j.distributed.counter.CounterJob.java
/**
 * Runs the distributed counter job: count words from options[0] into options[1].
 *
 * @param options options[0] = input path, options[1] = output path
 * @return 0 on success, 1 on failure
 */
@Override
public int run(String... options) throws Exception {
    Job job = Job.getInstance(getConf(), getClass().toString());
    job.setJarByClass(getClass());

    job.setMapperClass(CounterMapper.class);
    // The reducer is associative/commutative here, so it doubles as the combiner.
    job.setCombinerClass(CounterReducer.class);
    job.setReducerClass(CounterReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(options[0]));
    FileOutputFormat.setOutputPath(job, new Path(options[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.j.distributed.sorter.SorterJob.java
/**
 * Runs the sorter job: sorts counter output (options[1]) by count, descending,
 * into options[2].
 *
 * @param options options[1] = input path, options[2] = output path
 * @return 0 on success, 1 on failure
 */
@Override
public int run(String... options) throws Exception {
    Job job = Job.getInstance(getConf(), getClass().toString());
    job.setJarByClass(getClass());

    job.setMapperClass(SorterMapper.class);
    job.setCombinerClass(SorterReducer.class);
    job.setReducerClass(SorterReducer.class);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    // Largest counts first: sort keys with the decreasing LongWritable comparator.
    job.setSortComparatorClass(LongWritable.DecreasingComparator.class);

    FileInputFormat.addInputPath(job, new Path(options[1]));
    FileOutputFormat.setOutputPath(job, new Path(options[2]));

    return job.waitForCompletion(true) ? 0 : 1;
}