Example usage for org.apache.hadoop.mapreduce Job setOutputFormatClass

Introduction

This page collects usage examples for the org.apache.hadoop.mapreduce Job method setOutputFormatClass.

Prototype

public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException 

Document

Set the OutputFormat for the job.
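
Before the full examples, here is a minimal, self-contained driver sketch showing the call in context. The class name OutputFormatExample and the identity-mapper wiring are illustrative assumptions, not taken from the examples below. As the prototype above indicates, setOutputFormatClass must be called while the job is still being defined; it throws IllegalStateException once the job has been submitted.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class OutputFormatExample {
    public static void main(String[] args) throws Exception {
        Job job = new Job(new Configuration(), "OutputFormatExample");
        job.setJarByClass(OutputFormatExample.class);

        // Map-only identity job: TextInputFormat produces <LongWritable, Text>
        // pairs, which the built-in identity Mapper passes straight through.
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(Mapper.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // The call this page documents. It must happen before submission;
        // afterwards it throws IllegalStateException.
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}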

Usage

From source file: com.pivotal.hawq.mapreduce.parquet.HAWQParquetOutputDriver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), "HAWQParquetOutputFormat");
    job.setJarByClass(HAWQParquetOutputDriver.class);

    job.setOutputFormatClass(HAWQParquetOutputFormat.class);

    /*
    // int2 int4 int8
    HAWQSchema schema = new HAWQSchema("t_int",
    HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.INT2, "col_short"),
    HAWQSchema.optional_field(HAWQPrimitiveField.PrimitiveType.INT4, "col_int"),
    HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.INT8, "col_long")
    );
    job.setMapperClass(WriteIntMapper.class);
    */

    /*
    // varchar
    HAWQSchema schema = new HAWQSchema("t_varchar",
    HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.VARCHAR, "col_varchar")
    );
    job.setMapperClass(WriteVarcharMapper.class);
    */

    /*
    // float4 float8
    HAWQSchema schema = new HAWQSchema("t_floating",
    HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.FLOAT4, "col_float"),
    HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.FLOAT8, "col_long")
    );
    job.setMapperClass(WriteFloatingNumberMapper.class);
    */

    // boolean
    //      HAWQSchema schema = new HAWQSchema("t_boolean",
    //            HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.BOOL, "col_bool"));
    //      job.setMapperClass(WriteBooleanMapper.class);

    // byte array
    HAWQSchema schema = new HAWQSchema("t_bytea",
            HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.BYTEA, "col_bytea"));
    job.setMapperClass(WriteByteArrayMapper.class);

    HAWQParquetOutputFormat.setSchema(job, schema);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    HAWQParquetOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setNumReduceTasks(0);

    job.setMapOutputKeyClass(Void.class);
    job.setMapOutputValueClass(HAWQRecord.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
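
A note on the pattern above: HAWQParquetOutputFormat carries the target table schema through the job configuration, so the driver builds a HAWQSchema and registers it with HAWQParquetOutputFormat.setSchema(job, schema) before submitting. The commented-out blocks are the same driver wired for other HAWQ primitive types; to use one, swap in its schema and its matching mapper together.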

From source file: com.placeiq.piqconnect.BlocksBuilder.java

License: Apache License

protected Job configStage1() throws Exception {
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(pathOutput, true); // useful ?

    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
    conf.setBoolean(Constants.PROP_IS_VECTOR, isVector);
    conf.set("mapred.output.compression.type", "BLOCK"); // useful ?

    Job job = new Job(conf, "data-piqid.piqconnect.BlocksBuilder");
    job.setJarByClass(BlocksBuilder.class);
    job.setMapperClass(MapStage1.class);
    job.setReducerClass(RedStage1.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(BlockIndexWritable.class);
    job.setMapOutputValueClass(LightBlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);

    FileInputFormat.setInputPaths(job, pathEdges);
    SequenceFileOutputFormat.setOutputPath(job, pathOutput);
    SequenceFileOutputFormat.setCompressOutput(job, true);

    Runner.setCompression(job);

    return job;
}

From source file: com.placeiq.piqconnect.Runner.java

License: Apache License

private Job buildJob1(Path input1, Path input2, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
    conf.set("mapred.output.compression.type", "BLOCK");

    Job job = new Job(conf, "data-piqid.piqconnect.IterationStage1");
    job.setJarByClass(Runner.class);

    job.setMapperClass(IterationStage1._Mapper.class);
    job.setReducerClass(IterationStage1._Reducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(IterationStage1.JoinKey.class);
    job.setMapOutputValueClass(BlockWritable.class);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(BlockWritable.class);
    job.setGroupingComparatorClass(IterationStage1.IndexComparator.class);
    job.setPartitionerClass(IterationStage1.IndexPartitioner.class);
    job.setSortComparatorClass(IterationStage1.SortComparator.class);

    FileInputFormat.setInputPaths(job, input1, input2);
    SequenceFileOutputFormat.setOutputPath(job, output);
    SequenceFileOutputFormat.setCompressOutput(job, true);

    setCompression(job);

    return job;
}
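
Beyond the output format, buildJob1 is a good example of full shuffle control for a join-style stage: a custom partitioner routes records, a sort comparator orders the composite IterationStage1.JoinKey, and a grouping comparator ensures all values for one join key arrive in a single reduce call.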

From source file: com.placeiq.piqconnect.Runner.java

License: Apache License

private Job buildJob2(Path input, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);

    Job job = new Job(conf, "data-piqid.piqconnect.IterationStage2");
    job.setJarByClass(Runner.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(IterationStage2._Reducer.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(VLongWritable.class);
    job.setMapOutputValueClass(BlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);
    job.setSortComparatorClass(VLongWritableComparator.class);

    SequenceFileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setCompressOutput(job, true);

    setCompression(job);
    return job;
}

From source file: com.rim.logdriver.util.Cat.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 2) {
        System.out.println("usage: [genericOptions] input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    for (int i = 0; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Cat.class);
    jobConf.setIfUnset("mapred.job.name", "Cat Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(CatMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}
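
The tail of this driver recurs in all of the logdriver tools below: when job.wait is true, the driver blocks in waitForCompletion(true) and turns the outcome into an exit code; otherwise it submits the job asynchronously with job.submit() and returns immediately.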

From source file: com.rim.logdriver.util.FastSearch.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchString = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchString input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchString = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(FastSearch.class);
    jobConf.setIfUnset("mapred.job.name", "Search Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string", Base64.encodeBase64String(searchString.getBytes("UTF-8")));

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file: com.rim.logdriver.util.Grep.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String regex = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] regex input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    regex = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Grep.class);
    jobConf.setIfUnset("mapred.job.name", "Grep Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.grep.regex", Base64.encodeBase64String(regex.getBytes("UTF-8")));

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(GrepMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }

}

From source file: com.rim.logdriver.util.MultiSearch.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchStringDir = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchStringDir = args[0];
    // We are going to be reading all the files in this directory a lot. So
    // let's up the replication factor by a lot so that they're easy to read.
    for (FileStatus f : fs.listStatus(new Path(searchStringDir))) {
        fs.setReplication(f.getPath(), (short) 16);
    }

    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }

    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(MultiSearch.class);
    jobConf.setIfUnset("mapred.job.name", "MultiSearch");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string.dir", searchStringDir);

    // This search is generally too fast to make good use of 128MB blocks, so
    // let's set the value to 256MB (if it's not set already)
    if (jobConf.get("mapred.max.split.size") == null) {
        jobConf.setLong("mapred.max.split.size", 256 * 1024 * 1024);
    }

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file: com.rim.logdriver.util.Search.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchString = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchString input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchString = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Search.class);
    jobConf.setIfUnset("mapred.job.name", "Search Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string", searchString);

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file: com.rockstor.compact.CompactDataTool.java

License: Apache License

private Job createSubmittableJob(Configuration conf) throws IOException {
    Job job = new Job(conf, NAME);
    job.setJarByClass(CompactDataTool.class);

    job.setInputFormatClass(CompactDirInputFormat.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setMapOutputKeyClass(NullWritable.class);

    job.setMapperClass(CompactDataMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    LOG.info("init job " + NAME + " OK");
    return job;
}
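
Here the output format is NullOutputFormat, which discards every record handed to it; it is the usual choice when, as in this map-only compaction job, the mapper presumably does its real work through side effects rather than through emitted key/value pairs.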