Example usage for org.apache.hadoop.mapreduce Job setOutputFormatClass

Introduction

This page collects usage examples for the org.apache.hadoop.mapreduce Job method setOutputFormatClass.

Prototype

public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException 

Document

Set the OutputFormat for the job.
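
Before the full examples, here is a minimal, self-contained driver sketch showing the call in context. The class name OutputFormatExample and the identity-mapper wiring are illustrative assumptions, not taken from the examples below. As the prototype above indicates, setOutputFormatClass must be called while the job is still being defined; it throws IllegalStateException once the job has been submitted.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class OutputFormatExample {
    public static void main(String[] args) throws Exception {
        Job job = new Job(new Configuration(), "OutputFormatExample");
        job.setJarByClass(OutputFormatExample.class);

        // Map-only identity job: TextInputFormat produces <LongWritable, Text>
        // pairs, which the built-in identity Mapper passes straight through.
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(Mapper.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // The call this page documents. It must happen before submission;
        // afterwards it throws IllegalStateException.
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}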

Usage

From source file: com.pivotal.hawq.mapreduce.parquet.HAWQParquetOutputDriver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), "HAWQParquetOutputFormat");
    job.setJarByClass(HAWQParquetOutputDriver.class);

    job.setOutputFormatClass(HAWQParquetOutputFormat.class);

    /*
    // int2 int4 int8
    HAWQSchema schema = new HAWQSchema("t_int",
    HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.INT2, "col_short"),
    HAWQSchema.optional_field(HAWQPrimitiveField.PrimitiveType.INT4, "col_int"),
    HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.INT8, "col_long")
    );
    job.setMapperClass(WriteIntMapper.class);
    */

    /*
    // varchar
    HAWQSchema schema = new HAWQSchema("t_varchar",
    HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.VARCHAR, "col_varchar")
    );
    job.setMapperClass(WriteVarcharMapper.class);
    */

    /*
    // float4 float8
    HAWQSchema schema = new HAWQSchema("t_floating",
    HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.FLOAT4, "col_float"),
    HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.FLOAT8, "col_long")
    );
    job.setMapperClass(WriteFloatingNumberMapper.class);
    */

    // boolean
    //      HAWQSchema schema = new HAWQSchema("t_boolean",
    //            HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.BOOL, "col_bool"));
    //      job.setMapperClass(WriteBooleanMapper.class);

    // byte array
    HAWQSchema schema = new HAWQSchema("t_bytea",
            HAWQSchema.required_field(HAWQPrimitiveField.PrimitiveType.BYTEA, "col_bytea"));
    job.setMapperClass(WriteByteArrayMapper.class);

    HAWQParquetOutputFormat.setSchema(job, schema);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    HAWQParquetOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setNumReduceTasks(0);

    job.setMapOutputKeyClass(Void.class);
    job.setMapOutputValueClass(HAWQRecord.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
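
A note on the pattern above: HAWQParquetOutputFormat carries the target table schema through the job configuration, so the driver builds a HAWQSchema and registers it with HAWQParquetOutputFormat.setSchema(job, schema) before submitting. The commented-out blocks are the same driver wired for other HAWQ primitive types; to use one, swap in its schema and its matching mapper together.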

From source file: com.placeiq.piqconnect.BlocksBuilder.java

License: Apache License

protected Job configStage1() throws Exception {
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(pathOutput, true); // useful ?

    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
    conf.setBoolean(Constants.PROP_IS_VECTOR, isVector);
    conf.set("mapred.output.compression.type", "BLOCK"); // useful ?

    Job job = new Job(conf, "data-piqid.piqconnect.BlocksBuilder");
    job.setJarByClass(BlocksBuilder.class);
    job.setMapperClass(MapStage1.class);
    job.setReducerClass(RedStage1.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(BlockIndexWritable.class);
    job.setMapOutputValueClass(LightBlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);

    FileInputFormat.setInputPaths(job, pathEdges);
    SequenceFileOutputFormat.setOutputPath(job, pathOutput);
    SequenceFileOutputFormat.setCompressOutput(job, true);

    Runner.setCompression(job);

    return job;
}

From source file: com.placeiq.piqconnect.Runner.java

License: Apache License

private Job buildJob1(Path input1, Path input2, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
    conf.set("mapred.output.compression.type", "BLOCK");

    Job job = new Job(conf, "data-piqid.piqconnect.IterationStage1");
    job.setJarByClass(Runner.class);

    job.setMapperClass(IterationStage1._Mapper.class);
    job.setReducerClass(IterationStage1._Reducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(IterationStage1.JoinKey.class);
    job.setMapOutputValueClass(BlockWritable.class);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(BlockWritable.class);
    job.setGroupingComparatorClass(IterationStage1.IndexComparator.class);
    job.setPartitionerClass(IterationStage1.IndexPartitioner.class);
    job.setSortComparatorClass(IterationStage1.SortComparator.class);

    FileInputFormat.setInputPaths(job, input1, input2);
    SequenceFileOutputFormat.setOutputPath(job, output);
    SequenceFileOutputFormat.setCompressOutput(job, true);

    setCompression(job);

    return job;
}
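
Beyond the output format, buildJob1 is a good example of full shuffle control for a join-style stage: a custom partitioner routes records, a sort comparator orders the composite IterationStage1.JoinKey, and a grouping comparator ensures all values for one join key arrive in a single reduce call.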

From source file: com.placeiq.piqconnect.Runner.java

License: Apache License

private Job buildJob2(Path input, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);

    Job job = new Job(conf, "data-piqid.piqconnect.IterationStage2");
    job.setJarByClass(Runner.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(IterationStage2._Reducer.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(VLongWritable.class);
    job.setMapOutputValueClass(BlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);
    job.setSortComparatorClass(VLongWritableComparator.class);

    SequenceFileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setCompressOutput(job, true);

    setCompression(job);
    return job;
}

From source file: com.rim.logdriver.util.Cat.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 2) {
        System.out.println("usage: [genericOptions] input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    for (int i = 0; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Cat.class);
    jobConf.setIfUnset("mapred.job.name", "Cat Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(CatMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}
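
The tail of this driver recurs in all of the logdriver tools below: when job.wait is true, the driver blocks in waitForCompletion(true) and turns the outcome into an exit code; otherwise it submits the job asynchronously with job.submit() and returns immediately.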

From source file: com.rim.logdriver.util.FastSearch.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchString = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchString input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchString = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(FastSearch.class);
    jobConf.setIfUnset("mapred.job.name", "Search Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string", Base64.encodeBase64String(searchString.getBytes("UTF-8")));

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file: com.rim.logdriver.util.Grep.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String regex = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] regex input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    regex = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Grep.class);
    jobConf.setIfUnset("mapred.job.name", "Grep Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.grep.regex", Base64.encodeBase64String(regex.getBytes("UTF-8")));

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(GrepMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }

}

From source file: com.rim.logdriver.util.MultiSearch.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchStringDir = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchStringDir = args[0];
    // We are going to be reading all the files in this directory a lot. So
    // let's up the replication factor by a lot so that they're easy to read.
    for (FileStatus f : fs.listStatus(new Path(searchStringDir))) {
        fs.setReplication(f.getPath(), (short) 16);
    }

    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }

    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(MultiSearch.class);
    jobConf.setIfUnset("mapred.job.name", "MultiSearch");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string.dir", searchStringDir);

    // This search is generally too fast to make good use of 128MB blocks, so
    // let's set the value to 256MB (if it's not set already)
    if (jobConf.get("mapred.max.split.size") == null) {
        jobConf.setLong("mapred.max.split.size", 256 * 1024 * 1024);
    }

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file: com.rim.logdriver.util.Search.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchString = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchString input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchString = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Search.class);
    jobConf.setIfUnset("mapred.job.name", "Search Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string", searchString);

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file: com.rockstor.compact.CompactDataTool.java

License: Apache License

private Job createSubmittableJob(Configuration conf) throws IOException {
    Job job = new Job(conf, NAME);
    job.setJarByClass(CompactDataTool.class);

    job.setInputFormatClass(CompactDirInputFormat.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setMapOutputKeyClass(NullWritable.class);

    job.setMapperClass(CompactDataMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    LOG.info("init job " + NAME + " OK");
    return job;
}
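
Here the output format is NullOutputFormat, which discards every record handed to it; it is the usual choice when, as in this map-only compaction job, the mapper presumably does its real work through side effects rather than through emitted key/value pairs.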