Example usage for org.apache.hadoop.mapreduce.Job.getConfiguration()

List of usage examples for org.apache.hadoop.mapreduce.Job.getConfiguration()

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job.getConfiguration().

Prototype

public Configuration getConfiguration() 

Source Link

Document

Return the configuration for the job.

Usage

From source file:com.blackberry.logdriver.util.FastSearch.java

License:Apache License

/**
 * Configures and launches the map-only "Search Files" job.
 *
 * <p>Expected arguments: {@code searchString input [input ...] output}. Each
 * input argument is expanded as a glob; the last argument is the output
 * directory.
 *
 * @param args the search string, one or more input paths/globs, and the
 *             output directory
 * @return 0 if the job succeeded (or was submitted without waiting), 1 if the
 *         job failed, an input path does not exist, or the configured output
 *         separator is not a single UTF-8 byte
 * @throws Exception if the file system or the job submission fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchString = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchString input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchString = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        FileStatus[] matches = fs.globStatus(new Path(args[i]));
        // globStatus returns null, rather than an empty array, for a non-glob
        // path that does not exist. Fail with a clear message instead of a
        // NullPointerException.
        if (matches == null) {
            LOG.error("Input path does not exist: " + args[i]);
            return 1;
        }
        for (FileStatus f : matches) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(FastSearch.class);
    jobConf.setIfUnset("mapred.job.name", "Search Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    // The search string is passed Base64-encoded through the job configuration.
    jobConf.set("logdriver.search.string", Base64.encodeBase64String(searchString.getBytes("UTF-8")));

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    // Map-only job: no reducers.
    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job: either block until it finishes, or just submit it.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.blackberry.logdriver.util.Grep.java

License:Apache License

/**
 * Configures and launches the map-only "Grep Files" job.
 *
 * <p>Expected arguments: {@code regex input [input ...] output}. Each input
 * argument is expanded as a glob; the last argument is the output directory.
 *
 * @param args the regular expression, one or more input paths/globs, and the
 *             output directory
 * @return 0 if the job succeeded (or was submitted without waiting), 1 if the
 *         job failed, an input path does not exist, or the configured output
 *         separator is not a single UTF-8 byte
 * @throws Exception if the file system or the job submission fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String regex = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] regex input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    regex = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        FileStatus[] matches = fs.globStatus(new Path(args[i]));
        // globStatus returns null, rather than an empty array, for a non-glob
        // path that does not exist. Fail with a clear message instead of a
        // NullPointerException.
        if (matches == null) {
            LOG.error("Input path does not exist: " + args[i]);
            return 1;
        }
        for (FileStatus f : matches) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Grep.class);
    jobConf.setIfUnset("mapred.job.name", "Grep Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    // The regex is passed Base64-encoded through the job configuration.
    jobConf.set("logdriver.grep.regex", Base64.encodeBase64String(regex.getBytes("UTF-8")));

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(GrepMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    // Map-only job: no reducers.
    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job: either block until it finishes, or just submit it.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }

}

From source file:com.blackberry.logdriver.util.MultiSearch.java

License:Apache License

/**
 * Configures and launches the map-only "MultiSearch" job.
 *
 * <p>Expected arguments: {@code searchStringDirectory input [input ...] output}.
 * The replication factor of every file directly under the search-string
 * directory is raised before the job is set up. Each input argument is
 * expanded as a glob; the last argument is the output directory.
 *
 * @param args the search-string directory, one or more input paths/globs, and
 *             the output directory
 * @return 0 if the job succeeded (or was submitted without waiting), 1 if the
 *         job failed, an input path does not exist, or the configured output
 *         separator is not a single UTF-8 byte
 * @throws Exception if the file system or the job submission fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchStringDir = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchStringDir = args[0];
    // We are going to be reading all the files in this directory a lot. So
    // let's up the replication factor by a lot so that they're easy to read.
    for (FileStatus f : fs.listStatus(new Path(searchStringDir))) {
        fs.setReplication(f.getPath(), (short) 16);
    }

    for (int i = 1; i < args.length - 1; i++) {
        FileStatus[] matches = fs.globStatus(new Path(args[i]));
        // globStatus returns null, rather than an empty array, for a non-glob
        // path that does not exist. Fail with a clear message instead of a
        // NullPointerException.
        if (matches == null) {
            LOG.error("Input path does not exist: " + args[i]);
            return 1;
        }
        for (FileStatus f : matches) {
            paths.add(f.getPath());
        }
    }

    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(MultiSearch.class);
    jobConf.setIfUnset("mapred.job.name", "MultiSearch");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string.dir", searchStringDir);

    // This search is generally too fast to make good use of 128MB blocks, so
    // let's set the value to 256MB (if it's not set already)
    if (jobConf.get("mapred.max.split.size") == null) {
        // Computed as a long so the expression cannot overflow if the size is raised.
        jobConf.setLong("mapred.max.split.size", 256L * 1024 * 1024);
    }

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    // Map-only job: no reducers.
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job: either block until it finishes, or just submit it.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.blackberry.logdriver.util.Search.java

License:Apache License

/**
 * Configures and launches the map-only "Search Files" job.
 *
 * <p>Expected arguments: {@code searchString input [input ...] output}. Each
 * input argument is expanded as a glob; the last argument is the output
 * directory.
 *
 * @param args the search string, one or more input paths/globs, and the
 *             output directory
 * @return 0 if the job succeeded (or was submitted without waiting), 1 if the
 *         job failed, an input path does not exist, or the configured output
 *         separator is not a single UTF-8 byte
 * @throws Exception if the file system or the job submission fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchString = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchString input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchString = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        FileStatus[] matches = fs.globStatus(new Path(args[i]));
        // globStatus returns null, rather than an empty array, for a non-glob
        // path that does not exist. Fail with a clear message instead of a
        // NullPointerException.
        if (matches == null) {
            LOG.error("Input path does not exist: " + args[i]);
            return 1;
        }
        for (FileStatus f : matches) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Search.class);
    jobConf.setIfUnset("mapred.job.name", "Search Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    // The search string is passed as-is through the job configuration.
    jobConf.set("logdriver.search.string", searchString);

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    // Map-only job: no reducers.
    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job: either block until it finishes, or just submit it.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.bol.crazypigs.HBaseStorage15.java

License:Apache License

/**
 * Prepares this loader to read from the given location.
 *
 * <p>Disables Pig split combination on the job, builds the local job
 * configuration, requests an HBase delegation token the first time (tracked
 * through the {@code HBASE_TOKEN_SET} UDF property), records the input table
 * name, and re-applies any stored projection.
 *
 * @param location the table location; a leading {@code hbase://} is stripped
 * @param job      the job whose configuration is read and updated
 * @throws IOException if (de)serializing the projected field list fails
 */
@Override
public void setLocation(String location, Job job) throws IOException {
    final Properties props = getUDFProperties();
    job.getConfiguration().setBoolean("pig.noSplitCombination", true);

    m_conf = initializeLocalJobConfig(job);
    // Only fetch the delegation token once; the UDF property remembers that it was done.
    if (props.getProperty(HBASE_TOKEN_SET) == null) {
        addHBaseDelegationToken(m_conf, job);
        props.setProperty(HBASE_TOKEN_SET, "true");
    }

    // Drop the 8-character "hbase://" scheme prefix when present.
    final String table = location.startsWith("hbase://") ? location.substring(8) : location;
    m_conf.set(TableInputFormat.INPUT_TABLE, table);

    final String serializedProjection = props.getProperty(projectedFieldsName());
    if (serializedProjection != null) {
        // update columnInfo_
        RequiredFieldList projection = (RequiredFieldList) ObjectSerializer.deserialize(serializedProjection);
        pushProjection(projection);
    }
    addFiltersWithoutColumnPrefix(columnInfo_);

    if (requiredFieldList == null) {
        return;
    }
    final String[] signatureArgs = new String[] { contextSignature };
    final Properties signatureProps = UDFContext.getUDFContext().getUDFProperties(this.getClass(), signatureArgs);
    signatureProps.setProperty(contextSignature + "_projectedFields",
            ObjectSerializer.serialize(requiredFieldList));
}

From source file:com.bol.crazypigs.HBaseStorage15.java

License:Apache License

/**
 * Builds a {@link JobConf} copy of the job's configuration, supplemented with
 * HBase settings.
 *
 * <p>On the first call (the {@code HBASE_CONFIG_SET} UDF property is absent),
 * every HBase configuration entry that the job configuration does not already
 * define is copied into both the UDF properties and the returned conf. On
 * later calls, all UDF properties are copied into the returned conf instead.
 *
 * @param job the job whose configuration is used as the base
 * @return the local job configuration
 */
private JobConf initializeLocalJobConfig(Job job) {
    final Properties props = getUDFProperties();
    final Configuration base = job.getConfiguration();
    final JobConf result = new JobConf(base);

    if (!props.containsKey(HBASE_CONFIG_SET)) {
        final Configuration hbaseDefaults = HBaseConfiguration.create();
        for (Entry<String, String> setting : hbaseDefaults) {
            final String key = setting.getKey();
            // JobConf may have some conf overriding ones in hbase-site.xml,
            // so only copy hbase config not in job config to UDFContext.
            // This also avoids copying core-default.xml and core-site.xml
            // props in the HBase conf to UDFContext, which would be redundant.
            if (base.get(key) != null) {
                continue;
            }
            final String value = setting.getValue();
            props.setProperty(key, value);
            result.set(key, value);
        }
        props.setProperty(HBASE_CONFIG_SET, "true");
        return result;
    }

    // HBase settings were captured earlier: replay everything stored in the UDF properties.
    for (Entry<Object, Object> stored : props.entrySet()) {
        result.set((String) stored.getKey(), (String) stored.getValue());
    }
    return result;
}

From source file:com.bol.crazypigs.HBaseStorage15.java

License:Apache License

/**
 * Prepares this storer to write to the given location.
 *
 * <p>Records the output table name on the job configuration, restores the
 * serialized schema from the UDF properties when one is stored, builds the
 * local job configuration and adds an HBase delegation token.
 *
 * @param location the table location; a leading {@code hbase://} is stripped
 * @param job      the job whose configuration is updated
 * @throws IOException if deserializing the stored schema fails
 */
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    // Drop the 8-character "hbase://" scheme prefix when present.
    final String table = location.startsWith("hbase://") ? location.substring(8) : location;
    job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, table);

    final String schemaText = getUDFProperties().getProperty(contextSignature + "_schema");
    if (schemaText != null) {
        schema_ = (ResourceSchema) ObjectSerializer.deserialize(schemaText);
    }

    m_conf = initializeLocalJobConfig(job);
    // Unlike setLocation, no UDF property is used to fetch the HBase
    // delegation token only once: setStoreLocation gets different Job
    // objects for each call and the last Job passed is the one that is
    // launched. So we end up getting multiple hbase delegation tokens.
    addHBaseDelegationToken(m_conf, job);
}

From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java

License:Apache License

/**
 * Set whether the output of the job is compressed.
 * /*ww w  . java 2  s . co  m*/
 * @param job
 *            the job to modify
 * @param compress
 *            should the output of the job be compressed?
 */
public static void setCompressOutput(Job job, boolean compress) {
    job.getConfiguration().setBoolean(FileOutputFormat.COMPRESS, compress);
}

From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java

License:Apache License

/**
 * Choose the {@link CompressionCodec} used to compress the job's outputs.
 *
 * <p>Output compression is switched on as part of this call, and the codec
 * class is stored under {@code FileOutputFormat.COMPRESS_CODEC}.
 *
 * @param job
 *            the job to modify
 * @param codecClass
 *            the {@link CompressionCodec} implementation to compress the
 *            job outputs with
 */
public static void setOutputCompressorClass(Job job, Class<? extends CompressionCodec> codecClass) {
    // Selecting a codec only makes sense with compression enabled, so turn it on first.
    final boolean compressionEnabled = true;
    setCompressOutput(job, compressionEnabled);
    job.getConfiguration()
            .setClass(FileOutputFormat.COMPRESS_CODEC, codecClass, CompressionCodec.class);
}

From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java

License:Apache License

/**
 * Set the {@link Path} of the output directory for the map-reduce job.
 *
 * @param job/*from   w  w w .j a va 2s  .c om*/
 *            The job to modify
 * @param outputDir
 *            the {@link Path} of the output directory for the map-reduce
 *            job.
 */
public static void setOutputPath(Job job, Path outputDir) {
    try {
        outputDir = outputDir.getFileSystem(job.getConfiguration()).makeQualified(outputDir);
    } catch (IOException e) {
        // Throw the IOException as a RuntimeException to be compatible with
        // MR1
        throw new RuntimeException(e);
    }
    job.getConfiguration().set(FileOutputFormat.OUTDIR, outputDir.toString());
}