Example usage for org.apache.hadoop.mapred JobConf get

Introduction

On this page you can find example usage of org.apache.hadoop.mapred.JobConf.get.

Prototype

public String get(String name) 

Document

Get the value of the name property, or null if no such property exists.
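
As a quick illustration of the prototype above, here is a minimal, self-contained sketch of calling get with and without a stored value (the property names example.output.codec and example.missing.property are illustrative, not part of any Hadoop API):

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        conf.set("example.output.codec", "snappy"); // illustrative property name and value

        // get(name) returns the stored value, or null if the property was never set.
        String codec = conf.get("example.output.codec");
        String missing = conf.get("example.missing.property");

        System.out.println("codec = " + codec);     // prints "snappy"
        System.out.println("missing = " + missing); // prints "null"
    }
}

Configuration, which JobConf extends, also provides get(String name, String defaultValue), which folds the null check into a single call; several of the examples below rely on the analogous getInt and getBoolean overloads for the same reason.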

Usage

From source file: com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java

License: Apache License

/**
 * Add a {@link Path} to the list of inputs for the map-reduce job.
 *
 * @param conf The configuration of the job 
 * @param path {@link Path} to be added to the list of inputs for 
 *            the map-reduce job.
 */
public static void addInputPath(JobConf conf, Path path) {
    path = new Path(conf.getWorkingDirectory(), path);
    String dirStr = StringUtils.escapeString(path.toString());
    String dirs = conf.get("mapred.input.dir");
    conf.set("mapred.input.dir", dirs == null ? dirStr : dirs + StringUtils.COMMA_STR + dirStr);
}

From source file: com.blackberry.logdriver.mapred.avro.AvroBlockInputFormat.java

License: Apache License

@SuppressWarnings("deprecation")
@Override
public RecordReader<AvroFileHeader, BytesWritable> getRecordReader(InputSplit split, JobConf job,
        Reporter reporter) throws IOException {

    // Ensure we have sensible defaults for how we build blocks.
    if (job.get("mapreduce.job.max.split.locations") == null) {
        job.setLong("mapreduce.job.max.split.locations", MAX_SPLIT_LOCATIONS);
    }
    if (job.get("mapred.max.split.size") == null) {
        // Try to set the split size to the default block size. In case of
        // failure, we'll use this 128MB default.
        long blockSize = 128 * 1024 * 1024; // 128MB
        try {
            blockSize = FileSystem.get(job).getDefaultBlockSize();
        } catch (IOException e) {
            LOG.error("Error getting filesystem to get get default block size (this does not bode well).");
        }
        job.setLong("mapred.max.split.size", blockSize);
    }

    return new AvroBlockRecordReader(split, job);
}

From source file: com.blackberry.logdriver.mapred.boom.BoomFilterMapper.java

License: Apache License

@Override
public void configure(JobConf job) {
    // We'll need to know the component name to know which rules to use
    String componentName = job.get("logdriver.component.name");

    // Load in the yaml file that defines the rules.
    String confFileName = job.get("logdriver.filter.file");

    try {
        filters = loadFilters(componentName, new FileInputStream(confFileName));
    } catch (FileNotFoundException e) {
        LOG.error("Error loading config files.  No filters will be used.", e);
    }
    LOG.info("Initial filter set: {}", filters);
}

From source file: com.blackberry.logdriver.mapred.boom.BoomInputFormat.java

License: Apache License

@SuppressWarnings("deprecation")
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    // Ensure we have sensible defaults for how we build blocks.
    if (job.get("mapreduce.job.max.split.locations") == null) {
        job.setLong("mapreduce.job.max.split.locations", MAX_SPLIT_LOCATIONS);
    }
    if (job.get("mapred.max.split.size") == null) {
        // Try to set the split size to the default block size. In case of
        // failure, we'll use this 128MB default.
        long blockSize = 128 * 1024 * 1024; // 128MB
        try {
            blockSize = FileSystem.get(job).getDefaultBlockSize();
        } catch (IOException e) {
            LOG.error("Error getting filesystem to get get default block size (this does not bode well).");
        }
        job.setLong("mapred.max.split.size", blockSize);
    }

    return super.getSplits(job, numSplits);
}

From source file: com.blackberry.logdriver.mapred.boom.ReBoomRecordWriter.java

License: Apache License

public ReBoomRecordWriter(ReBoomOutputFormat reBoomOutputFormat, JobConf job) throws IOException {
    String taskid = job.get("mapred.task.id");
    Path path = BinaryOutputFormat.getTaskOutputPath(job, taskid + ".bm");
    FSDataOutputStream out = path.getFileSystem(job).create(path);
    writer = new ReBoomWriter(out);
}

From source file: com.blm.orc.OrcOutputFormat.java

License: Apache License

/**
 * Helper method to get a parameter first from props if present, falling back to JobConf if not.
 * Returns null if key is present in neither.
 */
private String getSettingFromPropsFallingBackToConf(String key, Properties props, JobConf conf) {
    if ((props != null) && props.containsKey(key)) {
        return props.getProperty(key);
    } else if (conf != null) {
        // If conf is not null, and the key is not present, Configuration.get() will
        // return null for us. So, we don't have to check if it contains it.
        return conf.get(key);
    } else {
        return null;
    }
}

From source file: com.chriscx.mapred.Map.java

public void configure(JobConf job) {
    caseSensitive = job.getBoolean("wordcount.case.sensitive", true);
    inputFile = job.get("map.input.file");

    if (job.getBoolean("wordcount.skip.patterns", false)) {
        Path[] patternsFiles = new Path[0];
        try {
            patternsFiles = DistributedCache.getLocalCacheFiles(job);
        } catch (IOException ioe) {
            System.err.println(
                    "Caught exception while getting cached files: " + StringUtils.stringifyException(ioe));
        }
        for (Path patternsFile : patternsFiles) {
            parseSkipFile(patternsFile);
        }
    }
}

From source file: com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyInputFormat.java

License: Apache License

public org.apache.hadoop.mapred.RecordReader<ByteBuffer, SortedMap<ByteBuffer, IColumn>> getRecordReader(
        org.apache.hadoop.mapred.InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {
    TaskAttemptContext tac = new TaskAttemptContext(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    ColumnFamilyRecordReader recordReader = new ColumnFamilyRecordReader(
            jobConf.getInt(CASSANDRA_HADOOP_MAX_KEY_SIZE, CASSANDRA_HADOOP_MAX_KEY_SIZE_DEFAULT));
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}

From source file: com.cloudera.science.avro.streaming.AvroAsJSONInputFormat.java

License: Open Source License

private void loadSchemas(JobConf job) throws IOException {
    this.schemas = Lists.newArrayList();
    SchemaLoader loader = new SchemaLoader(job);
    String schemaLiteral = job.get(SCHEMA_LITERAL);
    if (schemaLiteral != null) {
        schemas.add(loader.loadLiteral(schemaLiteral));
        return;
    } else {
        String[] schemaUrls = job.getStrings(SCHEMA_URL);
        String[] typeNames = job.getStrings(SCHEMA_TYPE_NAME);
        if (schemaUrls != null) {
            for (String schemaUrl : schemaUrls) {
                schemas.add(loader.loadFromUrl(schemaUrl));
            }
        } else if (typeNames != null) {
            for (String typeName : typeNames) {
                schemas.add(loader.loadFromTypeName(typeName));
            }
        } else {
            throw new IllegalArgumentException("No schema information provided");
        }

        if (schemas.size() > 1) {
            // Need to track input paths
            Path[] inputs = FileInputFormat.getInputPaths(job);
            if (inputs.length != schemas.size()) {
                throw new IllegalArgumentException(String.format(
                        "Number of input paths (%d) does not match number of schemas specified (%d)",
                        inputs.length, schemas.size()));
            }
            this.inputPaths = new String[inputs.length];
            for (int i = 0; i < inputs.length; i++) {
                inputPaths[i] = inputs[i].toString();
            }
        }
    }
}

From source file: com.cloudera.science.avro.streaming.AvroAsJSONOutputFormat.java

License: Open Source License

@Override
public RecordWriter<Text, Text> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    if (schema == null) {
        SchemaLoader loader = new SchemaLoader(job);
        this.schema = loader.load(job.get(SCHEMA_LITERAL), job.get(SCHEMA_URL), job.get(SCHEMA_TYPE_NAME));
        this.converter = new JsonConverter(schema);
        this.readKey = job.getBoolean(READ_KEY, true);
    }

    DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(
            new GenericDatumWriter<GenericRecord>(schema));
    if (getCompressOutput(job)) {
        int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.CONF_OUTPUT_CODEC,
                org.apache.avro.file.DataFileConstants.DEFLATE_CODEC);
        CodecFactory codec = codecName.equals(DataFileConstants.DEFLATE_CODEC)
                ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(codec);
    }
    writer.setSyncInterval(
            job.getInt(AvroOutputFormat.SYNC_INTERVAL_KEY, DataFileConstants.DEFAULT_SYNC_INTERVAL));

    Path path = FileOutputFormat.getTaskOutputPath(job, name + AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(job).create(path));

    return new AvroAsJSONRecordWriter(writer, converter, readKey);
}