List of usage examples for org.apache.hadoop.mapred JobConf get

public String get(String name)

Get the value of the name property, or null if no such property exists.
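Before the examples taken from real projects below, a minimal sketch of the call itself (the property name and values are made up for illustration; assumes the usual org.apache.hadoop.mapred.JobConf import):

JobConf conf = new JobConf();

// get(name) returns null when the property has never been set.
String missing = conf.get("example.not.set");               // null

// The two-argument overload returns a fallback instead of null.
String withDefault = conf.get("example.not.set", "none");   // "none"

// Once the property is set, both forms return the stored value.
conf.set("example.not.set", "7");
String present = conf.get("example.not.set");                // "7"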
From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java
License:Apache License
/**
 * Add a {@link Path} to the list of inputs for the map-reduce job.
 *
 * @param conf The configuration of the job
 * @param path {@link Path} to be added to the list of inputs for
 *             the map-reduce job.
 */
public static void addInputPath(JobConf conf, Path path) {
    path = new Path(conf.getWorkingDirectory(), path);
    String dirStr = StringUtils.escapeString(path.toString());
    String dirs = conf.get("mapred.input.dir");
    conf.set("mapred.input.dir", dirs == null ? dirStr : dirs + StringUtils.COMMA_STR + dirStr);
}
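For context, a sketch of how such a helper accumulates paths in mapred.input.dir (the paths and the resulting value are illustrative; the exact string depends on the default filesystem and working directory):

JobConf conf = new JobConf();
addInputPath(conf, new Path("/data/a"));
addInputPath(conf, new Path("/data/b"));

// conf.get("mapred.input.dir") now holds an escaped, comma-separated list,
// e.g. something like "hdfs://namenode/data/a,hdfs://namenode/data/b".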
From source file:com.blackberry.logdriver.mapred.avro.AvroBlockInputFormat.java
License:Apache License
@SuppressWarnings("deprecation")
@Override
public RecordReader<AvroFileHeader, BytesWritable> getRecordReader(InputSplit split, JobConf job,
        Reporter reporter) throws IOException {
    // Ensure we have sensible defaults for how we build blocks.
    if (job.get("mapreduce.job.max.split.locations") == null) {
        job.setLong("mapreduce.job.max.split.locations", MAX_SPLIT_LOCATIONS);
    }
    if (job.get("mapred.max.split.size") == null) {
        // Try to set the split size to the default block size. In case of
        // failure, we'll use this 128MB default.
        long blockSize = 128 * 1024 * 1024; // 128MB
        try {
            blockSize = FileSystem.get(job).getDefaultBlockSize();
        } catch (IOException e) {
            LOG.error("Error getting filesystem to get get default block size (this does not bode well).");
        }
        job.setLong("mapred.max.split.size", blockSize);
    }
    return new AvroBlockRecordReader(split, job);
}
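A note on the pattern above: checking job.get(...) == null and then calling setLong writes the computed default back into the JobConf only when the property is absent, so an explicit user setting is never overwritten. A read-only fallback would look like this instead (same property name; the default value here is purely illustrative):

// Supplies a fallback for this read only; the JobConf itself is left unchanged.
long splitSize = job.getLong("mapred.max.split.size", 128L * 1024 * 1024);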
From source file:com.blackberry.logdriver.mapred.boom.BoomFilterMapper.java
License:Apache License
@Override
public void configure(JobConf job) {
    // We'll need to know the component name to know which rules to use
    String componentName = job.get("logdriver.component.name");

    // Load in the yaml file that defines the rules.
    String confFileName = job.get("logdriver.filter.file");
    try {
        filters = loadFilters(componentName, new FileInputStream(confFileName));
    } catch (FileNotFoundException e) {
        LOG.error("Error loading config files. No filters will be used.", e);
    }

    LOG.info("Initial filter set: {}", filters);
}
From source file:com.blackberry.logdriver.mapred.boom.BoomInputFormat.java
License:Apache License
@SuppressWarnings("deprecation")
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    // Ensure we have sensible defaults for how we build blocks.
    if (job.get("mapreduce.job.max.split.locations") == null) {
        job.setLong("mapreduce.job.max.split.locations", MAX_SPLIT_LOCATIONS);
    }
    if (job.get("mapred.max.split.size") == null) {
        // Try to set the split size to the default block size. In case of
        // failure, we'll use this 128MB default.
        long blockSize = 128 * 1024 * 1024; // 128MB
        try {
            blockSize = FileSystem.get(job).getDefaultBlockSize();
        } catch (IOException e) {
            LOG.error("Error getting filesystem to get get default block size (this does not bode well).");
        }
        job.setLong("mapred.max.split.size", blockSize);
    }
    return super.getSplits(job, numSplits);
}
From source file:com.blackberry.logdriver.mapred.boom.ReBoomRecordWriter.java
License:Apache License
public ReBoomRecordWriter(ReBoomOutputFormat reBoomOutputFormat, JobConf job) throws IOException {
    String taskid = job.get("mapred.task.id");
    Path path = BinaryOutputFormat.getTaskOutputPath(job, taskid + ".bm");
    FSDataOutputStream out = path.getFileSystem(job).create(path);
    writer = new ReBoomWriter(out);
}
From source file:com.blm.orc.OrcOutputFormat.java
License:Apache License
/**
 * Helper method to get a parameter first from props if present, falling back to JobConf if not.
 * Returns null if key is present in neither.
 */
private String getSettingFromPropsFallingBackToConf(String key, Properties props, JobConf conf) {
    if ((props != null) && props.containsKey(key)) {
        return props.getProperty(key);
    } else if (conf != null) {
        // If conf is not null, and the key is not present, Configuration.get() will
        // return null for us. So, we don't have to check if it contains it.
        return conf.get(key);
    } else {
        return null;
    }
}
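A small, self-contained illustration of the precedence that helper encodes, using a made-up key (the real callers and key names are not shown in this snippet):

Properties props = new Properties();
props.setProperty("example.setting", "from-props");

JobConf conf = new JobConf();
conf.set("example.setting", "from-conf");

// The same precedence written inline: props first, then conf.get(key), then null.
String value = (props != null && props.containsKey("example.setting"))
        ? props.getProperty("example.setting")
        : (conf != null ? conf.get("example.setting") : null);   // "from-props" here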
From source file:com.chriscx.mapred.Map.java
public void configure(JobConf job) {
    caseSensitive = job.getBoolean("wordcount.case.sensitive", true);
    inputFile = job.get("map.input.file");

    if (job.getBoolean("wordcount.skip.patterns", false)) {
        Path[] patternsFiles = new Path[0];
        try {
            patternsFiles = DistributedCache.getLocalCacheFiles(job);
        } catch (IOException ioe) {
            System.err.println(
                    "Caught exception while getting cached files: " + StringUtils.stringifyException(ioe));
        }
        for (Path patternsFile : patternsFiles) {
            parseSkipFile(patternsFile);
        }
    }
}
From source file:com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyInputFormat.java
License:Apache License
public org.apache.hadoop.mapred.RecordReader<ByteBuffer, SortedMap<ByteBuffer, IColumn>> getRecordReader(
        org.apache.hadoop.mapred.InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {
    TaskAttemptContext tac = new TaskAttemptContext(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    ColumnFamilyRecordReader recordReader = new ColumnFamilyRecordReader(
            jobConf.getInt(CASSANDRA_HADOOP_MAX_KEY_SIZE, CASSANDRA_HADOOP_MAX_KEY_SIZE_DEFAULT));
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);

    return recordReader;
}
From source file:com.cloudera.science.avro.streaming.AvroAsJSONInputFormat.java
License:Open Source License
private void loadSchemas(JobConf job) throws IOException {
    this.schemas = Lists.newArrayList();
    SchemaLoader loader = new SchemaLoader(job);
    String schemaLiteral = job.get(SCHEMA_LITERAL);
    if (schemaLiteral != null) {
        schemas.add(loader.loadLiteral(schemaLiteral));
        return;
    } else {
        String[] schemaUrls = job.getStrings(SCHEMA_URL);
        String[] typeNames = job.getStrings(SCHEMA_TYPE_NAME);
        if (schemaUrls != null) {
            for (String schemaUrl : schemaUrls) {
                schemas.add(loader.loadFromUrl(schemaUrl));
            }
        } else if (typeNames != null) {
            for (String typeName : typeNames) {
                schemas.add(loader.loadFromTypeName(typeName));
            }
        } else {
            throw new IllegalArgumentException("No schema information provided");
        }

        if (schemas.size() > 1) {
            // Need to track input paths
            Path[] inputs = FileInputFormat.getInputPaths(job);
            if (inputs.length != schemas.size()) {
                throw new IllegalArgumentException(String.format(
                        "Number of input paths (%d) does not match number of schemas specified (%d)",
                        inputs.length, schemas.size()));
            }
            this.inputPaths = new String[inputs.length];
            for (int i = 0; i < inputs.length; i++) {
                inputPaths[i] = inputs[i].toString();
            }
        }
    }
}
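A note on job.getStrings used above: it reads a comma-delimited property and returns the pieces as an array, whereas plain get returns the raw string. A tiny sketch with an illustrative key and values:

job.set("example.schema.urls", "hdfs:///a.avsc,hdfs:///b.avsc");
String[] urls = job.getStrings("example.schema.urls"); // ["hdfs:///a.avsc", "hdfs:///b.avsc"]
String raw = job.get("example.schema.urls");           // "hdfs:///a.avsc,hdfs:///b.avsc"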
From source file:com.cloudera.science.avro.streaming.AvroAsJSONOutputFormat.java
License:Open Source License
@Override
public RecordWriter<Text, Text> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    if (schema == null) {
        SchemaLoader loader = new SchemaLoader(job);
        this.schema = loader.load(job.get(SCHEMA_LITERAL), job.get(SCHEMA_URL), job.get(SCHEMA_TYPE_NAME));
        this.converter = new JsonConverter(schema);
        this.readKey = job.getBoolean(READ_KEY, true);
    }

    DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(
            new GenericDatumWriter<GenericRecord>(schema));
    if (getCompressOutput(job)) {
        int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.CONF_OUTPUT_CODEC,
                org.apache.avro.file.DataFileConstants.DEFLATE_CODEC);
        CodecFactory codec = codecName.equals(DataFileConstants.DEFLATE_CODEC)
                ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(codec);
    }
    writer.setSyncInterval(
            job.getInt(AvroOutputFormat.SYNC_INTERVAL_KEY, DataFileConstants.DEFAULT_SYNC_INTERVAL));
    Path path = FileOutputFormat.getTaskOutputPath(job, name + AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(job).create(path));
    return new AvroAsJSONRecordWriter(writer, converter, readKey);
}