Usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
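Every example below follows the same pattern: a framework callback that receives a TaskAttemptContext (in an InputFormat, OutputFormat, RecordReader, or RecordWriter) calls getConfiguration() to read job settings or to open files against the right FileSystem. A minimal sketch of that pattern, assuming a made-up property key "my.app.setting" (not a key defined by Hadoop):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Minimal sketch: read a job property from the task-side Configuration.
// "my.app.setting" is an illustrative key, not one defined by Hadoop.
public abstract class ConfiguredRecordReader<K, V> extends RecordReader<K, V> {

  protected String setting;

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context) {
    Configuration conf = context.getConfiguration();
    setting = conf.get("my.app.setting", "default-value");
  }
}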
From source file: com.datasalt.pangool.tuplemr.avro.AvroOutputFormat.java
License: Apache License

static <T> void configureDataFileWriter(DataFileWriter<T> writer, TaskAttemptContext job, String codecName,
    int deflateLevel) throws UnsupportedEncodingException {
  Configuration conf = job.getConfiguration();
  if (FileOutputFormat.getCompressOutput(job)) {
    CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(deflateLevel)
        : CodecFactory.fromString(codecName);
    writer.setCodec(factory);
  }
  writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));

  // copy metadata from job
  for (Map.Entry<String, String> e : conf) {
    if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
      writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
    if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
      writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
          URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
  }
}
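The loop above copies every Configuration entry whose key starts with AvroJob.TEXT_PREFIX or AvroJob.BINARY_PREFIX into the Avro container file's metadata. A hedged job-side sketch of how such an entry gets there, assuming AvroJob is org.apache.avro.mapred.AvroJob (the class this file appears to take its prefix constants from) and using an illustrative metadata name "owner":

import org.apache.avro.mapred.AvroJob;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class AvroMetaSetup {
  public static void main(String[] args) throws Exception {
    Job job = new Job(new Configuration());
    // Any key prefixed with AvroJob.TEXT_PREFIX is copied into the Avro
    // file's metadata by configureDataFileWriter above. "owner" and
    // "analytics-team" are illustrative values.
    job.getConfiguration().set(AvroJob.TEXT_PREFIX + "owner", "analytics-team");
  }
}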
From source file: com.datasalt.pangool.tuplemr.avro.AvroOutputFormat.java
License: Apache License

@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext job)
    throws IOException, InterruptedException {
  final DataFileWriter<T> writer = new DataFileWriter<T>(new ReflectDatumWriter<T>());
  configureDataFileWriter(writer, job, codecName, deflateLevel);
  Path path = getDefaultWorkFile(job, EXT);
  writer.create(getSchema(), path.getFileSystem(job.getConfiguration()).create(path));

  return new RecordWriter<AvroWrapper<T>, NullWritable>() {
    @Override
    public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      writer.append(wrapper.datum());
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException {
      writer.close();
    }
  };
}
From source file: com.datasalt.pangool.tuplemr.mapred.lib.input.DelegatingRecordReader.java
License: Apache License

/**
 * Constructs the DelegatingRecordReader.
 *
 * @param split
 *          TaggedInputSplit object
 * @param context
 *          TaskAttemptContext object
 *
 * @throws IOException
 * @throws InterruptedException
 */
@SuppressWarnings("unchecked")
public DelegatingRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  // Find the InputFormat and then the RecordReader from the TaggedInputSplit.
  TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split;
  InputFormat<K, V> inputFormat = (InputFormat<K, V>) InstancesDistributor.loadInstance(
      context.getConfiguration(), InputFormat.class, taggedInputSplit.getInputFormatFile(), true);
  PangoolMultipleInputs.setSpecificInputContext(context.getConfiguration(),
      taggedInputSplit.getInputFormatFile());
  originalRecordReader = inputFormat.createRecordReader(taggedInputSplit.getInputSplit(), context);
}
From source file: com.datasalt.pangool.tuplemr.mapred.lib.input.DelegatingRecordReader.java
License: Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split;
  PangoolMultipleInputs.setSpecificInputContext(context.getConfiguration(),
      taggedInputSplit.getInputFormatFile());
  originalRecordReader.initialize(taggedInputSplit.getInputSplit(), context);
}
From source file: com.datasalt.pangool.tuplemr.mapred.lib.input.TupleFileRecordReader.java
License: Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  org.apache.hadoop.mapreduce.lib.input.FileSplit fileSplit =
      (org.apache.hadoop.mapreduce.lib.input.FileSplit) split;
  conf = context.getConfiguration();
  Path path = fileSplit.getPath();
  FileSystem fs = path.getFileSystem(conf);
  this.in = new TupleFile.Reader(fs, conf, path);
  this.end = fileSplit.getStart() + fileSplit.getLength();
  if (fileSplit.getStart() > in.getPosition()) {
    // seek to the first sync point at or after the start of this split
    in.sync(fileSplit.getStart());
  }
  this.start = in.getPosition();
  more = start < end;
  tuple = new Tuple(in.getSchema());
}
From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java
License: Apache License

public synchronized RecordWriter getRecordWriter(String baseFileName) throws IOException, InterruptedException {
  // Look for record-writer in the cache
  OutputContext context = outputContexts.get(baseFileName);

  // If not in cache, create a new one
  if (context == null) {
    context = new OutputContext();

    OutputFormat mainOutputFormat;
    try {
      mainOutputFormat = ((OutputFormat) ReflectionUtils.newInstance(this.context.getOutputFormatClass(),
          this.context.getConfiguration()));
    } catch (ClassNotFoundException e1) {
      throw new RuntimeException(e1);
    }

    ProxyOutputCommitter baseOutputCommitter = ((ProxyOutputCommitter) mainOutputFormat
        .getOutputCommitter(this.context));

    // The trick is to create a new Job for each output
    Job job = new Job(this.context.getConfiguration());
    job.setOutputKeyClass(getNamedOutputKeyClass(this.context, baseFileName));
    job.setOutputValueClass(getNamedOutputValueClass(this.context, baseFileName));
    // Check possible specific context for the output
    setSpecificNamedOutputContext(this.context.getConfiguration(), job, baseFileName);
    TaskAttemptContext taskContext;
    try {
      taskContext = TaskAttemptContextFactory.get(job.getConfiguration(), this.context.getTaskAttemptID());
    } catch (Exception e) {
      throw new IOException(e);
    }

    // First we change the output dir for the new OutputFormat that we will create.
    // We put it inside the main output work path -> in case the Job fails,
    // everything will be discarded.
    taskContext.getConfiguration().set("mapred.output.dir",
        baseOutputCommitter.getBaseDir() + "/" + baseFileName);
    // This is for Hadoop 2.0:
    taskContext.getConfiguration().set("mapreduce.output.fileoutputformat.outputdir",
        baseOutputCommitter.getBaseDir() + "/" + baseFileName);
    context.taskAttemptContext = taskContext;

    // Load the OutputFormat instance
    OutputFormat outputFormat = InstancesDistributor.loadInstance(
        context.taskAttemptContext.getConfiguration(), OutputFormat.class,
        getNamedOutputFormatInstanceFile(this.context, baseFileName), true);
    // We have to create a JobContext for meeting the contract of the OutputFormat
    JobContext jobContext;
    try {
      jobContext = JobContextFactory.get(taskContext.getConfiguration(), taskContext.getJobID());
    } catch (Exception e) {
      throw new IOException(e);
    }
    context.jobContext = jobContext;
    // The contract of the OutputFormat is to check the output specs
    outputFormat.checkOutputSpecs(jobContext);
    // We get the output committer so we can call it later
    context.outputCommitter = outputFormat.getOutputCommitter(taskContext);
    // Save the RecordWriter to cache it
    context.recordWriter = outputFormat.getRecordWriter(taskContext);

    // If counters are enabled, wrap the writer with context to increment counters
    if (countersEnabled) {
      context.recordWriter = new RecordWriterWithCounter(context.recordWriter, baseFileName, this.context);
    }
    outputContexts.put(baseFileName, context);
  }
  return context.recordWriter;
}
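A hedged usage sketch for the method above: the named output "stats" is a placeholder, and the key/value objects stand in for whatever classes were registered for that output. The returned writer is cached, so repeated lookups for the same name are cheap; closing and committing the cached writers is presumably handled elsewhere in PangoolMultipleOutputs.

// Hedged sketch: "stats" is a placeholder named output; key and value stand
// in for the registered key/value classes of that output.
void writeToNamedOutput(PangoolMultipleOutputs multipleOutputs, Object key, Object value)
    throws IOException, InterruptedException {
  RecordWriter writer = multipleOutputs.getRecordWriter("stats");
  writer.write(key, value);
}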
From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.ProxyOutputFormat.java
License: Apache License

@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException {
  createOutputFormatIfNeeded(context);

  String outDir = context.getConfiguration().get("mapred.output.dir");
  originalDir = outDir;
  FileOutputCommitter committer = (FileOutputCommitter) super.getOutputCommitter(context);
  baseDir = committer.getWorkPath() + "";
  Configuration conf = new Configuration(context.getConfiguration());
  TaskAttemptContext reContext;
  try {
    reContext = TaskAttemptContextFactory.get(conf, context.getTaskAttemptID());
  } catch (Exception e) {
    throw new IOException(e);
  }

  reContext.getConfiguration().set("mapred.output.dir", baseDir);
  // This is for Hadoop 2.0:
  reContext.getConfiguration().set("mapreduce.output.fileoutputformat.outputdir", baseDir);

  try {
    return new ProxyOutputCommitter(new Path(originalDir), context,
        outputFormat.getOutputCommitter(reContext));
  } catch (InterruptedException e) {
    throw new RuntimeException(e);
  }
}
From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.TupleOutputFormat.java
License: Apache License

private CompressionCodec getCodec(TaskAttemptContext context) {
  if (getCompressOutput(context)) {
    // find the right codec
    Class<?> codecClass = SequenceFileOutputFormat.getOutputCompressorClass(context, DefaultCodec.class);
    return (CompressionCodec) ReflectionUtils.newInstance(codecClass, context.getConfiguration());
  }
  return null;
}
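getCodec only reads compression settings back from the job; a hedged sketch of the driver-side calls that put them there (SnappyCodec and BLOCK compression are illustrative choices, not requirements):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.compress.SnappyCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class CompressionSetup {
  public static void main(String[] args) throws Exception {
    Job job = new Job(new Configuration());
    // These are the settings that getCompressOutput(...) and
    // getOutputCompressorClass(...) read back on the task side.
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class); // illustrative codec
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
  }
}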
From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.TupleOutputFormat.java
License: Apache License

public RecordWriter<ITuple, NullWritable> getRecordWriter(final TaskAttemptContext context)
    throws IOException, InterruptedException {
  final Configuration conf = context.getConfiguration();
  final CompressionCodec codec = getCodec(context);
  final SequenceFile.CompressionType compressionType = getCompressOutput(context)
      ? SequenceFileOutputFormat.getOutputCompressionType(context)
      : SequenceFile.CompressionType.NONE;
  // get the path of the temporary output file
  final Path file = getDefaultWorkFile(context, "");
  final FileSystem fs = file.getFileSystem(conf);

  return new RecordWriter<ITuple, NullWritable>() {
    TupleFile.Writer out;

    public void write(ITuple key, NullWritable value) throws IOException {
      if (out == null) {
        // the writer is created lazily so the schema can be taken from the first tuple
        if (outputSchema == null) {
          outputSchema = key.getSchema();
        }
        out = new TupleFile.Writer(fs, conf, file, outputSchema, compressionType, codec, context);
      }
      out.append(key);
    }

    public void close(TaskAttemptContext context) throws IOException {
      // guard against a writer that was never opened (no tuples written)
      if (out != null) {
        out.close();
      }
    }
  };
}
From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.TupleTextOutputFormat.java
License: Apache License

@Override
public RecordWriter<ITuple, NullWritable> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  Path file = getDefaultWorkFile(context, "");
  BufferedWriter writer = new BufferedWriter(
      new OutputStreamWriter(file.getFileSystem(context.getConfiguration()).create(file)));
  CSVWriter csvWriter = new CSVWriter(writer, separatorCharacter, quoteCharacter, escapeCharacter);
  if (addHeader) {
    String[] header = new String[schema.getFields().size()];
    for (int i = 0; i < schema.getFields().size(); i++) {
      header[i] = schema.getFields().get(i).getName();
    }
    csvWriter.writeNext(header);
  }
  return new TupleTextRecordWriter(schema, csvWriter);
}