Usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
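Every example below follows the same pattern: a framework callback that receives a TaskAttemptContext (in an InputFormat, OutputFormat, RecordReader, or RecordWriter) calls getConfiguration() to read job settings or to open files against the right FileSystem. A minimal sketch of that pattern, assuming a made-up property key "my.app.setting" (not a key defined by Hadoop):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Minimal sketch: read a job property from the task-side Configuration.
// "my.app.setting" is an illustrative key, not one defined by Hadoop.
public abstract class ConfiguredRecordReader<K, V> extends RecordReader<K, V> {

  protected String setting;

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context) {
    Configuration conf = context.getConfiguration();
    setting = conf.get("my.app.setting", "default-value");
  }
}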
From source file: com.datasalt.pangool.tuplemr.avro.AvroOutputFormat.java
License: Apache License

static <T> void configureDataFileWriter(DataFileWriter<T> writer, TaskAttemptContext job, String codecName,
    int deflateLevel) throws UnsupportedEncodingException {
  Configuration conf = job.getConfiguration();
  if (FileOutputFormat.getCompressOutput(job)) {
    CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(deflateLevel)
        : CodecFactory.fromString(codecName);
    writer.setCodec(factory);
  }
  writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));

  // copy metadata from job
  for (Map.Entry<String, String> e : conf) {
    if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
      writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
    if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
      writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
          URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
  }
}
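The loop above copies every Configuration entry whose key starts with AvroJob.TEXT_PREFIX or AvroJob.BINARY_PREFIX into the Avro container file's metadata. A hedged job-side sketch of how such an entry gets there, assuming AvroJob is org.apache.avro.mapred.AvroJob (the class this file appears to take its prefix constants from) and using an illustrative metadata name "owner":

import org.apache.avro.mapred.AvroJob;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class AvroMetaSetup {
  public static void main(String[] args) throws Exception {
    Job job = new Job(new Configuration());
    // Any key prefixed with AvroJob.TEXT_PREFIX is copied into the Avro
    // file's metadata by configureDataFileWriter above. "owner" and
    // "analytics-team" are illustrative values.
    job.getConfiguration().set(AvroJob.TEXT_PREFIX + "owner", "analytics-team");
  }
}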
From source file: com.datasalt.pangool.tuplemr.avro.AvroOutputFormat.java
License: Apache License

@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext job)
    throws IOException, InterruptedException {
  final DataFileWriter<T> writer = new DataFileWriter<T>(new ReflectDatumWriter<T>());
  configureDataFileWriter(writer, job, codecName, deflateLevel);
  Path path = getDefaultWorkFile(job, EXT);
  writer.create(getSchema(), path.getFileSystem(job.getConfiguration()).create(path));

  return new RecordWriter<AvroWrapper<T>, NullWritable>() {
    @Override
    public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      writer.append(wrapper.datum());
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException {
      writer.close();
    }
  };
}
From source file: com.datasalt.pangool.tuplemr.mapred.lib.input.DelegatingRecordReader.java
License: Apache License

/**
 * Constructs the DelegatingRecordReader.
 *
 * @param split
 *          TaggedInputSplit object
 * @param context
 *          TaskAttemptContext object
 *
 * @throws IOException
 * @throws InterruptedException
 */
@SuppressWarnings("unchecked")
public DelegatingRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  // Find the InputFormat and then the RecordReader from the TaggedInputSplit.
  TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split;
  InputFormat<K, V> inputFormat = (InputFormat<K, V>) InstancesDistributor.loadInstance(
      context.getConfiguration(), InputFormat.class, taggedInputSplit.getInputFormatFile(), true);
  PangoolMultipleInputs.setSpecificInputContext(context.getConfiguration(),
      taggedInputSplit.getInputFormatFile());
  originalRecordReader = inputFormat.createRecordReader(taggedInputSplit.getInputSplit(), context);
}
From source file: com.datasalt.pangool.tuplemr.mapred.lib.input.DelegatingRecordReader.java
License: Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split;
  PangoolMultipleInputs.setSpecificInputContext(context.getConfiguration(),
      taggedInputSplit.getInputFormatFile());
  originalRecordReader.initialize(taggedInputSplit.getInputSplit(), context);
}
From source file: com.datasalt.pangool.tuplemr.mapred.lib.input.TupleFileRecordReader.java
License: Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  org.apache.hadoop.mapreduce.lib.input.FileSplit fileSplit =
      (org.apache.hadoop.mapreduce.lib.input.FileSplit) split;
  conf = context.getConfiguration();
  Path path = fileSplit.getPath();
  FileSystem fs = path.getFileSystem(conf);
  this.in = new TupleFile.Reader(fs, conf, path);
  this.end = fileSplit.getStart() + fileSplit.getLength();
  if (fileSplit.getStart() > in.getPosition()) {
    // seek to the first sync point at or after the start of this split
    in.sync(fileSplit.getStart());
  }
  this.start = in.getPosition();
  more = start < end;
  tuple = new Tuple(in.getSchema());
}
From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java
License: Apache License

public synchronized RecordWriter getRecordWriter(String baseFileName) throws IOException, InterruptedException {
  // Look for record-writer in the cache
  OutputContext context = outputContexts.get(baseFileName);

  // If not in cache, create a new one
  if (context == null) {
    context = new OutputContext();

    OutputFormat mainOutputFormat;
    try {
      mainOutputFormat = ((OutputFormat) ReflectionUtils.newInstance(this.context.getOutputFormatClass(),
          this.context.getConfiguration()));
    } catch (ClassNotFoundException e1) {
      throw new RuntimeException(e1);
    }

    ProxyOutputCommitter baseOutputCommitter = ((ProxyOutputCommitter) mainOutputFormat
        .getOutputCommitter(this.context));

    // The trick is to create a new Job for each output
    Job job = new Job(this.context.getConfiguration());
    job.setOutputKeyClass(getNamedOutputKeyClass(this.context, baseFileName));
    job.setOutputValueClass(getNamedOutputValueClass(this.context, baseFileName));
    // Check possible specific context for the output
    setSpecificNamedOutputContext(this.context.getConfiguration(), job, baseFileName);
    TaskAttemptContext taskContext;
    try {
      taskContext = TaskAttemptContextFactory.get(job.getConfiguration(), this.context.getTaskAttemptID());
    } catch (Exception e) {
      throw new IOException(e);
    }

    // First we change the output dir for the new OutputFormat that we will create.
    // We put it inside the main output work path -> in case the Job fails,
    // everything will be discarded.
    taskContext.getConfiguration().set("mapred.output.dir",
        baseOutputCommitter.getBaseDir() + "/" + baseFileName);
    // This is for Hadoop 2.0:
    taskContext.getConfiguration().set("mapreduce.output.fileoutputformat.outputdir",
        baseOutputCommitter.getBaseDir() + "/" + baseFileName);
    context.taskAttemptContext = taskContext;

    // Load the OutputFormat instance
    OutputFormat outputFormat = InstancesDistributor.loadInstance(
        context.taskAttemptContext.getConfiguration(), OutputFormat.class,
        getNamedOutputFormatInstanceFile(this.context, baseFileName), true);
    // We have to create a JobContext for meeting the contract of the OutputFormat
    JobContext jobContext;
    try {
      jobContext = JobContextFactory.get(taskContext.getConfiguration(), taskContext.getJobID());
    } catch (Exception e) {
      throw new IOException(e);
    }
    context.jobContext = jobContext;
    // The contract of the OutputFormat is to check the output specs
    outputFormat.checkOutputSpecs(jobContext);
    // We get the output committer so we can call it later
    context.outputCommitter = outputFormat.getOutputCommitter(taskContext);
    // Save the RecordWriter to cache it
    context.recordWriter = outputFormat.getRecordWriter(taskContext);

    // If counters are enabled, wrap the writer with context to increment counters
    if (countersEnabled) {
      context.recordWriter = new RecordWriterWithCounter(context.recordWriter, baseFileName, this.context);
    }
    outputContexts.put(baseFileName, context);
  }
  return context.recordWriter;
}
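A hedged usage sketch for the method above: the named output "stats" is a placeholder, and the key/value objects stand in for whatever classes were registered for that output. The returned writer is cached, so repeated lookups for the same name are cheap; closing and committing the cached writers is presumably handled elsewhere in PangoolMultipleOutputs.

// Hedged sketch: "stats" is a placeholder named output; key and value stand
// in for the registered key/value classes of that output.
void writeToNamedOutput(PangoolMultipleOutputs multipleOutputs, Object key, Object value)
    throws IOException, InterruptedException {
  RecordWriter writer = multipleOutputs.getRecordWriter("stats");
  writer.write(key, value);
}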
From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.ProxyOutputFormat.java
License: Apache License

@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException {
  createOutputFormatIfNeeded(context);

  String outDir = context.getConfiguration().get("mapred.output.dir");
  originalDir = outDir;
  FileOutputCommitter committer = (FileOutputCommitter) super.getOutputCommitter(context);
  baseDir = committer.getWorkPath() + "";
  Configuration conf = new Configuration(context.getConfiguration());
  TaskAttemptContext reContext;
  try {
    reContext = TaskAttemptContextFactory.get(conf, context.getTaskAttemptID());
  } catch (Exception e) {
    throw new IOException(e);
  }

  reContext.getConfiguration().set("mapred.output.dir", baseDir);
  // This is for Hadoop 2.0:
  reContext.getConfiguration().set("mapreduce.output.fileoutputformat.outputdir", baseDir);

  try {
    return new ProxyOutputCommitter(new Path(originalDir), context,
        outputFormat.getOutputCommitter(reContext));
  } catch (InterruptedException e) {
    throw new RuntimeException(e);
  }
}
From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.TupleOutputFormat.java
License: Apache License

private CompressionCodec getCodec(TaskAttemptContext context) {
  if (getCompressOutput(context)) {
    // find the right codec
    Class<?> codecClass = SequenceFileOutputFormat.getOutputCompressorClass(context, DefaultCodec.class);
    return (CompressionCodec) ReflectionUtils.newInstance(codecClass, context.getConfiguration());
  }
  return null;
}
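getCodec only reads compression settings back from the job; a hedged sketch of the driver-side calls that put them there (SnappyCodec and BLOCK compression are illustrative choices, not requirements):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.compress.SnappyCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class CompressionSetup {
  public static void main(String[] args) throws Exception {
    Job job = new Job(new Configuration());
    // These are the settings that getCompressOutput(...) and
    // getOutputCompressorClass(...) read back on the task side.
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class); // illustrative codec
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
  }
}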
From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.TupleOutputFormat.java
License: Apache License

public RecordWriter<ITuple, NullWritable> getRecordWriter(final TaskAttemptContext context)
    throws IOException, InterruptedException {
  final Configuration conf = context.getConfiguration();
  final CompressionCodec codec = getCodec(context);
  final SequenceFile.CompressionType compressionType = getCompressOutput(context)
      ? SequenceFileOutputFormat.getOutputCompressionType(context)
      : SequenceFile.CompressionType.NONE;
  // get the path of the temporary output file
  final Path file = getDefaultWorkFile(context, "");
  final FileSystem fs = file.getFileSystem(conf);

  return new RecordWriter<ITuple, NullWritable>() {
    TupleFile.Writer out;

    public void write(ITuple key, NullWritable value) throws IOException {
      if (out == null) {
        // the writer is created lazily so the schema can be taken from the first tuple
        if (outputSchema == null) {
          outputSchema = key.getSchema();
        }
        out = new TupleFile.Writer(fs, conf, file, outputSchema, compressionType, codec, context);
      }
      out.append(key);
    }

    public void close(TaskAttemptContext context) throws IOException {
      // guard against a writer that was never opened (no tuples written)
      if (out != null) {
        out.close();
      }
    }
  };
}
From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.TupleTextOutputFormat.java
License: Apache License

@Override
public RecordWriter<ITuple, NullWritable> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  Path file = getDefaultWorkFile(context, "");
  BufferedWriter writer = new BufferedWriter(
      new OutputStreamWriter(file.getFileSystem(context.getConfiguration()).create(file)));
  CSVWriter csvWriter = new CSVWriter(writer, separatorCharacter, quoteCharacter, escapeCharacter);
  if (addHeader) {
    String[] header = new String[schema.getFields().size()];
    for (int i = 0; i < schema.getFields().size(); i++) {
      header[i] = schema.getFields().get(i).getName();
    }
    csvWriter.writeNext(header);
  }
  return new TupleTextRecordWriter(schema, csvWriter);
}