Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
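Before the examples, here is a minimal sketch of the typical pattern: a task-side component reads job-level settings through context.getConfiguration(). The ExampleMapper class and the "example.separator" key are hypothetical, introduced only for illustration.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper used only to illustrate the call. Mapper.Context
// extends TaskAttemptContext, so getConfiguration() is available in setup().
public class ExampleMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    private String separator;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // The returned Configuration holds all job-level settings for this task attempt.
        Configuration conf = context.getConfiguration();
        // "example.separator" is an illustrative custom key, not a Hadoop property.
        separator = conf.get("example.separator", "\t");
    }
}

The same call appears throughout the examples below: in RecordReaders and OutputFormats it is used to resolve the FileSystem of a path, read compression settings, and pass per-output configuration.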

Usage

From source file:com.datasalt.pangool.tuplemr.avro.AvroOutputFormat.java

License:Apache License

static <T> void configureDataFileWriter(DataFileWriter<T> writer, TaskAttemptContext job, String codecName,
        int deflateLevel) throws UnsupportedEncodingException {
    Configuration conf = job.getConfiguration();
    if (FileOutputFormat.getCompressOutput(job)) {
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(deflateLevel)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));

    // copy metadata from job
    for (Map.Entry<String, String> e : conf) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                    URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }
}

From source file:com.datasalt.pangool.tuplemr.avro.AvroOutputFormat.java

License:Apache License

@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {

    final DataFileWriter<T> writer = new DataFileWriter<T>(new ReflectDatumWriter<T>());

    configureDataFileWriter(writer, job, codecName, deflateLevel);
    Path path = getDefaultWorkFile(job, EXT);
    writer.create(getSchema(), path.getFileSystem(job.getConfiguration()).create(path));

    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
        @Override
        public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            writer.append(wrapper.datum());
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException {
            writer.close();
        }
    };
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.DelegatingRecordReader.java

License:Apache License

/**
 * Constructs the DelegatingRecordReader.
 *
 * @param split
 *          TaggedInputSplit object
 * @param context
 *          TaskAttemptContext object
 *
 * @throws IOException
 * @throws InterruptedException
 */
@SuppressWarnings("unchecked")
public DelegatingRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Find the InputFormat and then the RecordReader from the
    // TaggedInputSplit.
    TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split;
    InputFormat<K, V> inputFormat = (InputFormat<K, V>) InstancesDistributor.loadInstance(
            context.getConfiguration(), InputFormat.class, taggedInputSplit.getInputFormatFile(), true);
    PangoolMultipleInputs.setSpecificInputContext(context.getConfiguration(),
            taggedInputSplit.getInputFormatFile());
    originalRecordReader = inputFormat.createRecordReader(taggedInputSplit.getInputSplit(), context);
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.DelegatingRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split;
    PangoolMultipleInputs.setSpecificInputContext(context.getConfiguration(),
            taggedInputSplit.getInputFormatFile());
    originalRecordReader.initialize(((TaggedInputSplit) split).getInputSplit(), context);
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.TupleFileRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    org.apache.hadoop.mapreduce.lib.input.FileSplit fileSplit = (org.apache.hadoop.mapreduce.lib.input.FileSplit) split;
    conf = context.getConfiguration();
    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(conf);
    this.in = new TupleFile.Reader(fs, conf, path);
    this.end = fileSplit.getStart() + fileSplit.getLength();

    if (fileSplit.getStart() > in.getPosition()) {
        in.sync(fileSplit.getStart());
    }

    this.start = in.getPosition();
    more = start < end;

    tuple = new Tuple(in.getSchema());
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java

License:Apache License

public synchronized RecordWriter getRecordWriter(String baseFileName) throws IOException, InterruptedException {

    // Look for record-writer in the cache
    OutputContext context = outputContexts.get(baseFileName);

    // If not in cache, create a new one
    if (context == null) {

        context = new OutputContext();

        OutputFormat mainOutputFormat;

        try {
            mainOutputFormat = ((OutputFormat) ReflectionUtils.newInstance(this.context.getOutputFormatClass(),
                    this.context.getConfiguration()));
        } catch (ClassNotFoundException e1) {
            throw new RuntimeException(e1);
        }

        ProxyOutputCommitter baseOutputCommitter = ((ProxyOutputCommitter) mainOutputFormat
                .getOutputCommitter(this.context));

        // The trick is to create a new Job for each output
        Job job = new Job(this.context.getConfiguration());
        job.setOutputKeyClass(getNamedOutputKeyClass(this.context, baseFileName));
        job.setOutputValueClass(getNamedOutputValueClass(this.context, baseFileName));
        // Check possible specific context for the output
        setSpecificNamedOutputContext(this.context.getConfiguration(), job, baseFileName);
        TaskAttemptContext taskContext;
        try {
            taskContext = TaskAttemptContextFactory.get(job.getConfiguration(),
                    this.context.getTaskAttemptID());
        } catch (Exception e) {
            throw new IOException(e);
        }

        // First we change the output dir for the new OutputFormat that we will create.
        // We put it inside the main output work path so that, if the Job fails,
        // everything will be discarded.
        taskContext.getConfiguration().set("mapred.output.dir",
                baseOutputCommitter.getBaseDir() + "/" + baseFileName);
        // This is for Hadoop 2.0 :
        taskContext.getConfiguration().set("mapreduce.output.fileoutputformat.outputdir",
                baseOutputCommitter.getBaseDir() + "/" + baseFileName);
        context.taskAttemptContext = taskContext;

        // Load the OutputFormat instance
        OutputFormat outputFormat = InstancesDistributor.loadInstance(
                context.taskAttemptContext.getConfiguration(), OutputFormat.class,
                getNamedOutputFormatInstanceFile(this.context, baseFileName), true);
        // We have to create a JobContext for meeting the contract of the
        // OutputFormat
        JobContext jobContext;
        try {
            jobContext = JobContextFactory.get(taskContext.getConfiguration(), taskContext.getJobID());
        } catch (Exception e) {
            throw new IOException(e);
        }

        context.jobContext = jobContext;
        // The contract of the OutputFormat is to check the output specs
        outputFormat.checkOutputSpecs(jobContext);
        // We get the output committer so we can call it later
        context.outputCommitter = outputFormat.getOutputCommitter(taskContext);
        // Save the RecordWriter to cache it
        context.recordWriter = outputFormat.getRecordWriter(taskContext);

        // if counters are enabled, wrap the writer with context
        // to increment counters
        if (countersEnabled) {
            context.recordWriter = new RecordWriterWithCounter(context.recordWriter, baseFileName,
                    this.context);
        }

        outputContexts.put(baseFileName, context);
    }
    return context.recordWriter;
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.ProxyOutputFormat.java

License:Apache License

@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException {
    createOutputFormatIfNeeded(context);

    String outDir = context.getConfiguration().get("mapred.output.dir");
    originalDir = outDir;
    FileOutputCommitter committer = (FileOutputCommitter) super.getOutputCommitter(context);
    baseDir = committer.getWorkPath() + "";
    Configuration conf = new Configuration(context.getConfiguration());
    TaskAttemptContext reContext;
    try {
        reContext = TaskAttemptContextFactory.get(conf, context.getTaskAttemptID());
    } catch (Exception e) {
        throw new IOException(e);
    }

    reContext.getConfiguration().set("mapred.output.dir", baseDir);
    // This is for Hadoop 2.0 :
    reContext.getConfiguration().set("mapreduce.output.fileoutputformat.outputdir", baseDir);

    try {
        return new ProxyOutputCommitter(new Path(originalDir), context,
                outputFormat.getOutputCommitter(reContext));
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.TupleOutputFormat.java

License:Apache License

private CompressionCodec getCodec(TaskAttemptContext context) {
    if (getCompressOutput(context)) {
        // find the right codec
        Class<?> codecClass = SequenceFileOutputFormat.getOutputCompressorClass(context, DefaultCodec.class);
        return (CompressionCodec) ReflectionUtils.newInstance(codecClass, context.getConfiguration());
    }
    return null;
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.TupleOutputFormat.java

License:Apache License

public RecordWriter<ITuple, NullWritable> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {

    final Configuration conf = context.getConfiguration();

    final CompressionCodec codec = getCodec(context);
    final SequenceFile.CompressionType compressionType = getCompressOutput(context)
            ? SequenceFileOutputFormat.getOutputCompressionType(context)
            : SequenceFile.CompressionType.NONE;
    // get the path of the temporary output file
    final Path file = getDefaultWorkFile(context, "");
    final FileSystem fs = file.getFileSystem(conf);

    return new RecordWriter<ITuple, NullWritable>() {

        TupleFile.Writer out;

        public void write(ITuple key, NullWritable value) throws IOException {
            if (out == null) {
                if (outputSchema == null) {
                    outputSchema = key.getSchema();
                }
                out = new TupleFile.Writer(fs, conf, file, outputSchema, compressionType, codec, context);
            }
            out.append(key);
        }

        public void close(TaskAttemptContext context) throws IOException {
            out.close();
        }
    };
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.TupleTextOutputFormat.java

License:Apache License

@Override
public RecordWriter<ITuple, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    Path file = getDefaultWorkFile(context, "");
    BufferedWriter writer = new BufferedWriter(
            new OutputStreamWriter(file.getFileSystem(context.getConfiguration()).create(file)));
    CSVWriter csvWriter = new CSVWriter(writer, separatorCharacter, quoteCharacter, escapeCharacter);
    if (addHeader) {
        String[] header = new String[schema.getFields().size()];
        for (int i = 0; i < schema.getFields().size(); i++) {
            header[i] = schema.getFields().get(i).getName();
        }
        csvWriter.writeNext(header);
    }
    return new TupleTextRecordWriter(schema, csvWriter);
}