Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration() drawn from open-source projects.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
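
A minimal sketch of the recurring pattern in the examples below (the helper class and property name are illustrative, not from any of the projects cited): the Configuration obtained from the TaskAttemptContext serves both to read per-job settings and to resolve a Path's FileSystem.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class ConfigurationLookupSketch {

    // Hypothetical property name, shown only to illustrate reading a per-job setting.
    private static final String BUFFER_SIZE_KEY = "example.input.buffer.size";

    // Open a split's file the way most initialize() methods below do: fetch the
    // job Configuration from the context, then resolve the FileSystem through it.
    public static FSDataInputStream openSplit(FileSplit split, TaskAttemptContext context)
            throws IOException {
        Configuration conf = context.getConfiguration();
        int bufferSize = conf.getInt(BUFFER_SIZE_KEY, 4096); // tunable per job, with a default
        Path path = split.getPath();
        FileSystem fs = path.getFileSystem(conf);
        return fs.open(path, bufferSize);
    }
}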

Usage

From source file:com.cloudera.crunch.type.avro.AvroOutputFormat.java

License:Apache License
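
This RecordWriter builds an Avro container file at the task's default work file: the output Schema is read from the job configuration via AvroJob, and the same configuration resolves the destination FileSystem.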

@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    // Look up the Avro output schema that AvroJob stored in the job configuration.
    Schema schema = AvroJob.getOutputSchema(context.getConfiguration());

    final DataFileWriter<T> writer = new DataFileWriter<T>(new GenericDatumWriter<T>());

    // The configuration also resolves the FileSystem for the task's work file.
    Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(context.getConfiguration()).create(path));

    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
        @Override
        public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            writer.append(wrapper.datum());
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            writer.close();
        }
    };
}

From source file:com.cloudera.crunch.type.avro.AvroRecordReader.java

License:Apache License
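
This RecordReader resolves the split's FileSystem from the job configuration, opens the Avro file with FsInput, and syncs the reader to the start of the split.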

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration conf = context.getConfiguration();
    SeekableInput in = new FsInput(split.getPath(), conf);
    DatumReader<T> datumReader = new GenericDatumReader<T>();
    this.reader = DataFileReader.openReader(in, datumReader);
    reader.sync(split.getStart()); // sync to start
    this.start = reader.tell();
    this.end = split.getStart() + split.getLength();
}

From source file:com.cloudera.dataflow.spark.ShardNameTemplateHelper.java

License:Open Source License
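
This helper derives a shard-specific output file name: the prefix, template, and suffix are all read from the job configuration, and the shard number in the template is replaced with the task's partition id.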

private static String getOutputFile(TaskAttemptContext context) {
    TaskID taskId = context.getTaskAttemptID().getTaskID();
    int partition = taskId.getId();

    String filePrefix = context.getConfiguration().get(OUTPUT_FILE_PREFIX);
    String fileTemplate = context.getConfiguration().get(OUTPUT_FILE_TEMPLATE);
    String fileSuffix = context.getConfiguration().get(OUTPUT_FILE_SUFFIX);
    return filePrefix + replaceShardNumber(fileTemplate, partition) + fileSuffix;
}

From source file:com.cloudera.dataflow.spark.TemplatedAvroKeyOutputFormat.java

License:Open Source License
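
This override places the Avro output stream at a templated work file path, using the job configuration to resolve the target FileSystem.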

@Override
protected OutputStream getAvroFileOutputStream(TaskAttemptContext context) throws IOException {
    Path path = ShardNameTemplateHelper.getDefaultWorkFile(this, context);
    return path.getFileSystem(context.getConfiguration()).create(path);
}

From source file:com.cloudera.fts.spark.format.RawFileRecordReader.java

License:Apache License
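
This initialize() implementation opens the split's file directly, obtaining the FileSystem through the configuration from the context, and keys each record by the file path.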

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();
    FileSplit split = (FileSplit) inputSplit;
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);
    fileIn = fs.open(path);
    key = new Text(path.toString());
    finished = false;
}

From source file:com.cloudera.integration.oracle.goldengate.ldv.mapreduce.lib.input.LengthDelimitedRecordReader.java
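
This initialize() implementation reads its framing parameters from the job configuration: the record and field length-prefix sizes (each validated to be 2, 4, or 8 bytes) and an input buffer size defaulting to 5 MB.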

@Override
public void initialize(InputSplit is, TaskAttemptContext tac) throws IOException, InterruptedException {
    this.fileSplit = (FileSplit) is;

    sizeRecordLength = tac.getConfiguration().getInt(Constants.RECORD_PREFIX_LENGTH, -1);
    Preconditions.checkArgument(sizeRecordLength > 0, Constants.RECORD_PREFIX_LENGTH + " must be configured.");
    Preconditions.checkArgument(sizeRecordLength == 2 || sizeRecordLength == 4 || sizeRecordLength == 8,
            Constants.RECORD_PREFIX_LENGTH + " must be either 2, 4, or 8.");
    recordLengthBuffer = new byte[sizeRecordLength];

    sizeFieldLength = tac.getConfiguration().getInt(Constants.FIELD_PREFIX_LENGTH, -1);
    Preconditions.checkArgument(sizeFieldLength > 0, Constants.FIELD_PREFIX_LENGTH + " must be configured.");
    Preconditions.checkArgument(sizeFieldLength == 2 || sizeFieldLength == 4 || sizeFieldLength == 8,
            Constants.FIELD_PREFIX_LENGTH + " must be either 2, 4, or 8.");
    fieldLengthBuffer = new byte[sizeFieldLength];

    FileSystem fileSystem = this.fileSplit.getPath().getFileSystem(tac.getConfiguration());
    int inputBufferSize = tac.getConfiguration().getInt(Constants.INPUT_BUFFER_SIZE, 5 * 1024 * 1024);
    this.inputStream = fileSystem.open(this.fileSplit.getPath(), inputBufferSize);
}

From source file:com.cloudera.recordservice.avro.mapreduce.AvroKeyInputFormat.java

License:Apache License
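
This createRecordReader() override fetches the reader schema from the job configuration via AvroJob; if none was set, it logs that it will fall back to the writer schema and GenericRecords.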

@Override
public RecordReader<AvroKey<T>, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Schema readerSchema = AvroJob.getInputKeySchema(context.getConfiguration());
    if (null == readerSchema) {
        LOG.warn("Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.");
        LOG.info("Using a reader schema equal to the writer schema.");
        LOG.info("Using GenericRecords instead of SpecificRecords.");
    }
    return new AvroKeyRecordReader<T>(readerSchema);
}

From source file:com.cloudera.recordservice.avro.mapreduce.AvroKeyValueInputFormat.java

License:Apache License
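
Like the key-only variant above, this override pulls both the key and value reader schemas from the job configuration, falling back to the writer schemas when they are unset.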

@Override
public RecordReader<AvroKey<K>, AvroValue<V>> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Schema keyReaderSchema = AvroJob.getInputKeySchema(context.getConfiguration());
    if (null == keyReaderSchema) {
        LOG.warn("Key reader schema was not set. " + "Use AvroJob.setInputKeySchema() if desired.");
        LOG.info("Using a key reader schema equal to the writer schema.");
    }//from w w  w  .  j a  va 2 s  .  c o m
    Schema valueReaderSchema = AvroJob.getInputValueSchema(context.getConfiguration());
    if (null == valueReaderSchema) {
        LOG.warn("Value reader schema was not set. " + "Use AvroJob.setInputValueSchema() if desired.");
        LOG.info("Using a value reader schema equal to the writer schema.");
    }
    return new AvroKeyValueRecordReader<K, V>(keyReaderSchema, valueReaderSchema);
}

From source file:com.cloudera.recordservice.examples.terasort.TeraOutputFormat.java

License:Apache License
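
This getRecordWriter() override creates the task's default work file, resolving its FileSystem from the job configuration.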

@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job) throws IOException {
    Path file = getDefaultWorkFile(job, "");
    FileSystem fs = file.getFileSystem(job.getConfiguration());
    FSDataOutputStream fileOut = fs.create(file);
    return new TeraRecordWriter(fileOut, job);
}

From source file:com.cloudera.sa.ExcelRecordReader.java

License:Apache License
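
This initialize() implementation opens the split's .xlsx file through the FileSystem resolved from the configuration and positions a row iterator over the first sheet of the workbook.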

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    Configuration conf = context.getConfiguration();
    Path file = fileSplit.getPath();
    FileSystem fs = file.getFileSystem(conf);
    this.in = fs.open(file);
    XSSFWorkbook workbook = new XSSFWorkbook(this.in);
    XSSFSheet sheet = workbook.getSheetAt(0);
    this.totalRows = sheet.getPhysicalNumberOfRows();
    this.processedRows = 0;
    this.rowIterator = sheet.rowIterator();
}