List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
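Before the collected examples, here is a minimal sketch of the pattern they all share: read job settings from the task's Configuration and pass the same Configuration to FileSystem lookups. The class name GetConfigurationSketch and the property key "example.input.buffer.size" are hypothetical, chosen only for illustration.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public final class GetConfigurationSketch {
    private GetConfigurationSketch() {
    }

    // Opens a path using settings taken from the task's Configuration.
    // "example.input.buffer.size" is an illustrative key, not a real Hadoop property.
    public static FSDataInputStream openWithJobSettings(TaskAttemptContext context, Path path)
            throws IOException {
        Configuration conf = context.getConfiguration();
        int bufferSize = conf.getInt("example.input.buffer.size", 64 * 1024);
        // The same Configuration drives FileSystem resolution (scheme, credentials, etc.).
        FileSystem fs = path.getFileSystem(conf);
        return fs.open(path, bufferSize);
    }
}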
From source file:com.cloudera.crunch.type.avro.AvroOutputFormat.java
License:Apache License
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Schema schema = AvroJob.getOutputSchema(context.getConfiguration());
    final DataFileWriter<T> WRITER = new DataFileWriter<T>(new GenericDatumWriter<T>());
    Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
    WRITER.create(schema, path.getFileSystem(context.getConfiguration()).create(path));
    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
        @Override
        public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            WRITER.append(wrapper.datum());
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            WRITER.close();
        }
    };
}
From source file:com.cloudera.crunch.type.avro.AvroRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration conf = context.getConfiguration();
    SeekableInput in = new FsInput(split.getPath(), conf);
    DatumReader<T> datumReader = new GenericDatumReader<T>();
    this.reader = DataFileReader.openReader(in, datumReader);
    reader.sync(split.getStart()); // advance to the first sync point at or after the split start
    this.start = reader.tell();
    this.end = split.getStart() + split.getLength();
}
From source file:com.cloudera.dataflow.spark.ShardNameTemplateHelper.java
License:Open Source License
private static String getOutputFile(TaskAttemptContext context) {
    TaskID taskId = context.getTaskAttemptID().getTaskID();
    int partition = taskId.getId();
    String filePrefix = context.getConfiguration().get(OUTPUT_FILE_PREFIX);
    String fileTemplate = context.getConfiguration().get(OUTPUT_FILE_TEMPLATE);
    String fileSuffix = context.getConfiguration().get(OUTPUT_FILE_SUFFIX);
    return filePrefix + replaceShardNumber(fileTemplate, partition) + fileSuffix;
}
From source file:com.cloudera.dataflow.spark.TemplatedAvroKeyOutputFormat.java
License:Open Source License
@Override
protected OutputStream getAvroFileOutputStream(TaskAttemptContext context) throws IOException {
    Path path = ShardNameTemplateHelper.getDefaultWorkFile(this, context);
    return path.getFileSystem(context.getConfiguration()).create(path);
}
From source file:com.cloudera.fts.spark.format.RawFileRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    FileSplit split = (FileSplit) inputSplit;
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);
    fileIn = fs.open(path);
    key = new Text(path.toString());
    finished = false;
}
From source file:com.cloudera.integration.oracle.goldengate.ldv.mapreduce.lib.input.LengthDelimitedRecordReader.java
@Override
public void initialize(InputSplit is, TaskAttemptContext tac) throws IOException, InterruptedException {
    this.fileSplit = (FileSplit) is;
    sizeRecordLength = tac.getConfiguration().getInt(Constants.RECORD_PREFIX_LENGTH, -1);
    Preconditions.checkArgument(sizeRecordLength > 0,
            Constants.RECORD_PREFIX_LENGTH + " must be configured.");
    Preconditions.checkArgument(sizeRecordLength == 2 || sizeRecordLength == 4 || sizeRecordLength == 8,
            Constants.RECORD_PREFIX_LENGTH + " must be either 2, 4, or 8.");
    recordLengthBuffer = new byte[sizeRecordLength];
    sizeFieldLength = tac.getConfiguration().getInt(Constants.FIELD_PREFIX_LENGTH, -1);
    Preconditions.checkArgument(sizeFieldLength > 0,
            Constants.FIELD_PREFIX_LENGTH + " must be configured.");
    Preconditions.checkArgument(sizeFieldLength == 2 || sizeFieldLength == 4 || sizeFieldLength == 8,
            Constants.FIELD_PREFIX_LENGTH + " must be either 2, 4, or 8.");
    fieldLengthBuffer = new byte[sizeFieldLength];
    FileSystem fileSystem = this.fileSplit.getPath().getFileSystem(tac.getConfiguration());
    int inputBufferSize = tac.getConfiguration().getInt(Constants.INPUT_BUFFER_SIZE, 5 * 1024 * 1024);
    this.inputStream = fileSystem.open(this.fileSplit.getPath(), inputBufferSize);
}
From source file:com.cloudera.recordservice.avro.mapreduce.AvroKeyInputFormat.java
License:Apache License
@Override
public RecordReader<AvroKey<T>, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Schema readerSchema = AvroJob.getInputKeySchema(context.getConfiguration());
    if (null == readerSchema) {
        LOG.warn("Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.");
        LOG.info("Using a reader schema equal to the writer schema.");
        LOG.info("Using GenericRecords instead of SpecificRecords.");
    }
    return new AvroKeyRecordReader<T>(readerSchema);
}
From source file:com.cloudera.recordservice.avro.mapreduce.AvroKeyValueInputFormat.java
License:Apache License
@Override
public RecordReader<AvroKey<K>, AvroValue<V>> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Schema keyReaderSchema = AvroJob.getInputKeySchema(context.getConfiguration());
    if (null == keyReaderSchema) {
        LOG.warn("Key reader schema was not set. Use AvroJob.setInputKeySchema() if desired.");
        LOG.info("Using a key reader schema equal to the writer schema.");
    }
    Schema valueReaderSchema = AvroJob.getInputValueSchema(context.getConfiguration());
    if (null == valueReaderSchema) {
        LOG.warn("Value reader schema was not set. Use AvroJob.setInputValueSchema() if desired.");
        LOG.info("Using a value reader schema equal to the writer schema.");
    }
    return new AvroKeyValueRecordReader<K, V>(keyReaderSchema, valueReaderSchema);
}
From source file:com.cloudera.recordservice.examples.terasort.TeraOutputFormat.java
License:Apache License
@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job) throws IOException {
    Path file = getDefaultWorkFile(job, "");
    FileSystem fs = file.getFileSystem(job.getConfiguration());
    FSDataOutputStream fileOut = fs.create(file);
    return new TeraRecordWriter(fileOut, job);
}
From source file:com.cloudera.sa.ExcelRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    Configuration conf = context.getConfiguration();
    Path file = fileSplit.getPath();
    FileSystem fs = file.getFileSystem(conf);
    this.in = fs.open(file);
    XSSFWorkbook workbook = new XSSFWorkbook(this.in);
    XSSFSheet sheet = workbook.getSheetAt(0);
    this.totalRows = sheet.getPhysicalNumberOfRows();
    this.processedRows = 0;
    this.rowIterator = sheet.rowIterator();
}