List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
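Before the collected examples, here is a minimal sketch of the pattern they all share: read job settings from the task's Configuration and pass the same Configuration to FileSystem lookups. The class name GetConfigurationSketch and the property key "example.input.buffer.size" are hypothetical, chosen only for illustration.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public final class GetConfigurationSketch {
    private GetConfigurationSketch() {
    }

    // Opens a path using settings taken from the task's Configuration.
    // "example.input.buffer.size" is an illustrative key, not a real Hadoop property.
    public static FSDataInputStream openWithJobSettings(TaskAttemptContext context, Path path)
            throws IOException {
        Configuration conf = context.getConfiguration();
        int bufferSize = conf.getInt("example.input.buffer.size", 64 * 1024);
        // The same Configuration drives FileSystem resolution (scheme, credentials, etc.).
        FileSystem fs = path.getFileSystem(conf);
        return fs.open(path, bufferSize);
    }
}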
From source file:com.cloudera.crunch.type.avro.AvroOutputFormat.java
License:Apache License
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Schema schema = AvroJob.getOutputSchema(context.getConfiguration());
    final DataFileWriter<T> WRITER = new DataFileWriter<T>(new GenericDatumWriter<T>());
    Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
    WRITER.create(schema, path.getFileSystem(context.getConfiguration()).create(path));
    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
        @Override
        public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            WRITER.append(wrapper.datum());
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            WRITER.close();
        }
    };
}
From source file:com.cloudera.crunch.type.avro.AvroRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration conf = context.getConfiguration();
    SeekableInput in = new FsInput(split.getPath(), conf);
    DatumReader<T> datumReader = new GenericDatumReader<T>();
    this.reader = DataFileReader.openReader(in, datumReader);
    reader.sync(split.getStart()); // advance to the first sync point at or after the split start
    this.start = reader.tell();
    this.end = split.getStart() + split.getLength();
}
From source file:com.cloudera.dataflow.spark.ShardNameTemplateHelper.java
License:Open Source License
private static String getOutputFile(TaskAttemptContext context) {
    TaskID taskId = context.getTaskAttemptID().getTaskID();
    int partition = taskId.getId();
    String filePrefix = context.getConfiguration().get(OUTPUT_FILE_PREFIX);
    String fileTemplate = context.getConfiguration().get(OUTPUT_FILE_TEMPLATE);
    String fileSuffix = context.getConfiguration().get(OUTPUT_FILE_SUFFIX);
    return filePrefix + replaceShardNumber(fileTemplate, partition) + fileSuffix;
}
From source file:com.cloudera.dataflow.spark.TemplatedAvroKeyOutputFormat.java
License:Open Source License
@Override
protected OutputStream getAvroFileOutputStream(TaskAttemptContext context) throws IOException {
    Path path = ShardNameTemplateHelper.getDefaultWorkFile(this, context);
    return path.getFileSystem(context.getConfiguration()).create(path);
}
From source file:com.cloudera.fts.spark.format.RawFileRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    FileSplit split = (FileSplit) inputSplit;
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);
    fileIn = fs.open(path);
    key = new Text(path.toString());
    finished = false;
}
From source file:com.cloudera.integration.oracle.goldengate.ldv.mapreduce.lib.input.LengthDelimitedRecordReader.java
@Override
public void initialize(InputSplit is, TaskAttemptContext tac) throws IOException, InterruptedException {
    this.fileSplit = (FileSplit) is;
    sizeRecordLength = tac.getConfiguration().getInt(Constants.RECORD_PREFIX_LENGTH, -1);
    Preconditions.checkArgument(sizeRecordLength > 0,
            Constants.RECORD_PREFIX_LENGTH + " must be configured.");
    Preconditions.checkArgument(sizeRecordLength == 2 || sizeRecordLength == 4 || sizeRecordLength == 8,
            Constants.RECORD_PREFIX_LENGTH + " must be either 2, 4, or 8.");
    recordLengthBuffer = new byte[sizeRecordLength];
    sizeFieldLength = tac.getConfiguration().getInt(Constants.FIELD_PREFIX_LENGTH, -1);
    Preconditions.checkArgument(sizeFieldLength > 0,
            Constants.FIELD_PREFIX_LENGTH + " must be configured.");
    Preconditions.checkArgument(sizeFieldLength == 2 || sizeFieldLength == 4 || sizeFieldLength == 8,
            Constants.FIELD_PREFIX_LENGTH + " must be either 2, 4, or 8.");
    fieldLengthBuffer = new byte[sizeFieldLength];
    FileSystem fileSystem = this.fileSplit.getPath().getFileSystem(tac.getConfiguration());
    int inputBufferSize = tac.getConfiguration().getInt(Constants.INPUT_BUFFER_SIZE, 5 * 1024 * 1024);
    this.inputStream = fileSystem.open(this.fileSplit.getPath(), inputBufferSize);
}
From source file:com.cloudera.recordservice.avro.mapreduce.AvroKeyInputFormat.java
License:Apache License
@Override
public RecordReader<AvroKey<T>, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Schema readerSchema = AvroJob.getInputKeySchema(context.getConfiguration());
    if (null == readerSchema) {
        LOG.warn("Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.");
        LOG.info("Using a reader schema equal to the writer schema.");
        LOG.info("Using GenericRecords instead of SpecificRecords.");
    }
    return new AvroKeyRecordReader<T>(readerSchema);
}
From source file:com.cloudera.recordservice.avro.mapreduce.AvroKeyValueInputFormat.java
License:Apache License
@Override
public RecordReader<AvroKey<K>, AvroValue<V>> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Schema keyReaderSchema = AvroJob.getInputKeySchema(context.getConfiguration());
    if (null == keyReaderSchema) {
        LOG.warn("Key reader schema was not set. Use AvroJob.setInputKeySchema() if desired.");
        LOG.info("Using a key reader schema equal to the writer schema.");
    }
    Schema valueReaderSchema = AvroJob.getInputValueSchema(context.getConfiguration());
    if (null == valueReaderSchema) {
        LOG.warn("Value reader schema was not set. Use AvroJob.setInputValueSchema() if desired.");
        LOG.info("Using a value reader schema equal to the writer schema.");
    }
    return new AvroKeyValueRecordReader<K, V>(keyReaderSchema, valueReaderSchema);
}
From source file:com.cloudera.recordservice.examples.terasort.TeraOutputFormat.java
License:Apache License
@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job) throws IOException {
    Path file = getDefaultWorkFile(job, "");
    FileSystem fs = file.getFileSystem(job.getConfiguration());
    FSDataOutputStream fileOut = fs.create(file);
    return new TeraRecordWriter(fileOut, job);
}
From source file:com.cloudera.sa.ExcelRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    Configuration conf = context.getConfiguration();
    Path file = fileSplit.getPath();
    FileSystem fs = file.getFileSystem(conf);
    this.in = fs.open(file);
    XSSFWorkbook workbook = new XSSFWorkbook(this.in);
    XSSFSheet sheet = workbook.getSheetAt(0);
    this.totalRows = sheet.getPhysicalNumberOfRows();
    this.processedRows = 0;
    this.rowIterator = sheet.rowIterator();
}