List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
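TaskAttemptContext extends JobContext, so getConfiguration() is available inside mappers, reducers, record readers/writers, and output committers; the common pattern in the examples below is to read job properties from the returned Configuration during setup or initialization. A minimal sketch of that pattern (the DemoMapper class and the demo.max.records property are hypothetical, chosen only for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class DemoMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    private int maxRecords;

    @Override
    protected void setup(Context context) {
        // Mapper.Context is a TaskAttemptContext, so the job Configuration
        // set by the driver is available here.
        Configuration conf = context.getConfiguration();
        maxRecords = conf.getInt("demo.max.records", Integer.MAX_VALUE); // hypothetical property
    }
}

The driver side would set the same property on the job's configuration before submission, e.g. job.getConfiguration().setInt("demo.max.records", 1000).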
From source file:fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.FastqLineRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitFastqLineReader(cIn, job, this.recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job,
                    this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
        filePosition = fileIn;
    }

    // If this is not the first split, we always throw away the first record,
    // because we always (except for the last split) read one extra line in
    // the next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file:fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.FastqOutputFormat.java
License:LGPL
@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();

    boolean isCompressed = getCompressOutput(context);
    CompressionCodec codec = null;
    String extension = "";

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }

    // Get the output file path
    final Path file = getDefaultWorkFile(context, extension);
    final FileSystem fs = file.getFileSystem(conf);

    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new FastqRecordWriter(context, fileOut);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new FastqRecordWriter(context, new DataOutputStream(codec.createOutputStream(fileOut)));
    }
}
From source file:fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.SAMOutputFormat.java
License:LGPL
@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();

    boolean isCompressed = getCompressOutput(context);
    CompressionCodec codec = null;
    String extension = "";

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }

    // Get the output file path
    final Path file = getDefaultWorkFile(context, extension);
    final FileSystem fs = file.getFileSystem(conf);

    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new SAMRecordWriter(context, fileOut);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new SAMRecordWriter(context, new DataOutputStream(codec.createOutputStream(fileOut)));
    }
}
From source file:gaffer.accumulo.inputformat.BatchScannerElementInputFormat.java
License:Apache License
@Override
public RecordReader<GraphElement, SetOfStatistics> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    log.setLevel(getLogLevel(context));
    String serialisedPostRollUpTransform = context.getConfiguration().get(POST_ROLL_UP_TRANSFORM);
    if (serialisedPostRollUpTransform != null) {
        try {
            Transform transform = (Transform) WritableToStringConverter
                    .deserialiseFromString(serialisedPostRollUpTransform);
            return new BatchScannerRecordReader(transform);
        } catch (IOException e) {
            throw new IllegalArgumentException(
                    "Unable to deserialise a Transform from the string: " + serialisedPostRollUpTransform);
        } catch (ClassCastException e) {
            throw new IllegalArgumentException(
                    "Unable to deserialise a Transform from the string: " + serialisedPostRollUpTransform);
        }
    }
    return new BatchScannerRecordReader();
}
From source file:gaffer.accumulo.inputformat.ElementInputFormat.java
License:Apache License
@Override
public RecordReader<GraphElement, SetOfStatistics> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    log.setLevel(getLogLevel(context));
    String serialisedPostRollUpTransform = context.getConfiguration().get(POST_ROLL_UP_TRANSFORM);
    if (serialisedPostRollUpTransform != null) {
        try {
            Transform transform = (Transform) WritableToStringConverter
                    .deserialiseFromString(serialisedPostRollUpTransform);
            return new ElementWithStatisticsRecordReader(transform);
        } catch (IOException e) {
            throw new IllegalArgumentException(
                    "Unable to deserialise a Transform from the string: " + serialisedPostRollUpTransform);
        } catch (ClassCastException e) {
            throw new IllegalArgumentException(
                    "Unable to deserialise a Transform from the string: " + serialisedPostRollUpTransform);
        }
    }
    return new ElementWithStatisticsRecordReader();
}
From source file:gaffer.accumulostore.inputformat.ElementInputFormat.java
License:Apache License
@Override
public RecordReader<Element, NullWritable> createRecordReader(final InputSplit split,
        final TaskAttemptContext context) throws IOException, InterruptedException {
    log.setLevel(getLogLevel(context));
    final Configuration conf = context.getConfiguration();
    final String keyPackageClass = conf.get(KEY_PACKAGE);
    final Schema schema = Schema.fromJson(conf.get(SCHEMA).getBytes(CommonConstants.UTF_8));
    final View view = View.fromJson(conf.get(VIEW).getBytes(CommonConstants.UTF_8));
    try {
        return new ElementWithPropertiesRecordReader(keyPackageClass, schema, view);
    } catch (final StoreException | SchemaException | SerialisationException e) {
        throw new IOException("Exception creating RecordReader", e);
    }
}
From source file:gobblin.compaction.mapreduce.avro.AvroKeyCombineFileRecordReader.java
License:Apache License
private static Schema getSchema(CombineFileSplit split, TaskAttemptContext cx, Integer idx)
        throws IOException {
    Schema schema = AvroJob.getInputKeySchema(cx.getConfiguration());
    if (schema != null) {
        return schema;
    }
    Path path = split.getPath(idx);
    FileSystem fs = path.getFileSystem(cx.getConfiguration());
    return AvroUtils.getSchemaFromDataFile(path, fs);
}
From source file:gobblin.compaction.mapreduce.avro.AvroKeyCompactorOutputCommitter.java
License:Apache License
/**
 * Commits the task, moving files to their final committed location by delegating to
 * {@link FileOutputCommitter} to perform the actual moving. First, renames the
 * files to include the count of records contained within the file and a timestamp,
 * in the form {recordCount}.{timestamp}.avro. Then, the files are moved to their
 * committed location.
 */
@Override
public void commitTask(TaskAttemptContext context) throws IOException {
    Path workPath = getWorkPath();
    FileSystem fs = workPath.getFileSystem(context.getConfiguration());

    if (fs.exists(workPath)) {
        long recordCount = getRecordCountFromCounter(context, AvroKeyDedupReducer.EVENT_COUNTER.RECORD_COUNT);
        String fileNamePrefix;
        if (recordCount == 0) {
            // recordCount == 0 indicates that it is a map-only, non-dedup job, and thus record count
            // should be obtained from mapper counter.
            fileNamePrefix = CompactionRecordCountProvider.M_OUTPUT_FILE_PREFIX;
            recordCount = getRecordCountFromCounter(context, AvroKeyMapper.EVENT_COUNTER.RECORD_COUNT);
        } else {
            fileNamePrefix = CompactionRecordCountProvider.MR_OUTPUT_FILE_PREFIX;
        }
        String fileName = CompactionRecordCountProvider.constructFileName(fileNamePrefix, recordCount);

        for (FileStatus status : fs.listStatus(workPath, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return FilenameUtils.isExtension(path.getName(), "avro");
            }
        })) {
            Path newPath = new Path(status.getPath().getParent(), fileName);
            LOG.info(String.format("Renaming %s to %s", status.getPath(), newPath));
            fs.rename(status.getPath(), newPath);
        }
    }

    super.commitTask(context);
}
From source file:gov.jgi.meta.hadoop.input.FastaBlockRecordReader.java
License:Open Source License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    LOG.info("initializing FastaBlockRecordReader");

    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new FastaBlockLineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = false; // don't do this!
            //--start; or this
            fileIn.seek(start);
        }
        in = new FastaBlockLineReader(fileIn, job);
    }
    this.pos = start;
}
From source file:gov.jgi.meta.hadoop.input.FastaRecordReader.java
License:Open Source License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new FastaLineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = false; // don't do this!
            //--start; or this
            fileIn.seek(start);
        }
        in = new FastaLineReader(fileIn, job);
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}