List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
/** Returns the {@link Configuration} held by this context. */
public Configuration getConfiguration();
From source file:libra.common.kmermatch.KmerJoiner.java
License:Apache License
/**
 * Builds a joiner from a task attempt context.
 *
 * <p>Only the context's configuration is used; it is forwarded, together with the
 * index paths and the partition, to {@code initialize}.
 *
 * @param kmerIndexPath index paths handed to {@code initialize}
 * @param partition     partition handed to {@code initialize}
 * @param context       task attempt context supplying the configuration
 * @throws IOException declared by this constructor; any failure would come from {@code initialize}
 */
public KmerJoiner(Path[] kmerIndexPath, KmerRangePartition partition, TaskAttemptContext context)
        throws IOException {
    initialize(
            kmerIndexPath,
            partition,
            context.getConfiguration());
}
From source file:libra.common.kmermatch.KmerMatchRecordReader.java
License:Apache License
/**
 * Prepares this reader for the given split.
 *
 * <p>Stores the task configuration and the split's index file paths, then creates the
 * {@code KmerJoiner} for the split's partition.
 *
 * @param split   the split to read; must be a {@code KmerMatchInputSplit}
 * @param context task attempt context supplying the configuration
 * @throws IOException          if {@code split} is not a {@code KmerMatchInputSplit}, or if
 *                              creating the joiner fails
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    if (!(split instanceof KmerMatchInputSplit)) {
        // The message names the type that is actually checked above.
        throw new IOException("split is not an instance of KmerMatchInputSplit");
    }

    KmerMatchInputSplit kmerIndexSplit = (KmerMatchInputSplit) split;
    this.conf = context.getConfiguration();
    this.inputIndexPath = kmerIndexSplit.getIndexFilePath();
    KmerRangePartition partition = kmerIndexSplit.getPartition();
    this.joiner = new KmerJoiner(this.inputIndexPath, partition, context);
}
From source file:main.LicenseOutputFormat.java
@Override public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException { // TODO Auto-generated method stub FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(context); _fileName = context.getConfiguration().get(OUTPUT_FILE_NAME); return new Path(committer.getWorkPath(), _fileName); }
From source file:mapred.io.CustomRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { FileSplit split = (FileSplit) genericSplit; Configuration job = context.getConfiguration(); this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE); start = split.getStart();//w w w. j a va 2 s . com end = start + split.getLength(); final Path file = split.getPath(); // open the file and seek to the start of the split final FileSystem fs = file.getFileSystem(job); fileIn = fs.open(file); CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file); if (null != codec) { isCompressedInput = true; decompressor = CodecPool.getDecompressor(codec); if (codec instanceof SplittableCompressionCodec) { final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream( fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK); in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes); start = cIn.getAdjustedStart(); end = cIn.getAdjustedEnd(); filePosition = cIn; } else { in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job, this.recordDelimiterBytes); filePosition = fileIn; } } else { fileIn.seek(start); in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes); filePosition = fileIn; } // If this is not the first split, we always throw away first record // because we always (except the last split) read one extra line in // next() method. if (start != 0) { start += in.readLine(new Text(), 0, maxBytesToConsume(start)); } this.pos = start; }
From source file:mapreduce.CustomTemporalLineRecordReader.java
License:Apache License
/**
 * Opens the file behind the given split and positions a line reader on it.
 *
 * <p>The stream setup distinguishes an uncompressed file, a splittable codec and a
 * non-splittable codec. For any split that does not start at offset 0, the first line
 * is discarded. The next line is then read and the integer in its second comma-separated
 * field is remembered. Further lines are consumed until one carries a different value
 * in that field. That line is kept in {@code key}/{@code value} and
 * {@code useRecordReadInInitialize} is set.
 *
 * <p>Reaching the end of the stream during this scan ends the scan; a zero-byte read is
 * never parsed as a record.
 *
 * @param genericSplit the split to read; cast to {@code FileSplit}
 * @param context      task attempt context supplying the configuration
 * @throws IOException if the file cannot be opened or read
 */
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job,
                    this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
        filePosition = fileIn;
    }

    if (start != 0) {
        Text text = new Text();

        // If this is not the first split, we always throw away first record
        // because we always (except the last split) read one extra line in
        // next() method.
        start += in.readLine(text, 0, maxBytesToConsume(start));

        // readLine reports 0 bytes at end of stream; only parse when a line was read.
        int consumed = in.readLine(text, maxLineLength, maxBytesToConsume(start));
        start += consumed;
        if (consumed > 0) {
            final int prevTime = Integer.parseInt(text.toString().split(",")[1]);
            text = new Text();

            // Skip lines sharing the same time value; stop at the first line that differs
            // or when the stream is exhausted.
            while ((consumed = in.readLine(text, maxLineLength, maxBytesToConsume(start))) > 0) {
                start += consumed;
                final int currentTime = Integer.parseInt(text.toString().split(",")[1]);
                if (currentTime != prevTime) {
                    // This line was already consumed from the stream, so keep it as a
                    // pending record; its key is the offset at which the line begins.
                    useRecordReadInInitialize = true;
                    key = new LongWritable(start - consumed);
                    value = text;
                    break;
                }
                text = new Text();
            }
        }
    }
    this.pos = start;
}
From source file:mapreduce.ZipFileRecordReader.java
License:Apache License
/**
 * Opens the file referenced by the split and wraps it in a {@link ZipInputStream}.
 *
 * <p>The raw stream is kept in {@code fsin} and the zip view of it in {@code zip}.
 *
 * @param inputSplit         the split to read; cast to {@code FileSplit}
 * @param taskAttemptContext task attempt context supplying the configuration
 * @throws IOException          if the file system cannot be resolved or the file opened
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    final FileSplit fileSplit = (FileSplit) inputSplit;
    final Configuration configuration = taskAttemptContext.getConfiguration();
    final Path zipPath = fileSplit.getPath();

    fsin = zipPath.getFileSystem(configuration).open(zipPath);
    zip = new ZipInputStream(fsin);
}
From source file:matrixFormat.MatrixOutputFormat.java
License:Apache License
/**
 * Creates the record writer for a task attempt.
 *
 * <p>The key/value separator comes from {@code mapred.textoutputformat.separator}
 * (tab by default). When output compression is enabled, the configured codec
 * (Gzip by default) supplies the file extension and wraps the output stream.
 * The work file is created without overwriting.
 *
 * @param job task attempt context supplying the configuration
 * @return a {@code MatrixRecordWriter} over the task's work file
 * @throws IOException          if the work file cannot be resolved or created
 * @throws InterruptedException declared by this method's signature
 */
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    final Configuration conf = job.getConfiguration();
    final boolean compress = getCompressOutput(job);
    final String separator = conf.get("mapred.textoutputformat.separator", "\t");

    CompressionCodec codec = null;
    String extension = "";
    if (compress) {
        final Class<? extends CompressionCodec> codecClass =
                getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }

    final Path workFile = getDefaultWorkFile(job, extension);
    final FileSystem fileSystem = workFile.getFileSystem(conf);

    // Both the plain and the compressed case start from the same raw stream.
    final FSDataOutputStream rawOut = fileSystem.create(workFile, false);
    if (compress) {
        final DataOutputStream compressedOut = new DataOutputStream(codec.createOutputStream(rawOut));
        return new MatrixRecordWriter<K, V>(compressedOut, separator);
    }
    return new MatrixRecordWriter<K, V>(rawOut, separator);
}
From source file:matrixFormat.MatrixRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException { //FileSplit split = (FileSplit) genericSplit; MatrixFileSplit split = (MatrixFileSplit) genericSplit; Configuration job = context.getConfiguration(); method = (job.get("method").compareTo("IPB") == 0) ? 1 : ((job.get("method").compareTo("OPB") == 0) ? 2 : 0); sparse = job.getBoolean("Sparse", false); this.maxLength = job.getInt("mapred.matrixrecordreader.maxlength", Integer.MAX_VALUE); start1 = split.getStart();//w w w . j a va 2 s . co m start2 = split.getStart(1); end1 = start1 + split.getLength(0); end2 = start2 + split.getLength(1); blkID = split.getId(); final Path file = split.getPath(0); final Path file2 = split.getPath(1); compressionCodecs = new CompressionCodecFactory(job); codec = compressionCodecs.getCodec(file); // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(job); FileSystem fs2 = file2.getFileSystem(job); FSDataInputStream fileIn1 = fs.open(split.getPath(0)); FSDataInputStream fileIn2 = fs2.open(split.getPath(1)); //FileInputStream fileIn2 = new FileInputStream(file2.toString()); //Don't care the compression stuff /*if (isCompressedInput()) { decompressor = CodecPool.getDecompressor(codec); if (codec instanceof SplittableCompressionCodec) { final SplitCompressionInputStream cIn = ((SplittableCompressionCodec)codec).createInputStream( fileIn1, decompressor, start1, end1, SplittableCompressionCodec.READ_MODE.BYBLOCK); final SplitCompressionInputStream cIn2 = ((SplittableCompressionCodec)codec).createInputStream( fileIn2, decompressor, start2, end2, SplittableCompressionCodec.READ_MODE.BYBLOCK); in = new MatrixReader(cIn, cIn2); start1 = cIn.getAdjustedStart(); end1 = cIn.getAdjustedEnd(); filePosition1 = cIn; } else { in = new MatrixReader(codec.createInputStream(fileIn1, decompressor), codec.createInputStream(fileIn2, decompressor), job, split.getStarts(0), split.getStarts(1) ); filePosition1 = 
fileIn1; } } else {*/ fileIn1.seek(start1); fileIn2.seek(start2); if (sparse) { in = new MatrixReader(fileIn1, fileIn2, job, split.getStart(0), split.getStart(1)); } else { in = new MatrixReader(fileIn1, fileIn2, job, split.getStarts(0), split.getStarts(1)); } //in = new MatrixReader(file, file2, job, split.getStarts(0), split.getStarts(1)); filePosition1 = fileIn1; filePosition2 = fileIn2; //} // If this is not the first split, we always throw away first record // because we always (except the last split) read one extra line in // next() method. /*if (start1 != 0) { start1 += in.readOldBlock(maxLength, maxBytesToConsume(pos1)); this.pos1 = start1; } in.readBlocks(maxLength, maxBytesToConsume(pos1)); start1 += in.getBytesComsumed(0); //start2 += in.getBytesComsumed(1); this.pos1 = start1;*/ }
From source file:ml.shifu.guagua.mapreduce.example.nn.NNInputFormat.java
License:Apache License
/**
 * Creates the record reader for a split.
 *
 * <p>The reader is constructed with the value of
 * {@code GuaguaConstants.GUAGUA_ITERATION_COUNT} from the task configuration,
 * or {@code -1} when that key is not set. The split itself is not inspected here.
 *
 * @param split   the split to read (unused by this method)
 * @param context task attempt context supplying the configuration
 * @return a new {@code GuaguaMRRecordReader}
 */
@Override
public RecordReader<LongWritable, Text> createRecordReader(InputSplit split,
        TaskAttemptContext context) {
    final int iterationCount =
            context.getConfiguration().getInt(GuaguaConstants.GUAGUA_ITERATION_COUNT, -1);
    return new GuaguaMRRecordReader(iterationCount);
}
From source file:ml.shifu.shifu.core.mr.input.CombineInputFormat.java
License:Apache License
/**
 * Creates the record reader for a split, honouring a custom record delimiter.
 *
 * <p>The delimiter is read from the configuration key
 * {@code TEXTINPUTFORMAT_RECORD_DELIMITER}. When set, it is encoded as UTF-8 so the
 * delimiter bytes do not depend on the JVM's default charset; when unset, {@code null}
 * is passed to the reader.
 *
 * @param split   the split to read (unused by this method)
 * @param context task attempt context supplying the configuration
 * @return a new {@code CombineRecordReader}
 */
@Override
public RecordReader<LongWritable, Text> createRecordReader(InputSplit split,
        TaskAttemptContext context) {
    String delimiter = context.getConfiguration().get(TEXTINPUTFORMAT_RECORD_DELIMITER);
    byte[] recordDelimiterBytes = null;
    if (null != delimiter) {
        // Fully qualified so that no additional import is required in the file.
        recordDelimiterBytes = delimiter.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }
    return new CombineRecordReader(recordDelimiterBytes);
}