List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
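Each example below calls getConfiguration() on the task attempt context to read job settings from inside an InputFormat, OutputFormat, or RecordReader. A minimal sketch of the common pattern follows; the property names and the reader class are hypothetical, for illustration only:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class ExampleReaderSketch {
    private int maxLineLength;
    private boolean skipHeader;

    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
        // getConfiguration() returns the job's Configuration for this task attempt.
        Configuration conf = context.getConfiguration();
        // Read per-job settings; both property names here are made up.
        maxLineLength = conf.getInt("example.max.line.length", Integer.MAX_VALUE);
        skipHeader = conf.getBoolean("example.skip.header", false);
    }
}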
From source file:brush.InterleavedFastqInputFormat.java
License:Apache License
/**
 * Creates the new record reader that underlies this input format.
 *
 * @param genericSplit The split that the record reader should read.
 * @param context The Hadoop task context.
 * @return Returns the interleaved FASTQ record reader.
 */
public RecordReader<Void, Text> createRecordReader(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    context.setStatus(genericSplit.toString());

    // cast as per example in TextInputFormat
    return new InterleavedFastqRecordReader(context.getConfiguration(), (FileSplit) genericSplit);
}
From source file:ca.sparkera.adapters.mapreduce.MainframeVBRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    final Path file = split.getPath();
    initialize(job, split.getStart(), split.getLength(), file);
}
From source file:chaohBIM.ZipFileRecordReader.java
License:Apache License
/**
 * Initialise and open the ZIP file from the FileSystem
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);

    // Open the stream
    fsin = fs.open(path);
    zip = new ZipInputStream(fsin);

    // Strip the ".zip" extension; the dot is escaped because replaceAll takes a regex.
    zipfilename = path.getName().replaceAll("\\.zip$", "");
}
From source file:cloudbase.core.client.mapreduce.CloudbaseOutputFormatShim.java
License:Apache License
@Override
public RecordWriter<Text, Mutation> getRecordWriter(TaskAttemptContext attempt) throws IOException {
    if (isMock(attempt.getConfiguration())) {
        try {
            return new MockCloudbaseRecordWriter(attempt);
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
    return super.getRecordWriter(attempt);
}
From source file:cn.edu.hfut.dmic.webcollector.fetcher.FetcherOutputFormat.java
@Override
public RecordWriter<Text, Writable> getRecordWriter(TaskAttemptContext tac)
        throws IOException, InterruptedException {
    Configuration conf = tac.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    String outputPath = conf.get("mapred.output.dir");

    Path fetchPath = new Path(outputPath, "fetch/info");
    Path contentPath = new Path(outputPath, "content/info");
    Path parseDataPath = new Path(outputPath, "parse/info");
    Path redirectPath = new Path(outputPath, "redirect/info");

    final SequenceFile.Writer fetchOut = new SequenceFile.Writer(fs, conf, fetchPath, Text.class,
            CrawlDatum.class);
    final SequenceFile.Writer contentOut = new SequenceFile.Writer(fs, conf, contentPath, Text.class,
            Content.class);
    final SequenceFile.Writer parseDataOut = new SequenceFile.Writer(fs, conf, parseDataPath, Text.class,
            CrawlDatum.class);
    final SequenceFile.Writer redirectOut = new SequenceFile.Writer(fs, conf, redirectPath, CrawlDatum.class,
            Text.class);

    return new RecordWriter<Text, Writable>() {
        @Override
        public void write(Text k, Writable v) throws IOException, InterruptedException {
            if (v instanceof CrawlDatum) {
                fetchOut.append(k, v);
            } else if (v instanceof Content) {
                contentOut.append(k, v);
            } else if (v instanceof ParseData) {
                ParseData parseData = (ParseData) v;
                CrawlDatums next = parseData.next;
                for (CrawlDatum datum : next) {
                    parseDataOut.append(new Text(datum.getKey()), datum);
                }
            } else if (v instanceof Redirect) {
                Redirect redirect = (Redirect) v;
                redirectOut.append(redirect.datum, new Text(redirect.realUrl));
            }
        }

        @Override
        public void close(TaskAttemptContext tac) throws IOException, InterruptedException {
            fetchOut.close();
            contentOut.close();
            parseDataOut.close();
            redirectOut.close();
        }
    };
}
From source file:cn.uc.hadoop.mapreduce.lib.input.FileNameLineRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // ADD by qiujw: use the file name as the key
    key = new Text(file.getName());

    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(cIn, job);
            } else {
                in = new LineReader(cIn, job, this.recordDelimiterBytes);
            }
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            } else {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job,
                        this.recordDelimiterBytes);
            }
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(fileIn, job);
        } else {
            in = new LineReader(fileIn, job, this.recordDelimiterBytes);
        }
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away the first record
    // because we always (except for the last split) read one extra line in
    // the next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file:cn.uc.hadoop.mapreduce.lib.input.FileNameTextInputFormat.java
License:Apache License
@Override
public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    String delimiter = context.getConfiguration().get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter)
        recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    return new FileNameLineRecordReader(recordDelimiterBytes);
}
From source file:cn.uc.hadoop.mapreduce.lib.input.FilePathLineRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // ADD by qiujw: use the full file path as the key
    key = new Text(file.toString());

    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(cIn, job);
            } else {
                in = new LineReader(cIn, job, this.recordDelimiterBytes);
            }
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            } else {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job,
                        this.recordDelimiterBytes);
            }
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(fileIn, job);
        } else {
            in = new LineReader(fileIn, job, this.recordDelimiterBytes);
        }
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away the first record
    // because we always (except for the last split) read one extra line in
    // the next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file:cn.uc.hadoop.mapreduce.lib.input.FilePathTextInputFormat.java
License:Apache License
@Override
public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    String delimiter = context.getConfiguration().get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter)
        recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    return new FilePathLineRecordReader(recordDelimiterBytes);
}
From source file:co.cask.cdap.data.stream.AbstractStreamInputFormat.java
License:Apache License
@Override
public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new StreamRecordReader<>(createStreamEventDecoder(context.getConfiguration()),
            getAuthorizationEnforcer(context), getAuthenticationContext(context),
            GSON.fromJson(context.getConfiguration().get(STREAM_ID), StreamId.class));
}