List of usage examples for the org.apache.hadoop.mapreduce.TaskAttemptContext method getConfiguration()
/** Returns the {@code Configuration} object associated with this context. */
public Configuration getConfiguration();
From source file:com.mongodb.hadoop.MongoOutputFormat.java
License:Apache License
/**
 * Creates the record writer that points to the output collection.
 *
 * @param context the task attempt context; its configuration is handed to
 *                {@code MongoConfigUtil.getOutputCollections} to resolve the target
 * @return a new {@code MongoRecordWriter} built from the resolved collections and {@code context}
 */
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext context) {
    return new MongoRecordWriter(
            MongoConfigUtil.getOutputCollections(context.getConfiguration()),
            context);
}
From source file:com.moz.fiji.mapreduce.input.impl.WholeFileRecordReader.java
License:Apache License
/** * Implementation detail: This constructor is built to be called via * reflection from within CombineFileRecordReader. * * @param fileSplit The CombineFileSplit that this will read from. * @param context The context for this task. * @param pathToProcess The path index from the CombineFileSplit to process in this record. *///from w ww. j a v a2s .c om public WholeFileRecordReader(CombineFileSplit fileSplit, TaskAttemptContext context, Integer pathToProcess) { mProcessed = false; mFileToRead = fileSplit.getPath(pathToProcess); mFileLength = fileSplit.getLength(pathToProcess); mConf = context.getConfiguration(); assert 0 == fileSplit.getOffset(pathToProcess); if (LOG.isDebugEnabled()) { LOG.debug("FileToRead is: " + mFileToRead.toString()); LOG.debug("Processing path " + pathToProcess + " out of " + fileSplit.getNumPaths()); try { final FileSystem fs = mFileToRead.getFileSystem(mConf); assert fs.getFileStatus(mFileToRead).getLen() == mFileLength; } catch (IOException ioe) { // oh well, I was just testing. } } mFileName = new Text(); mFileText = new Text(); }
From source file:com.mycompany.keywordsearch.LineRecordReaderV2.java
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { FileSplit split = (FileSplit) genericSplit; Configuration job = context.getConfiguration(); this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE); start = split.getStart();/*from www . ja va 2 s .co m*/ end = start + split.getLength(); final Path file = split.getPath(); // open the file and seek to the start of the split final FileSystem fs = file.getFileSystem(job); fileIn = fs.open(file); locationKey.set(file.toString()); CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file); if (null != codec) { isCompressedInput = true; decompressor = CodecPool.getDecompressor(codec); if (codec instanceof SplittableCompressionCodec) { final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream( fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK); in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes); start = cIn.getAdjustedStart(); end = cIn.getAdjustedEnd(); filePosition = cIn; } else { in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job, this.recordDelimiterBytes); filePosition = fileIn; } } else { fileIn.seek(start); in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes); filePosition = fileIn; } // If this is not the first split, we always throw away first record // because we always (except the last split) read one extra line in // next() method. if (start != 0) { start += in.readLine(new Text(), 0, maxBytesToConsume(start)); } this.pos = start; }
From source file:com.mycompany.keywordsearch.TextInputFormatV2.java
/**
 * Creates a {@code LineRecordReaderV2} that uses the record delimiter configured
 * under {@code textinputformat.record.delimiter}.
 *
 * @param split the input split (not used by this method)
 * @param context the task attempt context whose configuration is consulted
 * @return a reader given the UTF-8 bytes of the configured delimiter, or
 *         {@code null} delimiter bytes when the property is not set
 */
@Override
public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    final String delimiter = context.getConfiguration().get("textinputformat.record.delimiter");
    final byte[] recordDelimiterBytes =
            (delimiter == null) ? null : delimiter.getBytes(Charsets.UTF_8);
    return new LineRecordReaderV2(recordDelimiterBytes);
}
From source file:com.netflix.aegisthus.input.readers.CommitLogRecordReader.java
License:Apache License
@Override public void initialize(InputSplit inputSplit, TaskAttemptContext ctx) throws IOException, InterruptedException { AegSplit split = (AegSplit) inputSplit; start = split.getStart();/*from w w w. j a va 2s . com*/ end = split.getEnd(); final Path file = split.getPath(); try { cfId = ctx.getConfiguration().getInt("commitlog.cfid", -1000); if (cfId == -1000) { throw new IOException("commitlog.cfid must be set"); } // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(ctx.getConfiguration()); FSDataInputStream fileIn = fs.open(split.getPath()); InputStream dis = new BufferedInputStream(fileIn); scanner = new CommitLogScanner(new DataInputStream(dis), split.getConvertors(), Descriptor.fromFilename(split.getPath().getName()).version); this.pos = start; } catch (IOException e) { throw new IOError(e); } }
From source file:com.netflix.aegisthus.input.readers.JsonRecordReader.java
License:Apache License
/**
 * Opens the split's input stream, advances it to the split start and wraps it in
 * a {@link BufferedReader}, transparently gunzipping files whose name ends in
 * {@code .gz}.
 *
 * @param inputSplit the split to read; cast to {@code AegSplit}
 * @param ctx the task attempt context supplying the configuration
 * @throws IOException if the stream cannot be opened, skipped or wrapped
 * @throws InterruptedException declared by the signature
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext ctx) throws IOException, InterruptedException {
    AegSplit split = (AegSplit) inputSplit;
    InputStream is = split.getInput(ctx.getConfiguration());
    start = split.getStart();
    end = split.getEnd();
    pos = start;

    // InputStream.skip may skip fewer bytes than requested (possibly zero), so a
    // single call cannot be trusted to reach the split start. Keep skipping, and
    // fall back to a one-byte read to tell "no progress" apart from end of stream.
    long remaining = start;
    while (remaining > 0) {
        long skipped = is.skip(remaining);
        if (skipped > 0) {
            remaining -= skipped;
        } else if (is.read() >= 0) {
            remaining--;
        } else {
            // End of stream before the split start: nothing to read, as before.
            break;
        }
    }

    // NOTE(review): InputStreamReader uses the platform default charset here —
    // confirm whether the input is always UTF-8.
    if (split.getPath().getName().endsWith(".gz")) {
        reader = new BufferedReader(new InputStreamReader(new GZIPInputStream(is)));
    } else {
        reader = new BufferedReader(new InputStreamReader(is));
    }
}
From source file:com.netflix.aegisthus.input.readers.SSTableRecordReader.java
License:Apache License
@SuppressWarnings("rawtypes") @Override// w w w . j ava 2 s . c o m public void initialize(InputSplit inputSplit, TaskAttemptContext ctx) throws IOException, InterruptedException { AegSplit split = (AegSplit) inputSplit; start = split.getStart(); //TODO: This has a side effect of setting compressionmetadata. remove this. InputStream is = split.getInput(ctx.getConfiguration()); if (split.isCompressed()) { end = split.getCompressionMetadata().getDataLength(); } else { end = split.getEnd(); } outputFile = ctx.getConfiguration().getBoolean("aegsithus.debug.file", false); filename = split.getPath().toUri().toString(); LOG.info(String.format("File: %s", split.getPath().toUri().getPath())); LOG.info("Start: " + start); LOG.info("End: " + end); if (ctx instanceof TaskInputOutputContext) { context = (TaskInputOutputContext) ctx; } try { scanner = new SSTableScanner(new DataInputStream(is), split.getConvertors(), end, Descriptor.fromFilename(filename).version); if (ctx.getConfiguration().get("aegisthus.maxcolsize") != null) { scanner.setMaxColSize(ctx.getConfiguration().getLong("aegisthus.maxcolsize", -1L)); LOG.info(String.format("aegisthus.maxcolsize - %d", ctx.getConfiguration().getLong("aegisthus.maxcolsize", -1L))); } scanner.skipUnsafe(start); this.pos = start; } catch (IOException e) { throw new IOError(e); } }
From source file:com.netflix.aegisthus.tools.StorageHelper.java
License:Apache License
/**
 * Creates a helper bound to the given task attempt.
 *
 * @param ctx the task attempt context; its configuration is cached and the
 *            {@code CFG_STORAGE_DEBUG} flag (default {@code false}) is read from it
 */
public StorageHelper(TaskAttemptContext ctx) {
    this.ctx = ctx;
    this.config = ctx.getConfiguration();
    this.debug = this.config.getBoolean(CFG_STORAGE_DEBUG, false);
}
From source file:com.netflix.aegisthus.tools.Utils.java
License:Apache License
/**
 * Copies {@code from} to {@code to}, reporting progress to the task while doing so.
 *
 * <p>A relative {@code to} is resolved against the {@code mapred.working.dir}
 * setting. When no snappy decoding is requested and {@code onSameHdfs} reports
 * that both paths are on the same HDFS, the file is renamed instead of copied.
 * Otherwise the bytes are streamed, through a {@code SnappyInputStream2} when
 * {@code snappy} is set, into a newly created destination file that must not
 * already exist.
 *
 * @param from the source path
 * @param to the destination path, absolute or relative to the working directory
 * @param snappy whether the source must be read through {@code SnappyInputStream2}
 * @param ctx the task attempt context supplying the configuration and progress reporting
 * @throws IOException if the rename fails or any stream operation fails
 */
public static void copy(Path from, Path to, boolean snappy, TaskAttemptContext ctx) throws IOException {
    FileSystem fromFs = from.getFileSystem(ctx.getConfiguration());
    FileSystem toFs = to.getFileSystem(ctx.getConfiguration());
    if (!to.isAbsolute()) {
        to = new Path(ctx.getConfiguration().get("mapred.working.dir"), to);
    }

    if (!snappy && onSameHdfs(ctx.getConfiguration(), from, to)) {
        LOG.info(String.format("renaming %s to %s", from, to));
        toFs.mkdirs(to.getParent());
        // rename signals failure through its return value; ignoring it would
        // silently leave the destination missing.
        if (!toFs.rename(from, to)) {
            throw new IOException(String.format("failed to rename %s to %s", from, to));
        }
        return;
    }

    // Nested try/finally so that each stream is closed even when opening or
    // closing the other one throws.
    InputStream in = fromFs.open(from);
    try {
        OutputStream out = toFs.create(to, false);
        try {
            if (snappy) {
                in = new SnappyInputStream2(in);
            }
            byte[] buffer = new byte[65536];
            int bytesRead;
            int count = 0;
            while ((bytesRead = in.read(buffer)) >= 0) {
                if (bytesRead > 0) {
                    out.write(buffer, 0, bytesRead);
                }
                // Report progress on every 50th read, starting with the first.
                if (count++ % 50 == 0) {
                    ctx.progress();
                }
            }
        } finally {
            out.close();
        }
    } finally {
        in.close();
    }
}
From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java
License:Apache License
/** * Lists the output of a task under the task attempt path. Subclasses can * override this method to change how output files are identified. * <p>//from ww w. ja v a2s. c om * This implementation lists the files that are direct children of the output * path and filters hidden files (file names starting with '.' or '_'). * <p> * The task attempt path is provided by * {@link #getTaskAttemptPath(TaskAttemptContext)} * * @param context this task's {@link TaskAttemptContext} * @return the output files produced by this task in the task attempt path * @throws IOException */ protected Iterable<FileStatus> getTaskOutput(TaskAttemptContext context) throws IOException { // get files on the local FS in the attempt path Path attemptPath = getTaskAttemptPath(context); FileSystem attemptFS = attemptPath.getFileSystem(context.getConfiguration()); FileStatus[] stats = attemptFS.listStatus(attemptPath, HiddenPathFilter.get()); return Arrays.asList(stats); }