Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
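
A minimal sketch of the typical pattern: a task-side component reads job settings from the returned Configuration. Mapper.Context extends TaskAttemptContext, so the same call works there; the property name my.app.threshold is a hypothetical example, not a key used by any of the libraries below.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ThresholdMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    private int threshold;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Mapper.Context is a TaskAttemptContext, so getConfiguration() is available here.
        Configuration conf = context.getConfiguration();
        threshold = conf.getInt("my.app.threshold", 0); // hypothetical key with a default
    }
}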

Usage

From source file:com.mongodb.hadoop.MongoOutputFormat.java

License:Apache License

/**
 * Get the record writer that points to the output collection.
 */
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext context) {
    return new MongoRecordWriter(MongoConfigUtil.getOutputCollections(context.getConfiguration()), context);
}
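
The output collections read above are typically configured in the job driver before submission. A hedged driver-side sketch, assuming mongo-hadoop's MongoConfigUtil.setOutputURI helper and an example URI:

// in the job driver
Configuration conf = new Configuration();
// Point the job's output at a MongoDB collection (example URI).
MongoConfigUtil.setOutputURI(conf, "mongodb://localhost:27017/mydb.mycollection");
Job job = Job.getInstance(conf, "mongo-output-example");
job.setOutputFormatClass(MongoOutputFormat.class);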

From source file:com.moz.fiji.mapreduce.input.impl.WholeFileRecordReader.java

License:Apache License

/**
 * Implementation detail: This constructor is built to be called via
 * reflection from within CombineFileRecordReader.
 *
 * @param fileSplit The CombineFileSplit that this will read from.
 * @param context The context for this task.
 * @param pathToProcess The path index from the CombineFileSplit to process in this record.
 */
public WholeFileRecordReader(CombineFileSplit fileSplit, TaskAttemptContext context, Integer pathToProcess) {
    mProcessed = false;
    mFileToRead = fileSplit.getPath(pathToProcess);
    mFileLength = fileSplit.getLength(pathToProcess);
    mConf = context.getConfiguration();

    assert 0 == fileSplit.getOffset(pathToProcess);
    if (LOG.isDebugEnabled()) {
        LOG.debug("FileToRead is: " + mFileToRead.toString());
        LOG.debug("Processing path " + pathToProcess + " out of " + fileSplit.getNumPaths());

        try {
            final FileSystem fs = mFileToRead.getFileSystem(mConf);
            assert fs.getFileStatus(mFileToRead).getLen() == mFileLength;
        } catch (IOException ioe) {
            // oh well, I was just testing.
        }
    }

    mFileName = new Text();
    mFileText = new Text();
}

From source file:com.mycompany.keywordsearch.LineRecordReaderV2.java

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    locationKey.set(file.toString());
    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job,
                    this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
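
The maxLineLength lookup above is driven entirely by the job configuration. A driver-side sketch, assuming the MAX_LINE_LENGTH constant resolves to the stock Hadoop key mapreduce.input.linerecordreader.line.maxlength:

// in the job driver
Configuration conf = new Configuration();
// Cap individual records at 1 MB; the reader skips lines longer than this.
conf.setInt("mapreduce.input.linerecordreader.line.maxlength", 1024 * 1024);
Job job = Job.getInstance(conf, "keyword-search");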

From source file:com.mycompany.keywordsearch.TextInputFormatV2.java

@Override
public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    String delimiter = context.getConfiguration().get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter)
        recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    return new LineRecordReaderV2(recordDelimiterBytes);
}
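
The delimiter lookup uses the standard textinputformat.record.delimiter property, so a custom record separator can be supplied from the driver. A minimal sketch:

// in the job driver
Configuration conf = new Configuration();
// Split records on a custom single-byte delimiter instead of newlines.
conf.set("textinputformat.record.delimiter", "\u0001");
Job job = Job.getInstance(conf, "custom-delimiter");
job.setInputFormatClass(TextInputFormatV2.class);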

From source file:com.netflix.aegisthus.input.readers.CommitLogRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext ctx) throws IOException, InterruptedException {
    AegSplit split = (AegSplit) inputSplit;

    start = split.getStart();
    end = split.getEnd();
    final Path file = split.getPath();

    try {
        cfId = ctx.getConfiguration().getInt("commitlog.cfid", -1000);
        if (cfId == -1000) {
            throw new IOException("commitlog.cfid must be set");
        }
        // open the file and seek to the start of the split
        FileSystem fs = file.getFileSystem(ctx.getConfiguration());
        FSDataInputStream fileIn = fs.open(split.getPath());
        InputStream dis = new BufferedInputStream(fileIn);
        scanner = new CommitLogScanner(new DataInputStream(dis), split.getConvertors(),
                Descriptor.fromFilename(split.getPath().getName()).version);
        this.pos = start;
    } catch (IOException e) {
        throw new IOError(e);
    }
}
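
Since initialize() throws when commitlog.cfid is absent (using -1000 as its "unset" sentinel), the driver must set the key before submission. A sketch with an example column family id:

// in the job driver
Configuration conf = new Configuration();
conf.setInt("commitlog.cfid", 42); // example value; required by CommitLogRecordReader
Job job = Job.getInstance(conf, "commitlog-scan");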

From source file:com.netflix.aegisthus.input.readers.JsonRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext ctx) throws IOException, InterruptedException {
    AegSplit split = (AegSplit) inputSplit;
    InputStream is = split.getInput(ctx.getConfiguration());
    start = split.getStart();
    end = split.getEnd();
    pos = start;
    is.skip(split.getStart());
    if (split.getPath().getName().endsWith(".gz")) {
        reader = new BufferedReader(new InputStreamReader(new GZIPInputStream(is)));
    } else {
        reader = new BufferedReader(new InputStreamReader(is));
    }
}

From source file:com.netflix.aegisthus.input.readers.SSTableRecordReader.java

License:Apache License

@SuppressWarnings("rawtypes")
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext ctx) throws IOException, InterruptedException {
    AegSplit split = (AegSplit) inputSplit;

    start = split.getStart();
    //TODO: This has a side effect of setting compressionmetadata. remove this.
    InputStream is = split.getInput(ctx.getConfiguration());
    if (split.isCompressed()) {
        end = split.getCompressionMetadata().getDataLength();
    } else {
        end = split.getEnd();
    }
    outputFile = ctx.getConfiguration().getBoolean("aegsithus.debug.file", false);
    filename = split.getPath().toUri().toString();

    LOG.info(String.format("File: %s", split.getPath().toUri().getPath()));
    LOG.info("Start: " + start);
    LOG.info("End: " + end);
    if (ctx instanceof TaskInputOutputContext) {
        context = (TaskInputOutputContext) ctx;
    }

    try {
        scanner = new SSTableScanner(new DataInputStream(is), split.getConvertors(), end,
                Descriptor.fromFilename(filename).version);
        if (ctx.getConfiguration().get("aegisthus.maxcolsize") != null) {
            scanner.setMaxColSize(ctx.getConfiguration().getLong("aegisthus.maxcolsize", -1L));
            LOG.info(String.format("aegisthus.maxcolsize - %d",
                    ctx.getConfiguration().getLong("aegisthus.maxcolsize", -1L)));
        }
        scanner.skipUnsafe(start);
        this.pos = start;
    } catch (IOException e) {
        throw new IOError(e);
    }
}
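
Both keys read above are optional and can be supplied from the driver. A sketch using the property names exactly as they appear in this reader (including the aegsithus.debug.file spelling, copied verbatim from the source):

// in the job driver
Configuration conf = new Configuration();
// Optional: the reader only calls setMaxColSize when this key is present.
conf.setLong("aegisthus.maxcolsize", 64L * 1024 * 1024);
// Optional: per-file debug output; defaults to false.
conf.setBoolean("aegsithus.debug.file", true);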

From source file:com.netflix.aegisthus.tools.StorageHelper.java

License:Apache License

public StorageHelper(TaskAttemptContext ctx) {
    this.ctx = ctx;
    this.config = ctx.getConfiguration();
    debug = config.getBoolean(CFG_STORAGE_DEBUG, false);
}

From source file:com.netflix.aegisthus.tools.Utils.java

License:Apache License

public static void copy(Path from, Path to, boolean snappy, TaskAttemptContext ctx) throws IOException {
    FileSystem fromFs = from.getFileSystem(ctx.getConfiguration());
    FileSystem toFs = to.getFileSystem(ctx.getConfiguration());

    if (!to.isAbsolute()) {
        to = new Path(ctx.getConfiguration().get("mapred.working.dir"), to);
    }
    if (!snappy && onSameHdfs(ctx.getConfiguration(), from, to)) {
        LOG.info(String.format("renaming %s to %s", from, to));
        toFs.mkdirs(to.getParent());
        toFs.rename(from, to);
        return;
    }

    InputStream in = fromFs.open(from);
    OutputStream out = toFs.create(to, false);
    try {
        if (snappy) {
            in = new SnappyInputStream2(in);
        }
        byte[] buffer = new byte[65536];
        int bytesRead;
        int count = 0;
        while ((bytesRead = in.read(buffer)) >= 0) {
            if (bytesRead > 0) {
                out.write(buffer, 0, bytesRead);
            }
            if (count++ % 50 == 0) {
                ctx.progress();
            }
        }
    } finally {
        in.close();
        out.close();
    }
}

From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java

License:Apache License

/**
 * Lists the output of a task under the task attempt path. Subclasses can
 * override this method to change how output files are identified.
 * <p>
 * This implementation lists the files that are direct children of the output
 * path and filters hidden files (file names starting with '.' or '_').
 * <p>
 * The task attempt path is provided by
 * {@link #getTaskAttemptPath(TaskAttemptContext)}
 *
 * @param context this task's {@link TaskAttemptContext}
 * @return the output files produced by this task in the task attempt path
 * @throws IOException if the task attempt path cannot be listed
 */
protected Iterable<FileStatus> getTaskOutput(TaskAttemptContext context) throws IOException {
    // get files on the local FS in the attempt path
    Path attemptPath = getTaskAttemptPath(context);
    FileSystem attemptFS = attemptPath.getFileSystem(context.getConfiguration());
    FileStatus[] stats = attemptFS.listStatus(attemptPath, HiddenPathFilter.get());
    return Arrays.asList(stats);
}
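
Since the javadoc invites subclasses to change how output files are identified, here is a hedged sketch of an override that walks the attempt path recursively instead of listing only direct children. The subclass is hypothetical; FileSystem.listFiles is the standard recursive listing API:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.RemoteIterator;

// in a hypothetical subclass of S3MultipartOutputCommitter:
@Override
protected Iterable<FileStatus> getTaskOutput(TaskAttemptContext context) throws IOException {
    Path attemptPath = getTaskAttemptPath(context);
    FileSystem attemptFS = attemptPath.getFileSystem(context.getConfiguration());
    List<FileStatus> stats = new ArrayList<>();
    // Recurse into subdirectories; keep the same hidden-file filter ('.' and '_').
    RemoteIterator<LocatedFileStatus> files = attemptFS.listFiles(attemptPath, true);
    while (files.hasNext()) {
        LocatedFileStatus stat = files.next();
        String name = stat.getPath().getName();
        if (!name.startsWith(".") && !name.startsWith("_")) {
            stats.add(stat);
        }
    }
    return stats;
}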