Example usage of org.apache.hadoop.mapreduce.TaskAttemptContext#getConfiguration()

Introduction

This page collects example usages of the getConfiguration() method of org.apache.hadoop.mapreduce.TaskAttemptContext.

Prototype

public Configuration getConfiguration();

Source Link

Document

Return the configuration for the job.

Usage

From source file:com.mongodb.hadoop.MongoOutputFormat.java

License:Apache License

/**
 * Get the record writer that points to the output collection.
 *
 * @param context the task attempt context; its configuration is handed to
 *                {@code MongoConfigUtil.getOutputCollections} and the context itself
 *                is passed on to the writer
 * @return a new {@code MongoRecordWriter} built from the resolved output collections
 *         and {@code context}
 */
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext context) {
    return new MongoRecordWriter(MongoConfigUtil.getOutputCollections(context.getConfiguration()), context);
}

From source file:com.moz.fiji.mapreduce.input.impl.WholeFileRecordReader.java

License:Apache License

/**
 * Creates a reader for one file of a {@code CombineFileSplit}.
 *
 * <p>Implementation detail: This constructor is built to be called via
 * reflection from within CombineFileRecordReader.</p>
 *
 * <p>The path, length and configuration are captured here; when debug logging is
 * enabled a best-effort check compares the recorded length with the length
 * reported by the file system. That check never fails construction.</p>
 *
 * @param fileSplit The CombineFileSplit that this will read from.
 * @param context The context for this task.
 * @param pathToProcess The path index from the CombineFileSplit to process in this record.
 */
public WholeFileRecordReader(CombineFileSplit fileSplit, TaskAttemptContext context, Integer pathToProcess) {
    mProcessed = false;
    mFileToRead = fileSplit.getPath(pathToProcess);
    mFileLength = fileSplit.getLength(pathToProcess);
    mConf = context.getConfiguration();

    // Only evaluated when assertions are enabled (-ea): the file is expected to start at offset 0.
    assert 0 == fileSplit.getOffset(pathToProcess);
    if (LOG.isDebugEnabled()) {
        LOG.debug("FileToRead is: " + mFileToRead);
        LOG.debug("Processing path " + pathToProcess + " out of " + fileSplit.getNumPaths());

        try {
            final FileSystem fs = mFileToRead.getFileSystem(mConf);
            assert fs.getFileStatus(mFileToRead).getLen() == mFileLength;
        } catch (IOException ioe) {
            // This is only a sanity check, so it must not abort construction; but record
            // the failure instead of dropping it silently.
            LOG.debug("Could not verify the length of " + mFileToRead, ioe);
        }
    }

    mFileName = new Text();
    mFileText = new Text();
}

From source file:com.mycompany.keywordsearch.LineRecordReaderV2.java

/**
 * Prepares this reader to read lines from the given split.
 *
 * <p>Reads the maximum line length from the job configuration, opens the split's file and
 * picks a line reader depending on whether a compression codec is found for the file.
 * Unless the (possibly codec-adjusted) start offset is 0, one line is read and discarded
 * before positioning {@code pos}.</p>
 *
 * @param genericSplit the split to read; cast to {@code FileSplit} without a type check
 * @param context the task attempt context supplying the job {@code Configuration}
 * @throws IOException declared for the file system and stream operations performed here
 */
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    // No configured limit means lines of up to Integer.MAX_VALUE are accepted.
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    // Remember which file this reader is working on (stored as the path string).
    locationKey.set(file.toString());
    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
            // The codec reports its own start/end for the split; use those from here on.
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            // Codec is not splittable: wrap the raw stream; no seek to 'start' happens on this path.
            in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job,
                    this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        // Uncompressed input: position the raw stream at the split start.
        fileIn.seek(start);
        in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}

From source file:com.mycompany.keywordsearch.TextInputFormatV2.java

/**
 * Creates a {@code LineRecordReaderV2} for the given split.
 *
 * <p>The record delimiter is taken from the {@code textinputformat.record.delimiter}
 * configuration entry and encoded as UTF-8; when the entry is absent the reader is
 * created with a {@code null} delimiter.</p>
 *
 * @param split the input split (not inspected here)
 * @param context the task attempt context whose configuration is consulted
 * @return a new record reader configured with the delimiter bytes, or {@code null} bytes
 */
@Override
public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    final String configuredDelimiter = context.getConfiguration().get("textinputformat.record.delimiter");
    final byte[] delimiterBytes = (configuredDelimiter == null)
            ? null
            : configuredDelimiter.getBytes(Charsets.UTF_8);
    return new LineRecordReaderV2(delimiterBytes);
}

From source file:com.netflix.aegisthus.input.readers.CommitLogRecordReader.java

License:Apache License

/**
 * Opens the commit log file referenced by the split and creates the scanner that reads it.
 *
 * <p>The column family id is read from the {@code commitlog.cfid} configuration entry and is
 * mandatory. If anything goes wrong after the file has been opened, the stream is closed
 * again so that a failed initialization does not leak it.</p>
 *
 * @param inputSplit the split to read; cast to {@code AegSplit} without a type check
 * @param ctx the task attempt context supplying the configuration
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 * @throws IOError wrapping any {@code IOException} raised here, including the one for a
 *         missing {@code commitlog.cfid} (kept as-is for compatibility with existing callers)
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext ctx) throws IOException, InterruptedException {
    AegSplit split = (AegSplit) inputSplit;

    start = split.getStart();
    end = split.getEnd();
    final Path file = split.getPath();

    FSDataInputStream fileIn = null;
    boolean initialized = false;
    try {
        // -1000 is the sentinel default meaning "not configured".
        cfId = ctx.getConfiguration().getInt("commitlog.cfid", -1000);
        if (cfId == -1000) {
            throw new IOException("commitlog.cfid must be set");
        }
        // open the file
        // NOTE(review): no seek to 'start' is performed here; confirm the scanner does not need one.
        FileSystem fs = file.getFileSystem(ctx.getConfiguration());
        fileIn = fs.open(file);
        InputStream dis = new BufferedInputStream(fileIn);
        scanner = new CommitLogScanner(new DataInputStream(dis), split.getConvertors(),
                Descriptor.fromFilename(file.getName()).version);
        this.pos = start;
        initialized = true;
    } catch (IOException e) {
        throw new IOError(e);
    } finally {
        // Covers both checked and unchecked failures between open() and a fully built scanner.
        if (!initialized && fileIn != null) {
            try {
                fileIn.close();
            } catch (IOException ignored) {
                // The failure that aborted initialization is the one worth reporting.
            }
        }
    }
}

From source file:com.netflix.aegisthus.input.readers.JsonRecordReader.java

License:Apache License

/**
 * Opens the split's input stream, advances it to the split start and wraps it in a
 * {@code BufferedReader} (through a {@code GZIPInputStream} when the file name ends in
 * {@code .gz}).
 *
 * @param inputSplit the split to read; cast to {@code AegSplit} without a type check
 * @param ctx the task attempt context supplying the configuration
 * @throws IOException if the stream cannot be opened, skipped or wrapped
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext ctx) throws IOException, InterruptedException {
    AegSplit split = (AegSplit) inputSplit;
    InputStream is = split.getInput(ctx.getConfiguration());
    start = split.getStart();
    end = split.getEnd();
    pos = start;

    // InputStream.skip may skip fewer bytes than requested, so a single call is not
    // enough to guarantee the stream is positioned at the split start.
    long remaining = split.getStart();
    while (remaining > 0) {
        long skipped = is.skip(remaining);
        if (skipped <= 0) {
            // skip() made no progress: consume one byte to tell a stall from end of stream.
            if (is.read() < 0) {
                // Stream is shorter than the split start; leave it at EOF as before.
                break;
            }
            skipped = 1;
        }
        remaining -= skipped;
    }

    // NOTE(review): InputStreamReader uses the platform default charset here; confirm
    // whether the input is expected to be UTF-8.
    if (split.getPath().getName().endsWith(".gz")) {
        reader = new BufferedReader(new InputStreamReader(new GZIPInputStream(is)));
    } else {
        reader = new BufferedReader(new InputStreamReader(is));
    }
}

From source file:com.netflix.aegisthus.input.readers.SSTableRecordReader.java

License:Apache License

/**
 * Sets up the SSTable scanner for the given split.
 *
 * <p>Determines the start and end offsets (the end comes from the compression metadata
 * when the split is compressed), reads the debug-file flag and optional maximum column
 * size from the configuration, creates the scanner and skips it forward to the start
 * offset.</p>
 *
 * @param inputSplit the split to read; cast to {@code AegSplit} without a type check
 * @param ctx the task attempt context; also retained as {@code context} when it is a
 *            {@code TaskInputOutputContext}
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 * @throws IOError wrapping any {@code IOException} raised while creating or positioning
 *         the scanner
 */
@SuppressWarnings("rawtypes")
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext ctx) throws IOException, InterruptedException {
    AegSplit split = (AegSplit) inputSplit;

    start = split.getStart();
    //TODO: This has a side effect of setting compressionmetadata. remove this.
    // The stream must be obtained before the compression metadata is queried below.
    InputStream is = split.getInput(ctx.getConfiguration());
    end = split.isCompressed() ? split.getCompressionMetadata().getDataLength() : split.getEnd();

    outputFile = ctx.getConfiguration().getBoolean("aegsithus.debug.file", false);
    filename = split.getPath().toUri().toString();

    LOG.info(String.format("File: %s", split.getPath().toUri().getPath()));
    LOG.info("Start: " + start);
    LOG.info("End: " + end);

    if (ctx instanceof TaskInputOutputContext) {
        context = (TaskInputOutputContext) ctx;
    }

    try {
        scanner = new SSTableScanner(new DataInputStream(is), split.getConvertors(), end,
                Descriptor.fromFilename(filename).version);

        // The limit is applied only when the key is explicitly present in the configuration.
        final boolean hasMaxColSize = ctx.getConfiguration().get("aegisthus.maxcolsize") != null;
        if (hasMaxColSize) {
            final long maxColSize = ctx.getConfiguration().getLong("aegisthus.maxcolsize", -1L);
            scanner.setMaxColSize(maxColSize);
            LOG.info(String.format("aegisthus.maxcolsize - %d", maxColSize));
        }

        scanner.skipUnsafe(start);
        this.pos = start;
    } catch (IOException e) {
        throw new IOError(e);
    }
}

From source file:com.netflix.aegisthus.tools.StorageHelper.java

License:Apache License

/**
 * Creates a helper bound to the given task attempt.
 *
 * <p>Keeps the context and its configuration, and reads the debug flag from the
 * {@code CFG_STORAGE_DEBUG} configuration entry (defaulting to {@code false}).</p>
 *
 * @param ctx the task attempt context to take the configuration from
 */
public StorageHelper(TaskAttemptContext ctx) {
    this.ctx = ctx;
    this.config = ctx.getConfiguration();
    this.debug = this.config.getBoolean(CFG_STORAGE_DEBUG, false);
}

From source file:com.netflix.aegisthus.tools.Utils.java

License:Apache License

/**
 * Copies {@code from} to {@code to}, renaming instead of copying when possible.
 *
 * <p>A relative {@code to} is resolved against the {@code mapred.working.dir} configuration
 * entry. When no snappy decoding is requested and {@code onSameHdfs} reports that both
 * paths are on the same HDFS, the file is renamed and no bytes are copied. Otherwise the
 * source is streamed to a newly created destination (existing files are not overwritten),
 * optionally through a {@code SnappyInputStream2}, reporting progress on the task context
 * periodically.</p>
 *
 * <p>Both streams are closed even when opening the destination, copying, or closing the
 * other stream fails.</p>
 *
 * @param from source path
 * @param to destination path, absolute or relative to the working directory
 * @param snappy whether to read the source through {@code SnappyInputStream2}
 * @param ctx task attempt context providing the configuration and progress reporting
 * @throws IOException if any file system or stream operation fails
 */
public static void copy(Path from, Path to, boolean snappy, TaskAttemptContext ctx) throws IOException {
    FileSystem fromFs = from.getFileSystem(ctx.getConfiguration());
    FileSystem toFs = to.getFileSystem(ctx.getConfiguration());

    if (!to.isAbsolute()) {
        to = new Path(ctx.getConfiguration().get("mapred.working.dir"), to);
    }
    if (!snappy && onSameHdfs(ctx.getConfiguration(), from, to)) {
        LOG.info(String.format("renaming %s to %s", from, to));
        // NOTE(review): the boolean results of mkdirs/rename are not checked here.
        toFs.mkdirs(to.getParent());
        toFs.rename(from, to);
        return;
    }

    InputStream in = fromFs.open(from);
    try {
        // Created inside the outer try so that a failure here still closes 'in'.
        OutputStream out = toFs.create(to, false);
        try {
            if (snappy) {
                in = new SnappyInputStream2(in);
            }
            byte[] buffer = new byte[65536];
            int bytesRead;
            int count = 0;
            while ((bytesRead = in.read(buffer)) >= 0) {
                if (bytesRead > 0) {
                    out.write(buffer, 0, bytesRead);
                }
                // Report progress on every 50th read so a long copy keeps signalling the framework.
                if (count++ % 50 == 0) {
                    ctx.progress();
                }
            }
        } finally {
            out.close();
        }
    } finally {
        // Runs even if out.close() threw, so the input is never left open.
        in.close();
    }
}

From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java

License:Apache License

/**
 * Lists the files a task wrote under its task attempt path. Subclasses may override
 * this method to change how output files are identified.
 * <p>
 * This implementation returns the direct children of the attempt path, filtered with
 * {@code HiddenPathFilter} (hidden files, i.e. names starting with '.' or '_').
 * <p>
 * The task attempt path comes from {@link #getTaskAttemptPath(TaskAttemptContext)}.
 *
 * @param context this task's {@link TaskAttemptContext}
 * @return the output files produced by this task in the task attempt path
 * @throws IOException if resolving the file system or listing the attempt path fails
 */
protected Iterable<FileStatus> getTaskOutput(TaskAttemptContext context) throws IOException {
    final Path taskAttemptDir = getTaskAttemptPath(context);
    // The attempt path decides which file system is listed.
    final FileSystem attemptFileSystem = taskAttemptDir.getFileSystem(context.getConfiguration());
    return Arrays.asList(attemptFileSystem.listStatus(taskAttemptDir, HiddenPathFilter.get()));
}