Usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext#getConfiguration()
public Configuration getConfiguration();
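The examples below all follow the same pattern: a task-side component (RecordReader, RecordWriter, OutputCommitter) pulls the job's Configuration out of the TaskAttemptContext and reads its settings from it. A minimal standalone sketch of that pattern, assuming Hadoop 2.x+ on the classpath (the key and value are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class GetConfigurationDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("example.key", "example-value"); // illustrative key/value

        // getConfiguration() hands back the Configuration the context was built with.
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
        System.out.println(context.getConfiguration().get("example.key")); // prints "example-value"
    }
}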
From source file:com.panguso.lc.analysis.format.mapreduce.TextOutputFormat.java
License:Open Source License
/**
 * @param job job
 * @throws IOException IOException
 * @throws InterruptedException InterruptedException
 */
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = conf.get("mapred.textoutputformat.separator", separate);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}
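The separator and compression settings this writer reads at task time can be supplied from the driver. A minimal sketch, assuming the custom TextOutputFormat above is on the classpath (job name and output path are illustrative):

// Driver-side sketch: these values reach the writer via job.getConfiguration().
Configuration conf = new Configuration();
conf.set("mapred.textoutputformat.separator", "\t"); // key read by the writer above
Job job = Job.getInstance(conf, "text-output-example");
job.setOutputFormatClass(TextOutputFormat.class); // the custom class above
FileOutputFormat.setOutputPath(job, new Path("/tmp/example-out")); // illustrative path
FileOutputFormat.setCompressOutput(job, true); // picked up by getCompressOutput(job)
FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); // picked up by getOutputCompressorClass(...)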
From source file:com.phantom.hadoop.examples.terasort.TeraOutputFormat.java
License:Apache License
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job) throws IOException {
    Path file = getDefaultWorkFile(job, "");
    FileSystem fs = file.getFileSystem(job.getConfiguration());
    FSDataOutputStream fileOut = fs.create(file);
    return new TeraRecordWriter(fileOut, job);
}
From source file:com.pinterest.terrapin.hadoop.HFileOutputFormat.java
License:Apache License
public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(TaskAttemptContext context) throws IOException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputDir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputDir.getFileSystem(conf);
    int blockSize = conf.getInt(Constants.HFILE_BLOCKSIZE, 16384);
    // Default to snappy.
    Compression.Algorithm compressionAlgorithm = getAlgorithm(conf.get(Constants.HFILE_COMPRESSION));
    final StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, new CacheConfig(conf), fs, blockSize)
            .withFilePath(hfilePath(outputPath, context.getTaskAttemptID().getTaskID().getId()))
            .withCompression(compressionAlgorithm)
            .build();
    return new HFileRecordWriter(writer);
}
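The block size and compression this writer uses come from two Terrapin configuration keys (Constants.HFILE_BLOCKSIZE and Constants.HFILE_COMPRESSION in the code above). A hedged driver-side sketch; the numeric value and the "SNAPPY" string are illustrative, and the exact compression value format depends on the project's getAlgorithm helper:

// Driver-side sketch: tuning the keys read by getRecordWriter above.
Configuration conf = new Configuration();
conf.setInt(Constants.HFILE_BLOCKSIZE, 32768);   // HFile block size in bytes (illustrative)
conf.set(Constants.HFILE_COMPRESSION, "SNAPPY"); // compression algorithm name (illustrative)
Job job = Job.getInstance(conf, "hfile-generation");
job.setOutputFormatClass(HFileOutputFormat.class);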
From source file:com.pivotal.hawq.mapreduce.ao.HAWQAORecordReader.java
License:Apache License
/**
 * Called once at initialization.
 *
 * @param split
 *            the split that defines the range of records to read
 * @param context
 *            the information about the task
 * @throws IOException
 * @throws InterruptedException
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    // initialize the value
    Configuration conf = context.getConfiguration();

    // Extract the parameters needed by HAWQAOFileReader and HAWQAORecord
    String encoding = HAWQConfiguration.getInputTableEncoding(conf);
    HAWQSchema schema = HAWQConfiguration.getInputTableSchema(conf);
    /*
     * GPSQL-1047
     *
     * Get version from configuration and init HAWQAORecord with it
     */
    String version = HAWQConfiguration.getDatabaseVersion(conf);

    filereader = new HAWQAOFileReader(conf, split);

    try {
        value = new HAWQAORecord(schema, encoding, version);
    } catch (HAWQException hawqE) {
        throw new IOException(hawqE.getMessage());
    }
}
From source file:com.pivotal.hawq.mapreduce.HAWQInputFormat.java
License:Apache License
@Override
public RecordReader<Void, HAWQRecord> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    HAWQTableFormat tableFormat = getTableFormat(context.getConfiguration());
    switch (tableFormat) {
    case AO:
        return aoInputFormat.createRecordReader(split, context);
    case Parquet:
        return parquetInputFormat.createRecordReader(split, context);
    default:
        throw new AssertionError("invalid table format: " + tableFormat);
    }
}
From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java
License:Apache License
/**
 * Create a file output committer
 *
 * @param outputPath the job's output path
 * @param context the task's context
 * @throws IOException
 */
public FileOutputCommitter(Path outputPath, TaskAttemptContext context) throws IOException {
    super(outputPath, context);
    Job job = new Job(context.getConfiguration());
    String outputDirectories = job.getConfiguration().get(MULTIPLE_OUTPUTS, "");
    if (outputDirectories != null) {
        StringTokenizer st = new StringTokenizer(outputDirectories, " ");
        while (st.hasMoreTokens()) {
            pathNames.add(st.nextToken());
        }
    }
    if (outputPath != null) {
        this.outputPath = outputPath;
        outputFileSystem = outputPath.getFileSystem(context.getConfiguration());
        workPath = new Path(outputPath,
                (FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR + "_" + context.getTaskAttemptID().toString()))
                        .makeQualified(outputFileSystem);
        for (String p : pathNames) {
            if (outputPath.toString().endsWith(p)) {
                committers.put(p, this);
                fake = false;
                break;
            }
        }
    }
}
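This committer tokenizes a space-separated list of output directory names from the MULTIPLE_OUTPUTS key (a constant defined by the same class). A hedged driver-side sketch, assuming the constant is publicly accessible and with illustrative directory names:

// Driver-side sketch: registering the directories the committer will manage.
Job job = Job.getInstance(new Configuration(), "multi-dir-output");
job.getConfiguration().set(FileOutputCommitter.MULTIPLE_OUTPUTS, "dirA dirB dirC"); // illustrative names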
From source file:com.rim.logdriver.mapreduce.avro.AvroBlockInputFormat.java
License:Apache License
/**
 * Creates a new AvroBlockRecordReader.
 *
 * Increases the default value of mapreduce.job.max.split.locations to 100000,
 * if it's not already set.
 *
 * Also sets mapred.max.split.size to the default block size for the root
 * directory ("/"), if it's not already set.
 *
 * @param split
 *          The InputSplit.
 * @param context
 *          The TaskAttemptContext.
 * @return A new AvroBlockRecordReader.
 * @throws IOException
 *           If there is an I/O error.
 */
@Override
public RecordReader<AvroFileHeader, BytesWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException {
    Configuration conf = context.getConfiguration();

    // Ensure we have sensible defaults for how we build blocks.
    if (conf.get("mapreduce.job.max.split.locations") == null) {
        conf.setLong("mapreduce.job.max.split.locations", MAX_SPLIT_LOCATIONS);
    }
    if (conf.get("mapred.max.split.size") == null) {
        // Try to set the split size to the default block size. In case of
        // failure, we'll use this 128MB default.
        long blockSize = 128 * 1024 * 1024; // 128MB
        try {
            blockSize = FileSystem.get(conf).getDefaultBlockSize();
        } catch (IOException e) {
            LOG.error("Error getting filesystem to get default block size (this does not bode well).");
        }
        conf.setLong("mapred.max.split.size", blockSize);
    }

    return new AvroBlockRecordReader();
}
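Because the defaults above are only applied when the keys are unset, a job can override them before submission. A hedged sketch with illustrative values:

// Driver-side sketch: pre-setting the two keys the input format would otherwise default.
Configuration conf = new Configuration();
conf.setLong("mapreduce.job.max.split.locations", 100000L);
conf.setLong("mapred.max.split.size", 256L * 1024 * 1024); // 256 MB splits (illustrative)
Job job = Job.getInstance(conf, "avro-block-read");
job.setInputFormatClass(AvroBlockInputFormat.class);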
From source file:com.rw.legion.input.CsvInputFormat.java
License:Apache License
@Override
public RecordReader<NullWritable, LegionRecord> createRecordReader(InputSplit split, TaskAttemptContext context) {
    String delimiter = context.getConfiguration().get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter)
        recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    return new CsvRecordReader(recordDelimiterBytes);
}
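The record delimiter read here (and in the JsonInputFormat example below) is the standard textinputformat.record.delimiter key, so a driver can override it. A minimal sketch with an illustrative delimiter:

// Driver-side sketch: supplying a custom record delimiter to CsvRecordReader.
Configuration conf = new Configuration();
conf.set("textinputformat.record.delimiter", "\n"); // illustrative delimiter
Job job = Job.getInstance(conf, "legion-csv-input");
job.setInputFormatClass(CsvInputFormat.class);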
From source file:com.rw.legion.input.JsonInputFormat.java
License:Apache License
@Override
public RecordReader<NullWritable, LegionRecord> createRecordReader(InputSplit split, TaskAttemptContext context) {
    String delimiter = context.getConfiguration().get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter)
        recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    return new JsonRecordReader(recordDelimiterBytes);
}
From source file:com.rw.legion.input.LegionRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    /*
     * fileBroken tracks whether there's been an IOException while reading
     * this file. If there has, the record reader will simply stop reading
     * records for this particular file, rather than blowing up the whole
     * job.
     */
    fileBroken = false;
    currentLine = new Text();
    currentLineNumber = 0;

    FileSplit split = (FileSplit) genericSplit;
    if (split.getLength() == 0) {
        fileBroken = true;
    }

    // Load the Legion Objective.
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    legionObjective = ObjectiveDeserializer.deserialize(job.get("legion_objective"));

    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // Open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    // Grab the file name to include with the data.
    fileName = file.toString();

    // Does the Legion Objective specify an input codec to use?
    if (legionObjective.getCodecOverride() != null) {
        isCompressedInput = true;
        CompressionCodec codec = new CompressionCodecFactory(job)
                .getCodecByClassName(legionObjective.getCodecOverride());
        decompressor = CodecPool.getDecompressor(codec);
        in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job, this.recordDelimiterBytes);
        filePosition = fileIn;
    } else {
        CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
        if (null != codec) {
            isCompressedInput = true;
            decompressor = CodecPool.getDecompressor(codec);

            if (codec instanceof SplittableCompressionCodec) {
                final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                        fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
                in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
                start = cIn.getAdjustedStart();
                end = cIn.getAdjustedEnd();
                filePosition = cIn;
            } else {
                in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job,
                        this.recordDelimiterBytes);
                filePosition = fileIn;
            }
        } else {
            fileIn.seek(start);
            in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    }

    /*
     * If this is not the first split, we always throw away the first record
     * because we always (except for the last split) read one extra line in
     * the next() method.
     */
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
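This initialize() method expects a serialized Legion objective under the literal configuration key "legion_objective". A hedged driver-side sketch; the placeholder string stands in for whatever serialized form the project's ObjectiveDeserializer expects:

// Driver-side sketch: placing the serialized objective where initialize() will find it.
String serializedObjective = "{...}"; // illustrative placeholder for the serialized objective
Configuration conf = new Configuration();
conf.set("legion_objective", serializedObjective); // read back via context.getConfiguration()
Job job = Job.getInstance(conf, "legion-ingest");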