Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
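
Before the examples below, here is a minimal sketch of the call from inside a Mapper (Mapper.Context extends TaskAttemptContext, so the same method is available there). The class and the property name my.example.separator are hypothetical and used only for illustration:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ExampleMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    private String separator;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Mapper.Context extends TaskAttemptContext, so getConfiguration()
        // returns the job's Configuration for this task attempt.
        Configuration conf = context.getConfiguration();
        // "my.example.separator" is a hypothetical property, shown only to
        // illustrate reading a job-level setting from the task context.
        separator = conf.get("my.example.separator", "\t");
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] parts = value.toString().split(separator, 2);
        context.write(new Text(parts[0]), new LongWritable(1));
    }
}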

Usage

From source file: com.panguso.lc.analysis.format.mapreduce.TextOutputFormat.java

License: Open Source License

/**
 * @param job job
 * @throws IOException IOException
 * @throws InterruptedException InterruptedException
 */
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = conf.get("mapred.textoutputformat.separator", separate);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}

From source file: com.phantom.hadoop.examples.terasort.TeraOutputFormat.java

License: Apache License

public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job) throws IOException {
    Path file = getDefaultWorkFile(job, "");
    FileSystem fs = file.getFileSystem(job.getConfiguration());
    FSDataOutputStream fileOut = fs.create(file);
    return new TeraRecordWriter(fileOut, job);
}

From source file: com.pinterest.terrapin.hadoop.HFileOutputFormat.java

License: Apache License

public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputDir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputDir.getFileSystem(conf);

    int blockSize = conf.getInt(Constants.HFILE_BLOCKSIZE, 16384);
    // Default to snappy.
    Compression.Algorithm compressionAlgorithm = getAlgorithm(conf.get(Constants.HFILE_COMPRESSION));
    final StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, new CacheConfig(conf), fs, blockSize)
            .withFilePath(hfilePath(outputPath, context.getTaskAttemptID().getTaskID().getId()))
            .withCompression(compressionAlgorithm).build();
    return new HFileRecordWriter(writer);
}

From source file: com.pivotal.hawq.mapreduce.ao.HAWQAORecordReader.java

License: Apache License

/**
 * Called once at initialization.
 * 
 * @param split
 *            the split that defines the range of records to read
 * @param context
 *            the information about the task
 * @throws IOException
 * @throws InterruptedException
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {

    // initialize the value
    Configuration conf = context.getConfiguration();

    // Extract the parameters needed by HAWQAOFileReader and HAWQAORecord
    String encoding = HAWQConfiguration.getInputTableEncoding(conf);
    HAWQSchema schema = HAWQConfiguration.getInputTableSchema(conf);
    /*
     * GPSQL-1047
     * 
     * Get version from configuration and init HAWQAORecord with it
     */
    String version = HAWQConfiguration.getDatabaseVersion(conf);

    filereader = new HAWQAOFileReader(conf, split);

    try {
        value = new HAWQAORecord(schema, encoding, version);
    } catch (HAWQException hawqE) {
        throw new IOException(hawqE.getMessage());
    }
}

From source file: com.pivotal.hawq.mapreduce.HAWQInputFormat.java

License: Apache License

@Override
public RecordReader<Void, HAWQRecord> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {

    HAWQTableFormat tableFormat = getTableFormat(context.getConfiguration());

    switch (tableFormat) {
    case AO:
        return aoInputFormat.createRecordReader(split, context);
    case Parquet:
        return parquetInputFormat.createRecordReader(split, context);
    default:
        throw new AssertionError("invalid table format: " + tableFormat);
    }
}

From source file: com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java

License: Apache License

/**
 * Create a file output committer
 * @param outputPath the job's output path
 * @param context the task's context
 * @throws IOException
 */
public FileOutputCommitter(Path outputPath, TaskAttemptContext context) throws IOException {
    super(outputPath, context);
    Job job = new Job(context.getConfiguration());
    String outputDirectories = job.getConfiguration().get(MULTIPLE_OUTPUTS, "");
    if (outputDirectories != null) {
        StringTokenizer st = new StringTokenizer(outputDirectories, " ");
        while (st.hasMoreTokens()) {
            pathNames.add(st.nextToken());
        }
    }
    if (outputPath != null) {
        this.outputPath = outputPath;
        outputFileSystem = outputPath.getFileSystem(context.getConfiguration());
        workPath = new Path(outputPath, (FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR + "_"
                + context.getTaskAttemptID().toString())).makeQualified(outputFileSystem);
        for (String p : pathNames) {
            if (outputPath.toString().endsWith(p)) {
                committers.put(p, this);
                fake = false;
                break;
            }
        }
    }
}

From source file: com.rim.logdriver.mapreduce.avro.AvroBlockInputFormat.java

License: Apache License

/**
 * Creates a new AvroBlockRecordReader.
 * 
 * Increases the default value of mapreduce.job.max.split.locations to 100000,
 * if it's not already set.
 * 
 * Also sets mapred.max.split.size to the default block size for the root
 * directory ("/"), if it's not already set.
 * 
 * @param split
 *          The InputSplit.
 * @param context
 *          The TaskAttemptContext.
 * @return A new AvroBlockRecordReader.
 * @throws IOException
 *           If there is an I/O error.
 */
@Override
public RecordReader<AvroFileHeader, BytesWritable> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();

    // Ensure we have sensible defaults for how we build blocks.
    if (conf.get("mapreduce.job.max.split.locations") == null) {
        conf.setLong("mapreduce.job.max.split.locations", MAX_SPLIT_LOCATIONS);
    }
    if (conf.get("mapred.max.split.size") == null) {
        // Try to set the split size to the default block size. In case of
        // failure, we'll use this 128MB default.
        long blockSize = 128 * 1024 * 1024; // 128MB
        try {
            blockSize = FileSystem.get(conf).getDefaultBlockSize();
        } catch (IOException e) {
            LOG.error("Error getting filesystem to get get default block size (this does not bode well).");
        }
        conf.setLong("mapred.max.split.size", blockSize);
    }

    return new AvroBlockRecordReader();
}

From source file: com.rw.legion.input.CsvInputFormat.java

License: Apache License

@Override
public RecordReader<NullWritable, LegionRecord> createRecordReader(InputSplit split,
        TaskAttemptContext context) {

    String delimiter = context.getConfiguration().get("textinputformat.record.delimiter");

    byte[] recordDelimiterBytes = null;

    if (null != delimiter)
        recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);

    return new CsvRecordReader(recordDelimiterBytes);
}

From source file: com.rw.legion.input.JsonInputFormat.java

License: Apache License

@Override
public RecordReader<NullWritable, LegionRecord> createRecordReader(InputSplit split,
        TaskAttemptContext context) {

    String delimiter = context.getConfiguration().get("textinputformat.record.delimiter");

    byte[] recordDelimiterBytes = null;

    if (null != delimiter)
        recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);

    return new JsonRecordReader(recordDelimiterBytes);
}

From source file: com.rw.legion.input.LegionRecordReader.java

License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    /*
     * fileBroken tracks whether there's been an IOException while reading
     * this file. If there has, the record reader will simply stop reading
     * records for this particular file, rather than blowing up the whole
     * job.
     */
    fileBroken = false;
    currentLine = new Text();
    currentLineNumber = 0;

    FileSplit split = (FileSplit) genericSplit;

    if (split.getLength() == 0) {
        fileBroken = true;
    }

    // Load the Legion Objective.
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    legionObjective = ObjectiveDeserializer.deserialize(job.get("legion_objective"));

    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // Open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    // Grab the file name to include with the data.
    fileName = file.toString();

    // Does the Legion Objective specify an input codec to use?
    if (legionObjective.getCodecOverride() != null) {
        isCompressedInput = true;
        CompressionCodec codec = new CompressionCodecFactory(job)
                .getCodecByClassName(legionObjective.getCodecOverride());
        decompressor = CodecPool.getDecompressor(codec);
        in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job, this.recordDelimiterBytes);
        filePosition = fileIn;
    } else {
        CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
        if (null != codec) {
            isCompressedInput = true;
            decompressor = CodecPool.getDecompressor(codec);

            if (codec instanceof SplittableCompressionCodec) {
                final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                        fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
                in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
                start = cIn.getAdjustedStart();
                end = cIn.getAdjustedEnd();
                filePosition = cIn;
            } else {
                in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job,
                        this.recordDelimiterBytes);
                filePosition = fileIn;
            }
        } else {
            fileIn.seek(start);
            in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    }

    /*
     * If this is not the first split, we always throw away the first record,
     * because we always (except for the last split) read one extra line in the
     * next() method.
     */
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }

    this.pos = start;
}