Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Source Link

Document

Return the configuration for the job.

Usage

From source file:de.gesundkrank.wikipedia.hadoop.util.MapFileOutputFormat.java

License:Open Source License

/**
 * Creates a {@link RecordWriter} that appends every key/value pair to a
 * block-compressed {@link MapFile} rooted at the work path of this task
 * attempt's output committer.
 *
 * <p>The key and value classes recorded in the map file are the job's configured
 * output key and value classes.
 *
 * @param context the task attempt supplying the configuration, the output classes
 *                and the output committer
 * @return a writer whose {@code write} appends to the map file and whose
 *         {@code close} closes it
 * @throws IOException if the map file writer cannot be created
 * @throws InterruptedException declared by the overridden method
 * @throws ClassCastException if the job's output key class is not a
 *         {@link WritableComparable} or its output value class is not a {@link Writable}
 */
@Override
public RecordWriter<WritableComparable, Writable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();

    FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(context);

    // MapFile.Writer only recognises its own key-class option; the SequenceFile one is
    // ignored and the constructor then fails with "key class or comparator option must be set".
    // The concrete job classes are used because the underlying SequenceFile writer rejects
    // records whose runtime class differs from the declared key/value class.
    SequenceFile.Writer.Option keyClass = MapFile.Writer
            .keyClass(context.getOutputKeyClass().asSubclass(WritableComparable.class));
    SequenceFile.Writer.Option valueClass = SequenceFile.Writer
            .valueClass(context.getOutputValueClass().asSubclass(Writable.class));
    SequenceFile.Writer.Option compressionType = SequenceFile.Writer
            .compression(SequenceFile.CompressionType.BLOCK);

    final MapFile.Writer out = new MapFile.Writer(conf, committer.getWorkPath(), keyClass, valueClass,
            compressionType);

    return new RecordWriter<WritableComparable, Writable>() {
        @Override
        public void close(TaskAttemptContext arg0) throws IOException, InterruptedException {
            out.close();
        }

        @Override
        public void write(WritableComparable key, Writable value) throws IOException, InterruptedException {
            out.append(key, value);
        }
    };
}

From source file:de.l3s.common.hadoop.WholeFileRecordReader.java

License:Apache License

/**
 * Stores the split (cast to {@link FileSplit}) and the task's configuration
 * for later use by this reader.
 *
 * @param inputSplit the split to read; must be a {@link FileSplit}
 * @param taskAttemptContext the task attempt providing the configuration
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    fileSplit = (FileSplit) inputSplit;
    conf = taskAttemptContext.getConfiguration();
}

From source file:de.l3s.streamcorpus.terrier.ThriftFileCollectionRecordReader.java

License:Apache License

/**
 * Resets the collection index, obtains the configuration and file system,
 * and reads the urls / paths of files from the input split.
 *
 * @param split the split handed to {@code loadPathsFromInputSplit}
 * @param context the task attempt providing the configuration
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    this.collectionIndex = -1;

    this.conf = context.getConfiguration();
    this.fs = FileSystem.get(this.conf);

    loadPathsFromInputSplit(split, this.conf);
}

From source file:de.rwhq.hdfs.index.LineRecordReader.java

License:Apache License

/**
 * Opens the file behind the given split and positions the line reader at the
 * first line this reader is responsible for.
 *
 * <p>If a compression codec matches the file, the stream is wrapped by that codec
 * and {@code end} is set to {@code Long.MAX_VALUE}. Otherwise, when the split does
 * not begin at offset 0, the reader seeks to one byte before the split start and
 * consumes one line so that {@code start} lands on a line boundary.
 *
 * @param genericSplit the split to read; must be a {@link FileSplit}
 * @param context the task attempt providing the configuration
 * @throws IOException if the file cannot be opened, positioned or read
 */
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    final FileSplit fileSplit = (FileSplit) genericSplit;
    final Configuration conf = context.getConfiguration();
    this.maxLineLength = conf.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = fileSplit.getStart();
    end = start + fileSplit.getLength();
    final Path splitPath = fileSplit.getPath();
    compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = compressionCodecs.getCodec(splitPath);

    // open the file; seeking to the split start happens below for uncompressed input
    final FileSystem fileSystem = splitPath.getFileSystem(conf);
    fileIn = fileSystem.open(fileSplit.getPath());

    // Only an uncompressed split that does not begin at offset 0 skips its first line.
    final boolean skipFirstLine = codec == null && start != 0;

    if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), conf);
        end = Long.MAX_VALUE;
    } else {
        if (skipFirstLine) {
            start -= 1;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, conf);
    }

    if (skipFirstLine) {
        // skip first line and re-establish "start"
        final int maxBytesToConsume = (int) Math.min((long) Integer.MAX_VALUE, end - start);
        start += in.readLine(new Text(), 0, maxBytesToConsume);
    }
    this.pos = start;
}

From source file:de.sec.dns.playground.ARFFOutputFormat.java

License:Apache License

/**
 * Creates the task's default work file with the {@code .arff} extension and
 * returns an {@code ARFFLineRecordWriter} writing to it.
 *
 * <p>The file is created with overwrite disabled.
 *
 * @param job the task attempt providing the configuration and work file location
 * @return a record writer backed by the newly created file
 * @throws IOException if the file cannot be created
 */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    final Configuration configuration = job.getConfiguration();
    final Path arffFile = getDefaultWorkFile(job, ".arff");

    final FSDataOutputStream arffStream = arffFile.getFileSystem(configuration).create(arffFile, false);
    return new ARFFLineRecordWriter<K, V>(arffStream, job);
}

From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.deduplication.DocumentInfoOutputFormat.java

License:Apache License

/**
 * Creates the task's default work file with the {@code .txt} extension and
 * returns a {@code DocumentInfoRecordWriter} writing to it.
 *
 * <p>The file is created with overwrite disabled.
 *
 * @param job the task attempt providing the configuration and work file location
 * @return a record writer backed by the newly created file
 * @throws IOException if the file cannot be created
 */
@Override
public RecordWriter<NullWritable, List<DocumentInfo>> getRecordWriter(TaskAttemptContext job)
        throws IOException {
    final Configuration configuration = job.getConfiguration();

    // output directory plus our filename
    final Path targetFile = getDefaultWorkFile(job, ".txt");

    // create the file in the file system the path belongs to
    final FSDataOutputStream targetStream = targetFile.getFileSystem(configuration).create(targetFile, false);

    return new DocumentInfoRecordWriter(targetStream);
}

From source file:dev.geminileft.outputformat.MyTextOutputFormat.java

License:Apache License

/**
 * Creates a {@code LineRecordWriter} for the task's default work file.
 *
 * <p>The key/value separator is read from the {@code SEPERATOR} configuration key
 * (default {@code "\t"}) and the record delimiter from the {@code DELIMITER} key
 * (default {@code "\n"}). When output compression is enabled, the configured codec
 * (defaulting to {@link GzipCodec}) wraps the stream and supplies the file extension.
 *
 * @param job the task attempt providing the configuration and work file location
 * @return a line record writer backed by the newly created file
 * @throws IOException if the file cannot be created
 */
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    final Configuration conf = job.getConfiguration();
    final boolean compress = getCompressOutput(job);
    final String separator = conf.get(SEPERATOR, "\t");
    final String delimiter = conf.get(DELIMITER, "\n");

    CompressionCodec codec = null;
    String extension = "";
    if (compress) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }

    final Path file = getDefaultWorkFile(job, extension);
    final FileSystem fs = file.getFileSystem(conf);
    // Both the plain and the compressed path create the file the same way (no overwrite).
    final FSDataOutputStream fileOut = fs.create(file, false);

    if (compress) {
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                separator, delimiter);
    }
    return new LineRecordWriter<K, V>(fileOut, separator, delimiter);
}

From source file:diamondmapreduce.NLineRecordReader.java

License:Apache License

/**
 * Opens the file behind the given split and positions the line reader at the
 * first line this reader is responsible for.
 *
 * <p>When the split does not begin at offset 0, the reader seeks to one byte
 * before the split start and consumes one line so that {@code start} lands on
 * a line boundary.
 *
 * @param genericSplit the split to read; must be a {@link FileSplit}
 * @param context the task attempt providing the configuration
 * @throws IOException if the file cannot be opened, positioned or read
 */
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    final FileSplit fileSplit = (FileSplit) genericSplit;

    final Path splitPath = fileSplit.getPath();
    final Configuration configuration = context.getConfiguration();
    this.maxLineLength = configuration.getInt("mapreduce.input.linerecordreader.line.maxlength",
            Integer.MAX_VALUE);
    final FileSystem fileSystem = splitPath.getFileSystem(configuration);
    start = fileSplit.getStart();
    end = start + fileSplit.getLength();
    final FSDataInputStream stream = fileSystem.open(fileSplit.getPath());

    // A split that starts mid-file drops its (possibly partial) first line.
    final boolean skipFirstLine = start != 0;
    if (skipFirstLine) {
        start -= 1;
        stream.seek(start);
    }
    in = new LineReader(stream, configuration);
    if (skipFirstLine) {
        final int maxBytesToConsume = (int) Math.min((long) Integer.MAX_VALUE, end - start);
        start += in.readLine(new Text(), 0, maxBytesToConsume);
    }
    this.pos = start;
}

From source file:dk.aau.cs.cloudetl.io.SequenceIndexFileReader.java

License:Apache License

/**
 * Opens a {@link SequenceFile.Reader} on the split's file and establishes the
 * {@code start} and {@code end} positions for this reader.
 *
 * <p>If the split begins beyond the reader's current position, the reader is
 * synced to the split start. {@code more} is set to whether {@code start}
 * lies before {@code end}.
 *
 * @param split the split to read; must be a {@link FileSplit}
 * @param context the task attempt providing the configuration
 * @throws IOException if the sequence file cannot be opened or synced
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    final FileSplit sequenceSplit = (FileSplit) split;

    conf = context.getConfiguration();
    final Path sequencePath = sequenceSplit.getPath();
    final FileSystem fileSystem = sequencePath.getFileSystem(conf);
    this.in = new SequenceFile.Reader(fileSystem, sequencePath, conf);

    final long splitStart = sequenceSplit.getStart();
    this.end = splitStart + sequenceSplit.getLength();

    if (splitStart > in.getPosition()) {
        // sync to start
        in.sync(splitStart);
    }

    this.start = in.getPosition();
    more = start < end;
}

From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsFileOutputCommitter.java

License:Apache License

/**
 * Create a file output committer/* www .j  av a2  s  .co  m*/
 *
 * @param outputPath the job's output path
 * @param context the task's context
 * @throws IOException
 */
public HirodsFileOutputCommitter(Path outputPath, Path tempPath, TaskAttemptContext context)
        throws IOException {
    if (outputPath != null && tempPath != null) {
        this.outputPath = outputPath;
        this.outputFileSystem = outputPath.getFileSystem(context.getConfiguration());
        this.tempPath = tempPath;
        this.workFileSystem = tempPath.getFileSystem(context.getConfiguration());
        this.workPath = new Path(tempPath, (HirodsFileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR + "_"
                + context.getTaskAttemptID().toString())).makeQualified(this.workFileSystem);
    }
}