Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce TaskAttemptContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
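
Before the project-specific examples below, here is a minimal sketch of the common pattern: a custom RecordReader obtains the job Configuration from the TaskAttemptContext in initialize() and reads its settings and the split's FileSystem through it. The MinimalRecordReader class and the my.example.buffer.size property are hypothetical and shown for illustration only; they are not part of any project listed below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

// Hypothetical reader: demonstrates only the getConfiguration() pattern.
public class MinimalRecordReader extends RecordReader<LongWritable, Text> {

    private FSDataInputStream in;
    private int bufferSize;

    @Override
    public void initialize(InputSplit genericSplit, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The TaskAttemptContext carries the job Configuration; read settings from it.
        Configuration conf = context.getConfiguration();
        this.bufferSize = conf.getInt("my.example.buffer.size", 64 * 1024); // illustrative key

        FileSplit split = (FileSplit) genericSplit;
        Path file = split.getPath();
        // The same Configuration is also used to resolve the FileSystem for the split.
        FileSystem fs = file.getFileSystem(conf);
        this.in = fs.open(file, this.bufferSize);
        this.in.seek(split.getStart());
    }

    @Override
    public boolean nextKeyValue() { return false; } // omitted: real record parsing

    @Override
    public LongWritable getCurrentKey() { return null; }

    @Override
    public Text getCurrentValue() { return null; }

    @Override
    public float getProgress() { return 0.0f; }

    @Override
    public void close() throws IOException {
        if (in != null) {
            in.close();
        }
    }
}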

Usage

From source file: jp.ac.u.tokyo.m.pig.udf.store.FreeEncodingPigTextOutputFormat.java

License: Apache License

@SuppressWarnings("rawtypes")
@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new FreeEncodingPigLineRecordWriter(fileOut, mFieldDelimiter, mEncoding);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new FreeEncodingPigLineRecordWriter(new DataOutputStream(codec.createOutputStream(fileOut)),
                mFieldDelimiter, mEncoding);
    }
}

From source file: kafka.bridge.hadoop.KafkaOutputFormat.java

License: Apache License

@Override
public RecordWriter<NullWritable, W> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Path outputPath = getOutputPath(context);
    if (outputPath == null)
        throw new IllegalArgumentException("no kafka output url specified");
    URI uri = outputPath.toUri();
    Configuration job = context.getConfiguration();

    final String topic = uri.getPath().substring(1); // ignore the initial '/' in the path

    final int queueSize = job.getInt("kafka.output.queue_size", KAFKA_QUEUE_SIZE);
    final int timeout = job.getInt("kafka.output.connect_timeout", KAFKA_PRODUCER_CONNECT_TIMEOUT);
    final int interval = job.getInt("kafka.output.reconnect_interval", KAFKA_PRODUCER_RECONNECT_INTERVAL);
    final int bufSize = job.getInt("kafka.output.bufsize", KAFKA_PRODUCER_BUFFER_SIZE);
    final int maxSize = job.getInt("kafka.output.max_msgsize", KAFKA_PRODUCER_MAX_MESSAGE_SIZE);

    job.set("kafka.output.server", String.format("%s:%d", uri.getHost(), uri.getPort()));
    job.set("kafka.output.topic", topic);
    job.setInt("kafka.output.queue_size", queueSize);
    job.setInt("kafka.output.connect_timeout", timeout);
    job.setInt("kafka.output.reconnect_interval", interval);
    job.setInt("kafka.output.bufsize", bufSize);
    job.setInt("kafka.output.max_msgsize", maxSize);

    if (uri.getHost().isEmpty())
        throw new IllegalArgumentException("missing kafka server");
    if (uri.getPath().isEmpty())
        throw new IllegalArgumentException("missing kafka topic");

    Properties props = new Properties();
    props.setProperty("host", uri.getHost());
    props.setProperty("port", Integer.toString(uri.getPort()));
    props.setProperty("buffer.size", Integer.toString(bufSize));
    props.setProperty("connect.timeout.ms", Integer.toString(timeout));
    props.setProperty("reconnect.interval", Integer.toString(interval));
    props.setProperty("max.message.size", Integer.toString(maxSize));

    SyncProducer producer = new SyncProducer(new SyncProducerConfig(props));
    return new KafkaRecordWriter<W>(producer, topic, queueSize);
}

From source file: kafka.bridge.hadoop2.KafkaOutputFormat.java

License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
    Path outputPath = getOutputPath(context);
    if (outputPath == null)
        throw new KafkaException("no kafka output url specified");
    URI uri = URI.create(outputPath.toString());
    Configuration job = context.getConfiguration();

    Properties props = new Properties();
    String topic;

    props.putAll(kafkaConfigMap); // inject default configuration
    for (Map.Entry<String, String> m : job) { // handle any overrides
        if (!m.getKey().startsWith(KAFKA_CONFIG_PREFIX))
            continue;
        if (m.getKey().equals(KAFKA_URL))
            continue;

        String kafkaKeyName = m.getKey().substring(KAFKA_CONFIG_PREFIX.length() + 1);
        props.setProperty(kafkaKeyName, m.getValue()); // set Kafka producer property
    }

    // inject Kafka producer props back into jobconf for easier debugging
    for (Map.Entry<Object, Object> m : props.entrySet()) {
        job.set(KAFKA_CONFIG_PREFIX + "." + m.getKey().toString(), m.getValue().toString());
    }

    // KafkaOutputFormat specific parameters
    final int queueBytes = job.getInt(KAFKA_CONFIG_PREFIX + ".queue.bytes", KAFKA_QUEUE_BYTES);

    if (uri.getScheme().equals("kafka")) {
        // using the direct broker list
        // URL: kafka://<kafka host>/<topic>
        // e.g. kafka://kafka-server:9000,kafka-server2:9000/foobar
        String brokerList = uri.getAuthority();
        props.setProperty("metadata.broker.list", brokerList);
        job.set(KAFKA_CONFIG_PREFIX + ".metadata.broker.list", brokerList);

        if (uri.getPath() == null || uri.getPath().length() <= 1)
            throw new KafkaException("no topic specified in kafka uri");

        topic = uri.getPath().substring(1); // ignore the initial '/' in the path
        job.set(KAFKA_CONFIG_PREFIX + ".topic", topic);
        log.info(String.format("using kafka broker %s (topic %s)", brokerList, topic));
    } else
        throw new KafkaException("missing scheme from kafka uri (must be kafka://)");

    Producer<Object, byte[]> producer = new Producer<Object, byte[]>(new ProducerConfig(props));
    return new KafkaRecordWriter<K, V>(producer, topic, queueBytes);
}

From source file: kogiri.common.hadoop.io.format.map.BloomMapFileOutputFormat.java

License: Apache License

@Override
public RecordWriter<WritableComparable<?>, Writable> getRecordWriter(TaskAttemptContext context)
        throws IOException {
    Configuration conf = context.getConfiguration();
    CompressionCodec codec = null;
    CompressionType compressionType = CompressionType.NONE;
    if (getCompressOutput(context)) {
        // find the kind of compression to do
        compressionType = SequenceFileOutputFormat.getOutputCompressionType(context);

        // find the right codec
        Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    }

    Path file = getDefaultWorkFile(context, "");
    FileSystem fs = file.getFileSystem(conf);
    // ignore the progress parameter, since MapFile is local
    final BloomMapFile.Writer out = new BloomMapFile.Writer(conf, fs, file.toString(),
            context.getOutputKeyClass().asSubclass(WritableComparable.class),
            context.getOutputValueClass().asSubclass(Writable.class), compressionType, codec, context);

    return new RecordWriter<WritableComparable<?>, Writable>() {
        @Override
        public void write(WritableComparable<?> key, Writable value) throws IOException {
            out.append(key, value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException {
            out.close();
        }
    };
}

From source file: kogiri.common.hadoop.io.reader.fasta.FastaRawReadReader.java

License: Open Source License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {

    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    this.filename = file.getName();

    this.firstRead = true;

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            // mask each byte to avoid sign extension when assembling the little-endian gzip ISIZE field
            this.uncompressedSize = ((len[3] & 0xffL) << 24) | ((len[2] & 0xffL) << 16) | ((len[1] & 0xffL) << 8) | (len[0] & 0xffL);
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);

    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), job);
    } else {
        if (this.start != 0) {
            fileIn.seek(this.start);
        }
        this.in = new LineReader(fileIn, job);
    }

    // skip lines until we reach the start of a new read
    while (this.start < this.end) {
        Text skipText = new Text();
        long newSize = this.in.readLine(skipText, this.maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.start), this.maxLineLength));
        if (newSize == 0) {
            // EOF
            this.hasNextRead = false;
            this.pos = this.end;
            break;
        }

        if (skipText.getLength() > 0 && skipText.charAt(0) == READ_DELIMITER) {
            this.prevLine = skipText;
            this.prevSize = newSize;
            this.hasNextRead = true;
            this.pos = this.start;
            break;
        }

        this.start += newSize;

        if (this.start >= this.end) {
            // EOF
            this.hasNextRead = false;
            this.pos = this.end;
            break;
        }
    }

    this.key = null;
    this.value = null;
}

From source file: kogiri.common.hadoop.io.reader.fasta.FastaReadDescriptionReader.java

License: Open Source License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {

    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    this.filename = file.getName();

    this.firstRead = true;

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            // mask each byte to avoid sign extension when assembling the little-endian gzip ISIZE field
            this.uncompressedSize = ((len[3] & 0xffL) << 24) | ((len[2] & 0xffL) << 16) | ((len[1] & 0xffL) << 8) | (len[0] & 0xffL);
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);

    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), job);
    } else {
        if (this.start != 0) {
            fileIn.seek(this.start);
        }
        this.in = new LineReader(fileIn, job);
    }

    // skip lines until we reach the start of a new record
    while (this.start < this.end) {
        Text skipText = new Text();
        long newSize = this.in.readLine(skipText, this.maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.start), this.maxLineLength));
        if (newSize == 0) {
            // EOF
            this.hasNextRecord = false;
            this.pos = this.end;
            break;
        }

        if (skipText.getLength() > 0 && skipText.charAt(0) == READ_DELIMITER) {
            this.prevLine = skipText;
            this.prevSize = newSize;
            this.hasNextRecord = true;
            this.pos = this.start;
            break;
        }

        this.start += newSize;

        if (this.start >= this.end) {
            // EOF
            this.hasNextRecord = false;
            this.pos = this.end;
            break;
        }
    }

    this.key = null;
    this.value = null;
}

From source file: kogiri.mapreduce.common.kmermatch.KmerJoiner.java

License: Open Source License

public KmerJoiner(Path[] kmerIndexPath, KmerRangePartition partition, AKmerIndexRecordFilter[] filter,
        TaskAttemptContext context) throws IOException {
    initialize(kmerIndexPath, partition, filter, context.getConfiguration());
}

From source file: kogiri.mapreduce.common.kmermatch.KmerMatchRecordReader.java

License: Open Source License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    if (!(split instanceof KmerMatchInputSplit)) {
        throw new IOException("split is not an instance of KmerMatchIndexSplit");
    }

    KmerMatchInputSplit kmerIndexSplit = (KmerMatchInputSplit) split;
    this.conf = context.getConfiguration();
    this.inputIndexPath = kmerIndexSplit.getIndexFilePath();

    KmerRangePartition partition = kmerIndexSplit.getPartition();

    KmerMatchInputFormatConfig inputFormatConfig = KmerMatchInputFormatConfig.createInstance(this.conf);
    AKmerIndexRecordFilter[] kmerIndexRecordFilter = new AKmerIndexRecordFilter[this.inputIndexPath.length];

    for (int i = 0; i < this.inputIndexPath.length; i++) {
        String fastaFilename = KmerIndexHelper.getFastaFileName(this.inputIndexPath[i].getName());
        Path statisticsFile = new Path(inputFormatConfig.getKmerStatisticsPath(),
                KmerStatisticsHelper.makeKmerStatisticsFileName(fastaFilename));
        FileSystem fs = statisticsFile.getFileSystem(this.conf);
        KmerStatistics statistics = KmerStatistics.createInstance(fs, statisticsFile);

        KmerStandardDeviation stddev = new KmerStandardDeviation();
        stddev.setAverage(statistics.getAverageFrequency());
        stddev.setStdDeviation(statistics.getStdDeviation());
        stddev.setFactor(inputFormatConfig.getStandardDeviationFactor());

        kmerIndexRecordFilter[i] = new STDKmerIndexRecordFilter(stddev);
    }

    this.joiner = new KmerJoiner(this.inputIndexPath, partition, kmerIndexRecordFilter, context);
}

From source file: kogiri.mapreduce.preprocess.common.kmerindex.KmerIndexRecordReader.java

License: Open Source License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    if (!(split instanceof KmerIndexSplit)) {
        throw new IOException("split is not an instance of KmerIndexSplit");
    }

    KmerIndexSplit kmerIndexSplit = (KmerIndexSplit) split;
    this.conf = context.getConfiguration();
    this.inputIndexPaths = kmerIndexSplit.getIndexFilePaths();

    this.inputFormatConfig = KmerIndexInputFormatConfig.createInstance(this.conf);

    FileSystem fs = this.inputIndexPaths[0].getFileSystem(this.conf);
    this.indexReader = new KmerIndexReader(fs, new Path(this.inputFormatConfig.getKmerIndexIndexPath()),
            this.conf);

    this.currentProgress = BigInteger.ZERO;
    StringBuilder endKmer = new StringBuilder();
    for (int i = 0; i < this.inputFormatConfig.getKmerSize(); i++) {
        endKmer.append("T");
    }
    this.progressEnd = SequenceHelper.convertToBigInteger(endKmer.toString());

    this.curKey = null;
    this.curVal = null;
}

From source file: libra.common.hadoop.io.reader.fasta.FastaKmerReader.java

License: Apache License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration conf = context.getConfiguration();
    this.kmersize = FastaKmerInputFormat.getKmerSize(conf);
    this.maxLineLength = conf.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(conf);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            // mask each byte to avoid sign extension when assembling the little-endian gzip ISIZE field
            this.uncompressedSize = ((len[3] & 0xffL) << 24) | ((len[2] & 0xffL) << 16) | ((len[1] & 0xffL) << 8) | (len[0] & 0xffL);
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);
    boolean inTheMiddle = false;
    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), conf);
    } else {
        if (this.start != 0) {
            this.start--;
            fileIn.seek(this.start);

            inTheMiddle = true;
        }
        this.in = new LineReader(fileIn, conf);
    }

    this.buffer = new Text();

    if (inTheMiddle) {
        // find new start line
        this.start += this.in.readLine(new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, this.end - this.start));

        // back off
        FSDataInputStream fileIn2 = fs.open(file);
        fileIn2.seek(this.start - 1000);

        LineReader in2 = new LineReader(fileIn2, conf);
        Text tempLine = new Text();
        long curpos = this.start - 1000;
        while (curpos < this.start) {
            curpos += in2.readLine(tempLine, 0, (int) (this.start - curpos));
        }

        if (tempLine.charAt(0) == READ_DELIMITER) {
            // clean start
            this.buffer.clear();
        } else {
            // leave k-1 seq in the buffer
            String seq = tempLine.toString().trim();
            String left = seq.substring(seq.length() - this.kmersize + 1);
            this.buffer.set(left);
        }

        in2.close();
    }

    this.pos = this.start;

    this.key = null;
    this.value = null;
}